# Supervised Learning Classification: Evaluation

In [None]:
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides convergence and deprecation warnings raised
# by the libraries used below; remove it when debugging a model.
warnings.simplefilter('ignore')

In [None]:
import pandas as pd
import numpy as np

In [None]:
data = pd.read_csv('https://raw.githubusercontent.com/loukjsmalbil/datasets_ws/master/titanic_preprocessed.csv')
data = data.drop(columns=['male'])

In [None]:
data.head()

# Logistic Regression

## Data Processing (LR)


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
y = data['survived'] 

In [None]:
y

In [None]:
X = data.drop('survived', axis=1)  

In [None]:
X

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

## Train the Model (LR)

In [None]:
from sklearn.linear_model import LogisticRegression 

In [None]:
model = LogisticRegression()

In [None]:
model.fit(X_train, y_train)

## Test the Model

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
y_pred = model.predict(X_test)

In [None]:
pd.DataFrame({'test':y_test, 'predicted':y_pred})

In [None]:
confusion_matrix(y_test, y_pred)

# TP | FN
# -------
# FP | TN

In [None]:
accuracy_score(y_test, y_pred)

## Other Metrics

In [None]:
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

In [None]:
recall_score(y_test, y_pred)

In [None]:
precision_score(y_test, y_pred)

In [None]:
from sklearn.metrics import f1_score

In [None]:
f1_score(y_test, y_pred)

In [None]:
# Manual F1 = 2 * P * R / (P + R), the harmonic mean of precision and recall.
# Each metric is computed once and reused. When both are zero (no true
# positives) the result is defined as 0.0 instead of dividing by zero, which
# is the value f1_score reports for that case by default.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
if precision + recall:
    F1 = (2 * (precision * recall)) / (precision + recall)
else:
    F1 = 0.0
print(F1)

In [None]:
import matplotlib.pyplot as plt  
from sklearn import datasets, metrics, model_selection, svm

In [None]:
y_pred

In [None]:
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=2)

In [None]:
thresholds

In [None]:
# Plot the ROC curve of the fitted classifier on the held-out split.
# metrics.plot_roc_curve was deprecated in scikit-learn 1.0 and removed in
# 1.2; RocCurveDisplay.from_estimator is its replacement. Fall back to the old
# helper only on installations that predate the new API.
# No synthetic make_classification data is generated here: the plot uses the
# Titanic test split, and X / y must not be overwritten with unrelated data.
roc_display_cls = getattr(metrics, 'RocCurveDisplay', None)
if hasattr(roc_display_cls, 'from_estimator'):
    roc_display_cls.from_estimator(model, X_test, y_test)
else:
    metrics.plot_roc_curve(model, X_test, y_test)
plt.show()

# Supervised Learning Regression: Evaluation

In [None]:
import seaborn as sns

# Display seaborn's 'titanic' example dataset. The returned frame is not
# assigned to a name, so it is only shown as the cell output.
sns.load_dataset(name='titanic')

In [None]:
# Regression task: the 'fare' column is the target, every other column is a
# feature. This rebinds X, y and the train/test partitions.
y = data['fare']
X = data.drop(columns=['fare'])
# random_state pins the shuffle so the error metrics computed below are
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [None]:
from sklearn.linear_model import LinearRegression 


In [None]:
model = LinearRegression()

In [None]:
model.fit(X_train, y_train)


In [None]:
y_pred = model.predict(X_test)

In [None]:
pd.DataFrame({'test':y_test, 'predicted':y_pred})

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [None]:
mean_absolute_error(y_test, y_pred)

In [None]:
mean_squared_error(y_test, y_pred)

In [None]:
np.sqrt(mean_squared_error(y_test, y_pred))

In [None]:
from sklearn.metrics import r2_score

In [None]:
# Coefficient of determination (R^2) on the held-out rows.
r2_score(y_true=y_test, y_pred=y_pred)

## MAE and MSE (Differences)

In [None]:
# Not too much deviation between true and pred
true = [5, 10, 20,5]
pred = [6,11,19,5]

In [None]:
print(mean_absolute_error(true, pred))
print(mean_squared_error(true, pred))

In [None]:
# Much deviation between true and pred
true = [5, 10, 20,5]
pred = [7,9,15,5]

In [None]:
print(mean_absolute_error(true, pred))
print(mean_squared_error(true, pred))

In [None]:
# A lot of deviation between true and pred
true = [5, 10, 20,5]
pred = [7,9,10,8]

In [None]:
print(mean_absolute_error(true, pred))
print(mean_squared_error(true, pred))