<a href="https://colab.research.google.com/github/satwiksps/Machine_Learning/blob/main/7_Performance_Metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pima Indians Diabetes Dataset

In [16]:
from pandas import read_csv

# Remote location of the Pima Indians diabetes CSV.
url = 'https://raw.githubusercontent.com/erojaso/MLMasteryEndToEnd/master/data/pima-indians-diabetes.data.csv'

# Labels assigned to the file's nine columns: eight inputs, then the 'class' target.
column_names = [
    'preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age',
    'class',
]

# Read the CSV with the explicit column labels, then drop down to a plain NumPy array.
data = read_csv(url, names=column_names)
array = data.to_numpy()

# Columns 0-7 are the input features; column 8 is the output label.
X1 = array[:, :8]
Y1 = array[:, 8]


In [24]:
# Show the dimensions of the Pima feature matrix and label vector.
for caption, part in (("Shape of X1:", X1), ("Shape of Y1:", Y1)):
    print(caption, part.shape)

Shape of X1: (768, 8)
Shape of Y1: (768,)


# Boston House Price Dataset

In [25]:
from pandas import read_csv

# Remote location of the Boston house price CSV.
url2 = 'https://raw.githubusercontent.com/erojaso/MLMasteryEndToEnd/master/data/housing.NAN.adjust.csv'

# Labels assigned to the file's fourteen columns: 13 attributes, then the MEDV target.
names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
    'MEDV',
]

# Read the CSV with the explicit column labels.
dataframe = read_csv(url2, names=names)

# NOTE: this rebinds `array`, which the Pima cell also assigns.
array = dataframe.to_numpy()

# Columns 0-12 are the input attributes; column 13 is the output value.
X2 = array[:, :13]
Y2 = array[:, 13]

In [26]:
# Show the dimensions of the Boston attribute matrix and target vector.
for caption, part in (("Shape of X2:", X2), ("Shape of Y2:", Y2)):
    print(caption, part.shape)

Shape of X2: (506, 13)
Shape of Y2: (506,)


# Classification Metrics

**Classification Accuracy**

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score

# 10-fold split; no shuffle or random_state is passed to KFold.
kfold = KFold(n_splits=10)
model = LogisticRegression(solver='liblinear')
scoring = 'accuracy'

# One accuracy score per fold on the Pima data.
results = cross_val_score(model, X1, Y1, scoring=scoring, cv=kfold)

# The mean and spread are multiplied by 100 so they are printed as percentages.
print(
    "Accuracy: %.3f, Standard Deviation: (%.3f)"
    % (100.0 * results.mean(), 100.0 * results.std())
)

Accuracy: 76.951, Standard Deviation: (4.841)


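**Logarithmic Loss**

scikit-learn exposes this metric as the `neg_log_loss` scorer, i.e. the log loss multiplied by -1 so that larger scores are better. The value printed below is therefore negative; values closer to 0 indicate better predictions.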

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score

# 10-fold split; no shuffle or random_state is passed to KFold.
kfold = KFold(n_splits=10)
model = LogisticRegression(solver='liblinear')

# The 'neg_log_loss' scorer yields the negated log loss, so each fold's score
# is <= 0 and values nearer zero are better.
scoring = 'neg_log_loss'
results = cross_val_score(model, X1, Y1, scoring=scoring, cv=kfold)

# The sign is kept as returned by the scorer, so the printed mean is negative.
fold_mean, fold_std = results.mean(), results.std()
print("Logloss: %.3f, Standard Deviation: (%.3f)" % (fold_mean, fold_std))

Logloss: -0.493, Standard Deviation: (0.047)


**Area Under ROC Curve**

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score

# 10-fold split; no shuffle or random_state is passed to KFold.
kfold = KFold(n_splits=10)
model = LogisticRegression(solver='liblinear')
scoring = 'roc_auc'

# One area-under-ROC-curve score per fold on the Pima data.
results = cross_val_score(model, X1, Y1, scoring=scoring, cv=kfold)

# Report the mean score across folds and its spread.
fold_mean, fold_std = results.mean(), results.std()
print("AUC: %.3f, Standard Deviation: (%.3f)" % (fold_mean, fold_std))

AUC: 0.824, Standard Deviation: (0.041)


**Confusion Matrix**

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Fixed seed so the 67/33 train/test split is repeatable.
seed = 7
test_size = 0.33
X_train, X_test, Y_train, Y_test = train_test_split(
    X1, Y1, random_state=seed, test_size=test_size
)

# Fit on the training portion, then predict the held-out rows.
model = LogisticRegression(solver='liblinear').fit(X_train, Y_train)
predicted = model.predict(X_test)

# Cross-tabulate the true labels (first argument) against the predictions.
matrix = confusion_matrix(Y_test, predicted)
print(matrix)

[[141  21]
 [ 41  51]]


**Classification Report**

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Fixed seed so the 67/33 train/test split is repeatable.
seed = 7
test_size = 0.33
X_train, X_test, Y_train, Y_test = train_test_split(
    X1, Y1, random_state=seed, test_size=test_size
)

# Fit on the training portion, then predict the held-out rows.
model = LogisticRegression(solver='liblinear').fit(X_train, Y_train)
predicted = model.predict(X_test)

# Text summary comparing the true labels with the predictions.
report = classification_report(Y_test, predicted)
print(report)

              precision    recall  f1-score   support

         0.0       0.77      0.87      0.82       162
         1.0       0.71      0.55      0.62        92

    accuracy                           0.76       254
   macro avg       0.74      0.71      0.72       254
weighted avg       0.75      0.76      0.75       254



# Regression Metrics

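**Mean Absolute Error**

scikit-learn exposes this metric as the `neg_mean_absolute_error` scorer, i.e. the MAE multiplied by -1 so that larger scores are better. The value printed below is therefore negative; its magnitude is the mean absolute error.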

In [27]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_score

# 10-fold split; no shuffle or random_state is passed to KFold.
kfold = KFold(n_splits=10)
model = LinearRegression()

# The 'neg_mean_absolute_error' scorer yields the negated MAE, so each fold's
# score is <= 0 and values nearer zero are better.
scoring = 'neg_mean_absolute_error'
results = cross_val_score(model, X2, Y2, scoring=scoring, cv=kfold)

# The sign is kept as returned by the scorer, so the printed mean is negative.
fold_mean, fold_std = results.mean(), results.std()
print("MAE: %.3f, Standard Deviation: (%.3f)" % (fold_mean, fold_std))

MAE: -4.034, Standard Deviation: (2.114)


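**Mean Squared Error**

scikit-learn exposes this metric as the `neg_mean_squared_error` scorer, i.e. the MSE multiplied by -1 so that larger scores are better. The value printed below is therefore negative; its magnitude is the mean squared error.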

In [28]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_score

# 10-fold split; no shuffle or random_state is passed to KFold.
kfold = KFold(n_splits=10)
model = LinearRegression()

# The 'neg_mean_squared_error' scorer yields the negated MSE, so each fold's
# score is <= 0 and values nearer zero are better.
scoring = 'neg_mean_squared_error'
results = cross_val_score(model, X2, Y2, scoring=scoring, cv=kfold)

# The sign is kept as returned by the scorer, so the printed mean is negative.
fold_mean, fold_std = results.mean(), results.std()
print("MSE: %.3f, Standard Deviation: (%.3f)" % (fold_mean, fold_std))

MSE: -35.099, Standard Deviation: (45.493)


**R Squared Metric**

In [31]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_score

# 10-fold split; no shuffle or random_state is passed to KFold.
kfold = KFold(n_splits=10)
model = LinearRegression()
scoring = 'r2'

# One R^2 score per fold on the Boston data.
results = cross_val_score(model, X2, Y2, scoring=scoring, cv=kfold)

# Report the mean score across folds and its spread.
fold_mean, fold_std = results.mean(), results.std()
print("R Squared: %.3f, Standard Deviation: (%.3f)" % (fold_mean, fold_std))

R Squared: 0.190, Standard Deviation: (0.594)
