In [None]:
# load required libraries
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import KFold, cross_val_score, train_test_split


In [2]:
# Read the diabetes CSV with explicitly supplied column names, then split the
# underlying array: the first eight columns become the features X and the
# ninth column ("class") becomes the target y.
filename = "data/pima-indians-diabetes.data.csv"
names = [
    "preg",
    "plas",
    "pres",
    "skin",
    "test",
    "mass",
    "pedi",
    "age",
    "class",
]
data = pd.read_csv(filename, names=names)
array = data.to_numpy()
X, y = array[:, :8], array[:, 8]

### **Classification Metrics**

### Classification Accuracy

In [3]:
# Classification accuracy of a liblinear logistic regression, estimated with
# shuffled 10-fold cross-validation (seeded so the folds are repeatable).
# The printed figures are the mean and standard deviation over the folds.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
model = LogisticRegression(solver="liblinear")
scoring = "accuracy"
results = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring=scoring,
    cv=kfold,
)
print(f"K-Fold Cross-Validation Accuracy: {results.mean():.4f} ({results.std():.4f})")

K-Fold Cross-Validation Accuracy: 0.7709 (0.0509)


### Logistic Loss

In [5]:
# Logistic loss over the same model and folds as the accuracy cell.
# The "neg_log_loss" scorer yields the *negated* loss (so that larger is
# better), which is why the mean printed below is a negative number.
scoring = "neg_log_loss"
results = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring=scoring,
    cv=kfold,
)
print(f"K-Fold Cross-Validation LogLoss: {results.mean():.4f} ({results.std():.4f})")

K-Fold Cross-Validation LogLoss: -0.4936 (0.0421)


### Area Under ROC Curve

In [6]:
# Area under the ROC curve, scored per fold with the model and KFold splitter
# created in the accuracy cell; mean and standard deviation are reported.
scoring = "roc_auc"
results = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring=scoring,
    cv=kfold,
)
print(f"K-Fold Cross-Validation ROC AUC: {results.mean():.4f} ({results.std():.4f})")

K-Fold Cross-Validation ROC AUC: 0.8258 (0.0501)


### Confusion Matrix

In [None]:
# Confusion matrix from a single seeded hold-out split (this cell does not
# cross-validate): 33% of the rows are set aside for testing and a fresh
# logistic regression is fitted on the remainder. Note that `model` is rebound.
test_size = 0.33
seed = 7
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=seed,
    test_size=test_size,
)
model = LogisticRegression(solver="liblinear").fit(X_train, y_train)
y_pred = model.predict(X_test)
# True labels are passed first, predictions second.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:", cm, sep="\n")

Confusion Matrix:
[[141  21]
 [ 41  51]]


### Classification Report

In [13]:
# Precision / recall / F1 summary for the hold-out predictions produced in the
# confusion-matrix cell (y_test and y_pred come from that single split, not
# from cross-validation).
report = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Report:", report, sep="\n")

Classification Report:
              precision    recall  f1-score   support

         0.0       0.77      0.87      0.82       162
         1.0       0.71      0.55      0.62        92

    accuracy                           0.76       254
   macro avg       0.74      0.71      0.72       254
weighted avg       0.75      0.76      0.75       254



### **Regression Metrics**

In [22]:
# Read the whitespace-separated housing file with explicit column names:
# thirteen feature columns followed by MEDV, which is used as the target.
# NOTE: filename, names, array, X and y are rebound here, so from this point
# on they no longer refer to the diabetes data loaded earlier.
filename = "data/housing.csv"
names = [
    "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
    "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV",
]
housing_data = pd.read_csv(filename, names=names, sep="\\s+")
array = housing_data.to_numpy()
X, y = array[:, :13], array[:, 13]

### Mean Absolute Error

In [23]:
# List every name currently defined in the kernel namespace together with its
# type and a short summary (used here to check what X, array, etc. now hold).
# NOTE(review): this looks like a leftover inspection step unrelated to the
# "Mean Absolute Error" heading it sits under - consider removing it.
%whos

Variable                Type                Data/Info
-----------------------------------------------------
KFold                   ABCMeta             <class 'sklearn.model_selection._split.KFold'>
LinearRegression        ABCMeta             <class 'sklearn.linear_mo<...>._base.LinearRegression'>
LogisticRegression      type                <class 'sklearn.linear_mo<...>stic.LogisticRegression'>
X                       ndarray             506x13: 6578 elems, type `float64`, 52624 bytes
X_test                  ndarray             254x8: 2032 elems, type `float64`, 16256 bytes
X_train                 ndarray             514x8: 4112 elems, type `float64`, 32896 bytes
array                   ndarray             506x14: 7084 elems, type `float64`, 56672 bytes
classification_report   function            <function classification_<...>rt at 0x0000027ED4C99080>
cm                      ndarray             2x2: 4 elems, type `int64`, 32 bytes
confusion_matrix        function            <function 

In [25]:
# Mean absolute error of an ordinary linear regression on the housing data,
# estimated with shuffled, seeded 10-fold cross-validation.
# The "neg_mean_absolute_error" scorer returns the error with its sign
# flipped (larger is better), hence the negative mean in the printed line.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
model = LinearRegression()
scoring = "neg_mean_absolute_error"
results = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring=scoring,
    cv=kfold,
)
print(f"K-Fold Cross-Validation MAE: {results.mean():.4f} ({results.std():.4f})")

K-Fold Cross-Validation MAE: -3.3870 (0.6667)


### Mean Squared Error

In [26]:
# Mean squared error, reusing the linear model and folds from the MAE cell.
# As with the other "neg_*" scorers the value is sign-flipped, so the mean
# printed below is negative.
scoring = "neg_mean_squared_error"
results = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring=scoring,
    cv=kfold,
)
print(f"K-Fold Cross-Validation MSE: {results.mean():.4f} ({results.std():.4f})")

K-Fold Cross-Validation MSE: -23.7465 (11.1434)


### *R<sup>2</sup>* Metric

In [27]:
# Coefficient of determination (R^2) per fold, again using the linear model
# and KFold splitter defined in the MAE cell; mean and spread are printed.
scoring = "r2"
results = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring=scoring,
    cv=kfold,
)
print(f"K-Fold Cross-Validation R^2: {results.mean():.4f} ({results.std():.4f})")

K-Fold Cross-Validation R^2: 0.7182 (0.0987)
