In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

In [2]:
def model_fit_predict(X_train, y_train):
    kf = StratifiedKFold(n_splits = 10, random_state=42, shuffle=True)
    accuracies = []
    precisions = []
    recalls = []
    f1s = []
    for train_index, test_index in kf.split(X_train, y_train):
        x_train = X_train.iloc[train_index]
        Y_train = y_train.iloc[train_index]
        x_test = X_train.iloc[test_index]
        Y_test = y_train.iloc[test_index]
        model = SVC()
        model.fit(x_train, Y_train)
        predictions = model.predict(x_test)
        accuracies.append(accuracy_score(predictions, Y_test))
        precisions.append(precision_score(predictions, Y_test, average = None))
        recalls.append(recall_score(predictions, Y_test, average = None))
        f1s.append(f1_score(predictions, Y_test, average = None))
    precision = np.sum(precisions, axis=0)/len(precisions)
    recall = np.sum(recalls, axis=0)/len(recalls)
    f1 =  np.sum(f1s, axis=0)/len(f1s)
    print('Accuracy Score : ', sum(accuracies)/len(accuracies))
    print('Precision Score : ', precision)
    print('Recall Score : ', recall)
    print('F1 Score : ', f1)
    print('Avg Precision Score : ', np.mean(np.array(precision)))
    print('Avg Recall Score : ', np.mean(np.array(recall)))
    print('Avg F1 Score : ', np.mean(np.array(f1)))

In [3]:
with open('pima-indians-diabetes.names') as fp:
    line = fp.readlines(1)
    col = line[0].splitlines()
    cols = list(col[0].split(','))

options = {'header': None, 'names': cols, 'skipinitialspace': True}


pima_df = pd.read_csv('pima-indians-diabetes.data', **options)

In [4]:
pima_df

Unnamed: 0,Number of times pregnant,Plasma glucose concentration a 2 hours in an oral glucose tolerance test,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Diabetes pedigree function,Age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [5]:
X = pima_df.iloc[:,:-1]
y = pima_df.iloc[:,-1]

In [8]:
model_fit_predict(X, y)

Accuracy Score :  0.7590567327409433
Precision Score :  [0.906      0.48532764]
Recall Score :  [0.76810276 0.74293035]
F1 Score :  [0.83025274 0.57977833]
Avg Precision Score :  0.6956638176638177
Avg Recall Score :  0.7555165571333797
Avg F1 Score :  0.7050155386869305
