In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

In [2]:
def model_fit_predict(X_train, y_train):
    kf = StratifiedKFold(n_splits = 10, random_state=42, shuffle=True)
    accuracies = []
    precisions = []
    recalls = []
    f1s = []
    for train_index, test_index in kf.split(X_train, y_train):
        x_train = X_train.iloc[train_index]
        Y_train = y_train.iloc[train_index]
        x_test = X_train.iloc[test_index]
        Y_test = y_train.iloc[test_index]
        sc = StandardScaler()
        x_train = sc.fit_transform(x_train)
        x_test = sc.transform(x_test)
        model = SVC()
        model.fit(x_train, Y_train)
        predictions = model.predict(x_test)
        accuracies.append(accuracy_score(predictions, Y_test))
        precisions.append(precision_score(predictions, Y_test, average = None))
        recalls.append(recall_score(predictions, Y_test, average = None))
        f1s.append(f1_score(predictions, Y_test, average = None))
    precision = np.sum(precisions, axis=0)/len(precisions)
    recall = np.sum(recalls, axis=0)/len(recalls)
    f1 =  np.sum(f1s, axis=0)/len(f1s)
    print('Accuracy Score : ', sum(accuracies)/len(accuracies))
    print('Precision Score : ', precision)
    print('Recall Score : ', recall)
    print('F1 Score : ', f1)
    print('Avg Precision Score : ', np.mean(np.array(precision)))
    print('Avg Recall Score : ', np.mean(np.array(recall)))
    print('Avg F1 Score : ', np.mean(np.array(f1)))

In [3]:
with open('wine.names') as fp:
    line = fp.readlines(1)
    col = line[0].splitlines()
    cols = list(col[0].split(','))

options = {'header': None, 'names': cols, 'skipinitialspace': True}


wine_df = pd.read_csv('wine.data', **options)

In [4]:
wine_df

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline,class
0,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065,1
1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050,1
2,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185,1
3,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480,1
4,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740,3
174,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750,3
175,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835,3
176,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840,3


In [5]:
X = wine_df.iloc[:,:-1]
y = wine_df.iloc[:,-1]

In [8]:
model_fit_predict(X, y)

Accuracy Score :  0.9777777777777779
Precision Score :  [0.98333333 0.98571429 0.96      ]
Recall Score :  [1.         0.96071429 0.98333333]
F1 Score :  [0.99090909 0.97238095 0.96868687]
Avg Precision Score :  0.9763492063492064
Avg Recall Score :  0.9813492063492064
Avg F1 Score :  0.9773256373256375
