In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

In [2]:
def model_fit_predict(X_train, y_train):
    kf = StratifiedKFold(n_splits = 10, random_state=42, shuffle=True)
    accuracies = []
    precisions = []
    recalls = []
    f1s = []
    for train_index, test_index in kf.split(X_train, y_train):
        x_train = X_train.iloc[train_index]
        Y_train = y_train.iloc[train_index]
        x_test = X_train.iloc[test_index]
        Y_test = y_train.iloc[test_index]
        model = SVC()
        model.fit(x_train, Y_train)
        predictions = model.predict(x_test)
        accuracies.append(accuracy_score(predictions, Y_test))
        precisions.append(precision_score(predictions, Y_test, average = None))
        recalls.append(recall_score(predictions, Y_test, average = None))
        f1s.append(f1_score(predictions, Y_test, average = None))
    precision = np.sum(precisions, axis=0)/len(precisions)
    recall = np.sum(recalls, axis=0)/len(recalls)
    f1 =  np.sum(f1s, axis=0)/len(f1s)
    print('Accuracy Score : ', sum(accuracies)/len(accuracies))
    print('Precision Score : ', precision)
    print('Recall Score : ', recall)
    print('F1 Score : ', f1)
    print('Avg Precision Score : ', np.mean(np.array(precision)))
    print('Avg Recall Score : ', np.mean(np.array(recall)))
    print('Avg F1 Score : ', np.mean(np.array(f1)))

In [5]:
seed_df = pd.read_table('seeds_dataset.txt', header=None)

In [6]:
seed_df

Unnamed: 0,0,1,2,3,4,5,6,7
0,15.26,14.84,0.8710,5.763,3.312,2.221,5.220,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.9050,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1
...,...,...,...,...,...,...,...,...
205,12.19,13.20,0.8783,5.137,2.981,3.631,4.870,3
206,11.23,12.88,0.8511,5.140,2.795,4.325,5.003,3
207,13.20,13.66,0.8883,5.236,3.232,8.315,5.056,3
208,11.84,13.21,0.8521,5.175,2.836,3.598,5.044,3


In [9]:
X = seed_df.iloc[:,:-1]
y = seed_df.iloc[:,-1]

In [10]:
model_fit_predict(X, y)

Accuracy Score :  0.9047619047619048
Precision Score :  [0.84285714 0.91428571 0.95714286]
Recall Score :  [0.88059524 0.95654762 0.90214286]
F1 Score :  [0.85540293 0.93161172 0.92616246]
Avg Precision Score :  0.9047619047619048
Avg Recall Score :  0.9130952380952381
Avg F1 Score :  0.9043923723335489
