In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn import svm
from PIL import Image
import pickle
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [4]:
def load_image(file):
    img = Image.open(file)
    img = img.resize((64,64))  # You may need to adjust the size or the process here based on your specific image data
    img_array = np.array(img)
    img_array = img_array.flatten()
    return img_array


In [5]:
def load_images_and_labels(path_image_dir, path_csv):
    df = pd.read_csv(path_csv)
    images_ids = df['Image'].values
    images = []

    if 'Class' in df.columns:
        labels = df['Class'].values
    else:
        labels = None

    for image_id in images_ids:
        image_path = os.path.join(path_image_dir, image_id)
        if os.path.isfile(image_path):
            image_array = load_image(image_path)
            images.append(image_array)
    images = np.array(images)
    return images, labels


In [6]:
x_train, y_train = load_images_and_labels('../data/train_images', '../data/train.csv')
x_val, y_val = load_images_and_labels('../data/val_images', '../data/val.csv')
x_test, _ = load_images_and_labels('../data/test_images', '../data/test.csv')

In [8]:
x_train = x_train[:3000]
y_train = y_train[:3000]

In [17]:
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)

In [18]:
print(scaler.mean_)
print(scaler.scale_)

[107.04158333 101.17216667 107.00675    ...  99.82908333  91.44475
  97.55966667]
[75.24539756 72.15628426 71.80566044 ... 74.93123094 71.37855734
 72.04444536]


In [49]:
# print(x_train.shape) # images, 64*64*3
print(x_train[0], y_train[0])

[253 249 240 ... 248 237 225] 38


In [7]:
c_values = [1]
gamma_values = [1]
# kernel_values = ['poly', 'rbf', 'sigmoid', 'linear',]
kernel_values = ['poly']

In [28]:
for c in c_values:
    print("C: ", c)
    for gamma in gamma_values:
        print("Gamma: ", gamma)
        for kernel in kernel_values:
            print("Kernel: ", kernel)
            svc_instance = svm.SVC(C=c, gamma=gamma, kernel=kernel)
            svc_instance.fit(x_train, y_train)
            file_name = f"../models/svm_{c}_{gamma}_{kernel}.pkl"
            with open(file_name, 'wb') as file:
                pickle.dump(svc_instance, file)

C:  1
Gamma:  1
Kernel:  poly
Kernel:  rbf
Kernel:  sigmoid
Kernel:  linear


In [8]:
all_models = []
step = 0

for c in c_values:
    for gamma in gamma_values:
        for kernel in kernel_values:
            step += 1
            print(f"Step: {step}")
            print(f"Configuration: {kernel}, {gamma}, {c}")
            file_name = f"../models/svm_{c}_{gamma}_{kernel}.pkl"
            with open(file_name, 'rb') as file:
                svc_instance = pickle.load(file)
                predicted = svc_instance.predict(x_val)
                accuracy = accuracy_score(y_val, predicted)
                precision = precision_score(y_val, predicted, average='macro')
                recall = recall_score(y_val, predicted, average='macro')
                conf_matrix = confusion_matrix(y_val, predicted)
                all_models.append({'C': c, 'gamma': gamma, 'kernel': kernel, 'accuracy': accuracy, 'precision': precision, 'recall': recall, 'conf_matrix': conf_matrix})

print(all_models)

Step: 1
Configuration: poly, 1, 1
[{'C': 1, 'gamma': 1, 'kernel': 'poly', 'accuracy': 0.333, 'precision': 0.3317324520349511, 'recall': 0.3246961082898583, 'conf_matrix': array([[1, 0, 0, ..., 0, 0, 1],
       [0, 2, 0, ..., 0, 0, 0],
       [0, 0, 8, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 4, 0, 0],
       [0, 0, 0, ..., 0, 6, 0],
       [0, 0, 1, ..., 0, 0, 3]])}]


In [8]:
file = open("../models/svm_models.pkl", 'wb')
pickle.dump(all_models, file)

: 

: 

In [11]:
all_models = None
with open("../models/svm_models.pkl", 'rb') as file:
    all_models = pickle.load(file)
print(all_models)

[{'score': 0.036, 'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}, {'score': 0.333, 'C': 0.1, 'gamma': 1, 'kernel': 'poly'}, {'score': 0.285, 'C': 0.1, 'gamma': 1, 'kernel': 'linear'}, {'score': 0.036, 'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}, {'score': 0.333, 'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}, {'score': 0.285, 'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}, {'score': 0.036, 'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'}, {'score': 0.333, 'C': 0.1, 'gamma': 0.01, 'kernel': 'poly'}, {'score': 0.285, 'C': 0.1, 'gamma': 0.01, 'kernel': 'linear'}, {'score': 0.036, 'C': 1, 'gamma': 1, 'kernel': 'rbf'}, {'score': 0.333, 'C': 1, 'gamma': 1, 'kernel': 'poly'}, {'score': 0.285, 'C': 1, 'gamma': 1, 'kernel': 'linear'}, {'score': 0.036, 'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}, {'score': 0.333, 'C': 1, 'gamma': 0.1, 'kernel': 'poly'}, {'score': 0.285, 'C': 1, 'gamma': 0.1, 'kernel': 'linear'}, {'score': 0.036, 'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}, {'score': 0.333, 'C': 1, 'gamma': 0.01, 'kernel': 'poly'}

In [9]:
for model in all_models:
    print(model)

{'C': 1, 'gamma': 1, 'kernel': 'poly', 'accuracy': 0.333, 'precision': 0.3317324520349511, 'recall': 0.3246961082898583, 'conf_matrix': array([[1, 0, 0, ..., 0, 0, 1],
       [0, 2, 0, ..., 0, 0, 0],
       [0, 0, 8, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 4, 0, 0],
       [0, 0, 0, ..., 0, 6, 0],
       [0, 0, 1, ..., 0, 0, 3]])}


In [36]:
df = pd.DataFrame(all_models)

df.drop('conf_matrix', axis=1, inplace=True)
df.rename(columns={'score': 'accuracy'}, inplace=True)
cols = list(df.columns.values)
cols.pop(cols.index('accuracy'))
df = df[cols+['accuracy']]
df.drop('C', axis=1, inplace=True)
df.drop('gamma', axis=1, inplace=True)

# replace gamma parameter with - where kernel is linear

print(df)


    kernel  precision    recall  accuracy
0     poly   0.331732  0.324696     0.333
1      rbf   0.000375  0.010417     0.036
2  sigmoid   0.000375  0.010417     0.036
3   linear   0.294230  0.280769     0.285


In [37]:
# from IPython.display import display

# display(df)

import dataframe_image as dfi

dfi.export(df, '../plots/svm_models_6.png')

In [12]:
with open('../models/svm_1_1_poly.pkl', 'rb') as file:
    svc_instance = pickle.load(file)
    predicted = svc_instance.predict(x_test)

In [13]:
with open('../data/test.csv', 'r') as test_file:
    image_ids = test_file.readlines()
    image_ids = [line.strip() for line in image_ids]
    image_ids = image_ids[1:]
    image_ids = [line.split(',') for line in image_ids]
    image_ids = [line[0] for line in image_ids]


In [14]:
print(image_ids[:1])

['29a5a33c-dc01-4515-b735-4f0a36dcc06d.png']


In [15]:
print(predicted[:1])

[48]


In [16]:
df_predictions = pd.DataFrame({
    'Image': image_ids,
    'Class': predicted
})

# Save the DataFrame to a CSV file
df_predictions.to_csv('../submissions/submission_svm.csv', index=False)

In [13]:
conf_matrix = all_models[0]['conf_matrix']
print(conf_matrix)

conf_matrix = conf_matrix.astype('float') / (conf_matrix.sum(axis=1)[:, np.newaxis] + 1e-7)

plt.figure(figsize=(10, 10))
sns.heatmap(conf_matrix, annot=False, linewidths=.5, square = True, cmap = 'Reds')
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
plt.title('Confusion Matrix', size = 15)
plt.savefig('../plots/confusion_matrix_net_5.png')
plt.show()


[[1 0 0 ... 0 0 1]
 [0 2 0 ... 0 0 0]
 [0 0 8 ... 0 0 0]
 ...
 [0 0 0 ... 4 0 0]
 [0 0 0 ... 0 6 0]
 [0 0 1 ... 0 0 3]]
