In [1]:
import cvxopt
import sys
from sift import SIFT, Helper
from time import time
import pandas as pd
import matplotlib as plt
import os
import numpy as np
from svm import mySVC
from KFD import myKFD
from transform import myTransform
from kernels import rbf, linear, poly, sigmoid, laplacian, ch2


Succesfully imported joblib.Parallel, it will be used to try to run training/projecting of multi-class SVC in parallel ...
Succesfully imported numba.jit, it will be used to try to speed-up function calls ...
Successfully imported joblib, it will be used to try to run training/predictions of multi-class KFD in parallel ...
Successfully imported numba.jit, it will be used to try to speed-up function calls ...


In [2]:
"""Load datasets"""

# Training set: read the raw CSV (no header row) and keep only the columns
# labelled 0..3071 (inclusive); anything beyond them is dropped.
X_df = pd.read_csv('./data/Xtr.csv', header=None).loc[:, :3071]
y_df = pd.read_csv('./data/Ytr.csv')

# Test set: same column trimming, converted straight to a NumPy array.
X_test = pd.read_csv('./data/Xte.csv', header=None).loc[:, :3071].values

# NumPy feature matrix and the 'Prediction' label column of the training set.
X = X_df.values
y = y_df['Prediction']

"""Train and test image processing into 32x32x3 shape"""
def _rows_to_images(flat_rows, side=32, n_channels=3):
    """Turn flat image rows into an array of shape (n_images, side, side, n_channels).

    Each row is split into `n_channels` equally sized planes laid out one
    after another (named red, green, blue by the original code), each plane
    being a flattened `side` x `side` image. The planes are stacked along the
    last axis in that same order.

    NOTE(review): the previous implementation stacked the planes as
    (red, blue, green), i.e. with green and blue swapped. They are now kept
    in red, green, blue order. Features computed downstream will differ if
    the extractor is sensitive to channel order.

    Parameters
    ----------
    flat_rows : array-like of shape (n_images, n_channels * side * side)
    side : int, image height and width in pixels
    n_channels : int, number of colour planes per row

    Returns
    -------
    numpy.ndarray of shape (n_images, side, side, n_channels); an empty
    input yields shape (0, side, side, n_channels).
    """
    planes = np.asarray(flat_rows).reshape(-1, n_channels, side, side)
    # Move the channel axis last; copy() gives a C-contiguous array that
    # does not alias the input matrix.
    return planes.transpose(0, 2, 3, 1).copy()


data = _rows_to_images(X)
data_test = _rows_to_images(X_test)

In [3]:
"""Data augmentation with Gaussian blur or random horizontal flip."""

# NOTE: the only augmentation applied here is a deterministic horizontal flip
# of every training image; no Gaussian blur and no randomness are involved.
start = time()
ImageTransformation = myTransform()

# Interleave originals and their mirrored copies:
# [img0, flip(img0), img1, flip(img1), ...]
augmented_train = np.array([
    image
    for original in data
    for image in (original, ImageTransformation.flip_image_horizontal(original))
])

# Every label is repeated twice, in the same interleaved order, so that
# augmented_labels[k] is the label of augmented_train[k].
augmented_labels = np.repeat(np.asarray(y), 2)

# Images and labels must stay aligned one-to-one.
assert len(augmented_labels) == len(augmented_train), \
    "augmented images and labels have different lengths"

print("Data augmentation done in %.2f s" % (time() - start))

In [4]:


"""We set SIFT parameters, the choice of them has been done based on existing information and empirically."""

params = dict(
    gs=6,
    chi2_gamma=.6,
    C=10.,
    ps=31,
    sift_thres=.3,
    gaussian_thres=.7,
    gaussian_sigma=.4,
    num_angles=12,
    num_bins=5,
    alpha=9.0,
)

# Only the entries listed below are forwarded to SIFT; 'chi2_gamma' and 'C'
# stay in `params` but are not passed to the extractor.
sift = SIFT(**{
    name: params[name]
    for name in ('gs', 'ps', 'sift_thres', 'gaussian_sigma', 'gaussian_thres',
                 'num_angles', 'num_bins', 'alpha')
})

"""With data augmentation we end up with 10000 vectors of dimension $n$, the number of SIFT features, for training."""

# Feature extraction: training images first, then the test images.
train = sift.get_X(augmented_train)
test = sift.get_X(data_test)

# Training labels, aligned with the rows of `train`.
target = augmented_labels

SIFT - 100.00% ----- Temps restant estimé: 0 min 0 sec ------

In [6]:
# Fit mySVC on the labelled features (`train`, `target`)

svm_parameters = dict(
    kernel='ch2',  # kernel name handed to mySVC
    C=7.5,         # regularization parameter
    # further mySVC keyword arguments can be added here
)

svm_model = mySVC(**svm_parameters)
print("Training SVM on all training data ...")
target = np.array(target)
svm_model.fit(train, target)

# Optional check: score the model on the very data it was fitted on
# (gives a hint on whether the model is overfitting).
CHECK_SCORE_TRAIN_DATA = True
if CHECK_SCORE_TRAIN_DATA:
    train_score = svm_model.score(train, target)
    print("Checking the score on the train data : {:.2%} ...".format(train_score))

# Predict labels for the unlabelled test features
print("Prediction on test data ...")
prediction = svm_model.predict(test)

  A new mySVC object has been created:
    > mySVC(kernel=ch2, C=7.5, n_classes_=None, max_n_classes=10, degree=3, gamma=auto, coef0=1.0, threshold=0.001, verbose=1, n_jobs=1, cache_size=200)
Training SVM on all training data ...
  Training mySVC... on X of shape (10000, 300).
  Using y of shape (10000,) with 10 different classes
  BinarySVC parameters:
 {'kernel': 'ch2', 'C': 7.5, 'degree': 3, 'gamma': 'auto', 'coef0': 1.0, 'verbose': 1, 'cache_size': 200}
  A new BinarySVC object has been created:
    > BinarySVC(kernel=ch2, C=7.5, degree=3, gamma=auto, coef0=1.0, threshold=0.001, verbose=1, cache_size=200)
  A new BinarySVC object has been created:
    > BinarySVC(kernel=ch2, C=7.5, degree=3, gamma=auto, coef0=1.0, threshold=0.001, verbose=1, cache_size=200)
  A new BinarySVC object has been created:
    > BinarySVC(kernel=ch2, C=7.5, degree=3, gamma=auto, coef0=1.0, threshold=0.001, verbose=1, cache_size=200)
  A new BinarySVC object has been created:
    > BinarySVC(kernel=ch2, C=

In [7]:
# Write the predictions to disk

outname = 'Submission_SVM_Kernel%s_C=%s_flipimage_NS.csv' % (svm_parameters['kernel'], svm_parameters['C'])
print("Saving the predictions to the CSV file '%s' ..." % outname)

# Two-column table: 1-based row Id, then the predicted label.
submission = np.column_stack((np.arange(1, len(test) + 1), prediction))

# The same table is written twice: under the descriptive name and as 'Yte.csv'.
for csv_path in (outname, 'Yte.csv'):
    np.savetxt(csv_path,
               submission,
               delimiter=',',
               comments='',
               header='Id,Prediction',
               fmt='%d')

Saving the predictions to the CSV file 'Submission_SVM_Kernelch2_C=7.5_flipimage_NS.csv' ...
