# In [None]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
import torch
from scipy import optimize

################################################################################

# INSTRUCTIONS
# The 3 datasets (Xtr.csv, Xte.csv, Ytr.csv) must be saved in a "data" folder
# located in the current working directory.

# load data
# Only the first 3072 columns of the image files are read (no header row);
# the labels are taken from the column at index 1 of Ytr.csv.
data_dir = 'data'
pixel_columns = range(3072)

Xtr = pd.read_csv(os.path.join(data_dir, 'Xtr.csv'), header=None, sep=',', usecols=pixel_columns)
Xtr = np.array(Xtr)

Xte = pd.read_csv(os.path.join(data_dir, 'Xte.csv'), header=None, sep=',', usecols=pixel_columns)
Xte = np.array(Xte)

Ytr = pd.read_csv(os.path.join(data_dir, 'Ytr.csv'), sep=',', usecols=[1])
Ytr = np.array(Ytr).squeeze()

################################################################################

# transform data
# The train and test sets go through exactly the same steps, so the steps live
# in small private helpers instead of being written out twice.

def _to_unit_images(flat_pixels):
    """Reshape flat rows into (n, 32, 32, 3) images rescaled to [0, 1].

    Each row is read as 3 planes of 32x32 values and the plane axis is moved
    last.  The rescaling uses the min/max of the WHOLE array passed in, not
    of each image.
    """
    images = flat_pixels.reshape(flat_pixels.shape[0], 3, 32, 32).transpose(0, 2, 3, 1)
    lowest, highest = images.min(), images.max()
    return (images - lowest) / (highest - lowest)


def _gradient_orientation(images):
    """Return (gradient along axis 1, gradient along axis 2, orientation).

    The orientation is arctan(grad_axis1 / grad_axis2) in degrees, folded
    modulo 180.
    """
    grad_y = np.gradient(images, axis=1)
    grad_x = np.gradient(images, axis=2)
    # the 1e-8 offset avoids dividing by an exactly-zero gradient
    orientation = np.rad2deg(np.arctan(grad_y / (grad_x + 1e-8))) % 180
    return grad_y, grad_x, orientation


def _channel_histograms(images, bins=64):
    """Return one row per image: its per-channel histograms, concatenated.

    NOTE(review): np.histogram is called without `range`, so the bin edges
    span each image channel's own min/max; bin k therefore does not cover the
    same value interval from one image to the next.  Kept as-is to preserve
    the existing features -- confirm whether a fixed range was intended.
    """
    n_channels = images.shape[-1]
    features = np.zeros((images.shape[0], n_channels * bins))
    for row, image in enumerate(images):
        counts = [np.histogram(image[:, :, channel], bins=bins)[0] for channel in range(n_channels)]
        features[row] = np.concatenate(counts)
    return features


# training set
XTRR = _to_unit_images(Xtr)

# compute gradients of the image matrix and the oriented gradients
XTRR_y, XTRR_x, orientation_XTRR = _gradient_orientation(XTRR)

# compute the image histogram
new_Xtr = _channel_histograms(XTRR)

# compute the oriented gradients image histogram
new_Xtr_orientation = _channel_histograms(orientation_XTRR)

# concatenate both for the input data
NEW_XTR = np.concatenate((new_Xtr, new_Xtr_orientation), axis=1)

# test set (same pipeline)
# NOTE(review): the test images are rescaled with the test set's own min/max,
# not with the training set's -- confirm this is intended.
XTEE = _to_unit_images(Xte)
XTEE_y, XTEE_x, orientation_XTEE = _gradient_orientation(XTEE)
new_Xte = _channel_histograms(XTEE)
new_Xte_orientation = _channel_histograms(orientation_XTEE)
NEW_XTE = np.concatenate((new_Xte, new_Xte_orientation), axis=1)

################################################################################

# 3 kernels that seem suitable for image classification task

class GeneralizedHistogramIntersectionKernel():
    """Generalized histogram intersection kernel.

    K[i, j] = sum over features f of min(|X[i, f]|**beta, |Y[j, f]|**beta).
    """

    def __init__(self, beta):
        self.beta = beta

    def kernel(self, X, Y):
        """Return the (len(X), len(Y)) Gram matrix between rows of X and Y."""
        gram = np.zeros((X.shape[0], Y.shape[0]))
        # accumulate one feature at a time: only a single (n, m) temporary is
        # alive at any moment instead of an (n, m, n_features) array
        for feature in range(X.shape[1]):
            x_powered = np.abs(X[:, feature]) ** self.beta
            y_powered = np.abs(Y[:, feature]) ** self.beta
            gram = gram + np.minimum(x_powered[:, np.newaxis], y_powered[np.newaxis, :])
        return gram

class HistogramIntersectionKernel():
    """Histogram intersection (min) kernel: K[i, j] = sum_f min(X[i, f], Y[j, f])."""

    def __init__(self):
        pass

    def kernel(self, X, Y):
        """Return the (len(X), len(Y)) Gram matrix between rows of X and Y."""
        n_features = X.shape[1]
        # one (n, m) matrix of pairwise minima per feature, summed left to
        # right starting from a zero matrix
        per_feature = (
            np.minimum(X[:, f][:, np.newaxis], Y[:, f][np.newaxis, :])
            for f in range(n_features)
        )
        return sum(per_feature, np.zeros((X.shape[0], Y.shape[0])))

class LogKernel():
    """Log kernel: K[i, j] = -log(||X[i] - Y[j]||**d + 1).

    The pairwise distances come from torch.cdist with its default settings.
    """

    def __init__(self, d):
        self.d = d

    def kernel(self, X, Y):
        """Return the (len(X), len(Y)) Gram matrix as a numpy array.

        X and Y may be numpy arrays or tensors of any real dtype.  torch.cdist
        only accepts floating-point inputs of matching dtype, so both inputs
        are promoted to a common floating dtype first (integer inputs become
        float64).
        """
        X = torch.as_tensor(X)
        Y = torch.as_tensor(Y)
        dtype = torch.promote_types(X.dtype, Y.dtype)
        if not dtype.is_floating_point:
            dtype = torch.float64
        distances = torch.cdist(X.to(dtype), Y.to(dtype))
        return (-torch.log(distances ** self.d + 1)).numpy()

################################################################################

# binary classification kernel SVM adaptation to multiple classes
class KernelSVC:
    """Binary soft-margin kernel SVM, solved in the dual with SLSQP.

    Parameters
    ----------
    C : upper bound on each dual variable (box constraint 0 <= alpha_i <= C).
    kernel : callable (A, B) -> Gram matrix of shape (len(A), len(B)).
    epsilon : tolerance used to decide whether a dual variable lies strictly
        inside (0, C).
    """

    def __init__(self, C, kernel, epsilon = 1e-3):
        self.C = C
        self.kernel = kernel
        self.alpha = None
        self.epsilon = epsilon

    def fit(self, X, y):
        """Fit on samples X (array, one row per sample) and labels y in {-1, +1}.

        Sets `alpha` (dual variables), `support` (rows of X whose alpha lies
        strictly inside (0, C)) and `b` (offset of the decision function).
        Emits a RuntimeWarning if the optimizer reports failure.
        """
        import warnings

        y = np.asarray(y)
        N = len(y)
        K = self.kernel(X, X)
        self.X = X
        self.y = y

        # Q[i, j] = y_i * y_j * K[i, j].  Built once: the objective and its
        # gradient are evaluated many times, and forming diag(y) @ K @ diag(y)
        # on every call allocates several N x N matrices each time.
        Q = y[:, None] * K * y[None, :]
        ones = np.ones(N)
        identity = np.eye(N)

        def loss(alpha):
            # dual objective (to minimize): 1/2 alpha^T Q alpha - sum(alpha)
            return 0.5 * alpha @ Q @ alpha - alpha.sum()

        def grad_loss(alpha):
            return Q @ alpha - ones

        # equality constraint: sum_i alpha_i y_i = 0
        fun_eq = lambda alpha: - alpha.dot(y)
        jac_eq = lambda alpha: - y

        # inequality constraints (>= 0 convention): alpha <= C and alpha >= 0
        fun_ineq1 = lambda alpha: self.C * ones - alpha
        fun_ineq2 = lambda alpha: alpha
        jac_ineq1 = lambda alpha: - identity
        jac_ineq2 = lambda alpha: identity

        constraints = ({'type': 'eq', 'fun': fun_eq, 'jac': jac_eq},
                       {'type': 'ineq', 'fun': fun_ineq1, 'jac': jac_ineq1},
                       {'type': 'ineq', 'fun': fun_ineq2, 'jac': jac_ineq2})

        optRes = optimize.minimize(fun=loss,
                                   x0=np.ones(N),
                                   method='SLSQP',
                                   jac=grad_loss,
                                   constraints=constraints)
        if not optRes.success:
            # keep the best-effort solution, but do not hide the failure
            warnings.warn("KernelSVC: SLSQP did not converge ({})".format(optRes.message),
                          RuntimeWarning)

        self.alpha = optRes.x

        # margin support vectors: alpha strictly inside (0, C)
        on_margin = (self.C - self.alpha > self.epsilon) & (self.alpha > self.epsilon)
        self.support = X[on_margin]

        # offset of the classifier: mean of y_i - f(x_i) over the margin
        # support vectors.  When there are none (e.g. every alpha sits on a
        # bound) that mean would be NaN and poison all predictions, so fall
        # back to every sample with alpha > epsilon, and to 0 as a last resort.
        residual = y - (self.alpha * y) @ K
        reference = on_margin if on_margin.any() else (self.alpha > self.epsilon)
        self.b = residual[reference].mean() if reference.any() else 0.0

    def separating_function(self,x):
        """Return sum_i alpha_i y_i k(x_i, x) for each row of x (no offset)."""
        return (self.alpha * self.y) @ self.kernel(self.X, x)

    def predict(self, X):
        """Return the real-valued decision scores f(x) + b (not class labels)."""
        d = self.separating_function(X)
        return d + self.b

class MultiKernelSVC:
    """One-vs-rest multi-class classifier built from binary KernelSVC models.

    For each class, a binary classifier is trained on all samples of that
    class (label +1) against an equally sized random subset of the remaining
    samples (label -1).  Prediction picks the class whose classifier gives the
    highest score.

    Parameters
    ----------
    kernel : callable (A, B) -> Gram matrix, passed to every KernelSVC.
    C : box-constraint value passed to every KernelSVC.
    epsilon : tolerance passed to every KernelSVC.
    """

    def __init__(self, kernel, C, epsilon=1e-3):
        self.C = C
        self.kernel = kernel
        self.epsilon = epsilon
        self.SVC_classifiers = []
        # sorted unique labels seen by fit(); SVC_classifiers[i] scores classes_[i]
        self.classes_ = None

    def fit(self, X, y):
        """Train one binary classifier per distinct label in y.

        The negative samples are drawn with the global numpy random state.
        """
        y = np.asarray(y)
        self.X = X
        self.classes_ = np.unique(y)
        self.n_classes = len(self.classes_)
        # start from a clean list so that calling fit() again does not keep
        # the classifiers of the previous fit
        self.SVC_classifiers = []

        for c in self.classes_:
            id_class = np.flatnonzero(y == c)
            id_rest = np.flatnonzero(y != c)
            # balanced sub-sampling; capped because sampling without
            # replacement cannot draw more items than are available
            n_other = min(len(id_class), len(id_rest))
            id_other = np.random.choice(id_rest, size=n_other, replace=False)

            # +1 for the class, -1 for the sampled others, 0 = not used
            Y = np.zeros(y.shape)
            Y[id_class] = 1
            Y[id_other] = -1
            used = Y != 0

            classifier = KernelSVC(self.C, self.kernel, self.epsilon)
            classifier.fit(X[used], Y[used])
            self.SVC_classifiers.append(classifier)

    def predict(self, x):
        """Return, for each row of x, the label of the highest-scoring class.

        Labels are the original values seen by fit().  If `classes_` is not
        set, the position of the winning classifier is returned instead.
        """
        scores = np.array([clf.predict(x) for clf in self.SVC_classifiers]).T
        winners = np.argmax(scores, axis=1)
        if self.classes_ is None:
            return winners
        # map classifier position back to the actual label value
        return self.classes_[winners]

################################################################################

# submission

# selected model: log kernel with d = 3 and C = 10
best_C_value = 10
best_kernel = LogKernel(3).kernel

# train on the full training features
kernel_svc = MultiKernelSVC(C=best_C_value, kernel=best_kernel)
kernel_svc.fit(NEW_XTR, Ytr)

# predict the test set and write it as a CSV with an 'Id' index column
# starting at 1 and a single 'Prediction' column
Yte = kernel_svc.predict(NEW_XTE)
Yte = {'Prediction' : Yte}
dataframe = pd.DataFrame(data=Yte)
dataframe.index = dataframe.index + 1
dataframe.to_csv('Yte_pred.csv', index_label='Id')