# Kernel methods 

### Import des fonctions


In [5]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV,StratifiedKFold
from sklearn.metrics import accuracy_score
from scipy.linalg import solve,lstsq
from scipy.stats import uniform
from cvxopt import matrix
from cvxopt import solvers
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from itertools import compress, product
import numexpr as ne

In [6]:
X_0=pd.read_csv("Data/Xtr0_mat100.csv",sep=' ',header=None)
X_1=pd.read_csv("Data/Xtr1_mat100.csv",sep=' ',header=None)
X_2=pd.read_csv("Data/Xtr2_mat100.csv",sep=' ',header=None)


Y_0=pd.read_csv("Data/Ytr0.csv",sep=',')
Y_1=pd.read_csv("Data/Ytr1.csv",sep=',')
Y_2=pd.read_csv("Data/Ytr2.csv",sep=',')

Y_0

Unnamed: 0,Id,Bound
0,0,0
1,1,1
2,2,1
3,3,1
4,4,1
...,...,...
1995,1995,0
1996,1996,1
1997,1997,0
1998,1998,0


In [7]:
print(Y_0["Bound"].value_counts())
print(Y_1["Bound"].value_counts())
print(Y_2["Bound"].value_counts())

0    1038
1     962
Name: Bound, dtype: int64
1    1001
0     999
Name: Bound, dtype: int64
0    1003
1     997
Name: Bound, dtype: int64


Les classes sont plutôt équilibrées 

In [8]:
Y_0train=np.where(Y_0["Bound"]==0,-1,Y_0["Bound"])
Y_1train=np.where(Y_1["Bound"]==0,-1,Y_1["Bound"])
Y_2train=np.where(Y_2["Bound"]==0,-1,Y_2["Bound"])

X_0train=X_0.to_numpy()
X_1train=X_1.to_numpy()
X_2train=X_2.to_numpy()
print(X_0train.shape)

(2000, 100)


## Ridge regression ( équivalent à Kernel ridge régression avec noyau linéaire) 
On ajoute une colonne de biais 

In [None]:
X_0train_forRR=np.hstack((np.ones((X_0train.shape[0],1)),X_0train))
X_1train_forRR=np.hstack((np.ones((X_0train.shape[0],1)),X_0train))
X_2train_forRR=np.hstack((np.ones((X_0train.shape[0],1)),X_0train))

### Solver ridge Regression 
Même si les classes sont équilibrées je laisse la possibilité de modifier les poids. Enfin pas vraiment puisque le seul poids que je met quand on souhaite en mettre c'est le poid correspondant au poids des classes dans le set qui sera entraîné.
J'utilisais lstsq pour résoudre le système lineaire car plus stable si la matrice est mal conditionné/pas inversible mais cela prend beaucoup plus de temps que solve, ducoup je suis repassé à solve.

In [None]:
def Ridge_Regresssion(X_train,y_train,lbd,weight=False):
    n=y_train.shape[0]
    d=X_train.shape[1]
    if not(weight):
        A=(X_train.T.dot(X_train))/n+lbd*np.eye(d)
        B=X_train.T.dot(y_train)/n
        theta=solve(A,B)
        return theta
    else:
        w1=(y_train==1).mean()
        w0=1-w1
        w=np.where(y_train==1,w1,w0)
        W=np.diag(w)
        A=(X_train.T.dot(W.dot(X_train)))/n+lbd*np.eye(d)
        B=X_train.T.dot(W.dot(y_train))/n
        theta=solve(A,B)
        return theta


 ### Création de l'estimateur Ridge regression compatible avec l'API de scikit learn

In [None]:
class LinearRR(BaseEstimator,ClassifierMixin):
    def __init__(self,lbd=1,weight=False):
        self.lbd=lbd
        self.weight=weight
        
    def fit(self,X,y):
        self.classes_ = np.unique(y)
        
        self.theta=Ridge_Regresssion(X,y,self.lbd,self.weight)
        return self
    def decision_function(self,X):
        return X.dot(self.theta)
        
    def predict(self,X,y=None):
        scores=self.decision_function(X)
        if len(scores.shape) == 1:
            indices = (scores > 0).astype(np.int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices]
   
    def get_params(self, deep=True):
    
        return {"lbd": self.lbd,"weight":self.weight}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

### Fonction permettant d'afficher les n meilleurs résultats avec les paramètres associées d'une grid search dans un tableau (dataframe)

In [163]:
def présentation_résultat(search,n):
    mask=search.cv_results_['rank_test_score']<=n
    params=list(compress(search.cv_results_['params'], list(mask)))
    mean_test_score=search.cv_results_['mean_test_score'][mask]
    a={}
    for i in range(mean_test_score.size):
        k=''
        for key, value in params[i].items():
            k+=" "+key+" "+str(value)
        a.update({k:mean_test_score[i]})
        sortedDict = sorted(a.items(), key=lambda x: x[1],reverse=True)
    l=[]
    for i in sortedDict:
        u=i[0].split(sep=' ')
        del(u[0])
        lp=[]
        for j in u[1::2]:
            lp.append(j)
        lp.append(i[1])
        l.append(lp)
    head=list(params[0].keys())+["mean_test_score"]

    return(pd.DataFrame(l,columns=head))
        
    
    
    
    

## Première recherche des hyperparamètres pour chaque train set 
Je crée un pipeline composé d'un transformer de données et de la ridge regression. Dans la manière dont j'ai défini la pipeline le transformer en question est *StandardScaler()* qui standardise les données ((X-mean)/std)). Attention cela standardise les données selon la mean et std du train set et donc lors d'une prédiction le test/validation set sera standardisé par rapport à la mean et std du training set. 
En soit la manière dont j'ai défini le tranformeur n'a pas d'importance car dans la gridSearch je teste d'autres transformeur à certain moment(mais fallait bien en mettre pour faire la pipeline), il y a même une option *"passthrough"* pour la première étape de la pipeline qui veut dire tout simplement passer à l'étape suivante et donc ne pas transfomrer mes données.


In [None]:
RR=Pipeline([("preprocess",StandardScaler()),("linearrr",LinearRR())])
print(RR)

hps0={"linearrr__lbd":[10**-6,10**-5,10**-4,10**-3,10**-2,10**-1,1,10],"linearrr__weight":[False,True],
     "preprocess":["passthrough",StandardScaler(),RobustScaler(),MinMaxScaler()]}
searchLRR0= GridSearchCV(RR,hps0,scoring="accuracy",cv=5,verbose=1)
searchLRR0.fit(X_0train_forRR, Y_0train)

hps1={"linearrr__lbd":[10**-6,10**-5,10**-4,10**-3,10**-2,10**-1,1,10],"linearrr__weight":[False,True],
     "preprocess":["passthrough",StandardScaler(),RobustScaler(),MinMaxScaler()]}
searchLRR1= GridSearchCV(RR,hps1,scoring="accuracy",cv=5,verbose=1)
searchLRR1.fit(X_1train_forRR, Y_1train)


hps2={"linearrr__lbd":[10**-6,10**-5,10**-4,10**-3,10**-2,10**-1,1,10],"linearrr__weight":[False,True],
     "preprocess":["passthrough",StandardScaler(),RobustScaler(),MinMaxScaler()]}
searchLRR2= GridSearchCV(RR,hps2,scoring="accuracy",cv=5,verbose=1)
searchLRR2.fit(X_2train_forRR, Y_2train)


## Affichage des résulats 

In [None]:
présentation_résultat(searchLRR0,10)


In [None]:
présentation_résultat(searchLRR1,10)

In [None]:
présentation_résultat(searchLRR2,10)

### Recherche plus fine dans les zones d'intérêts déterminés à l'aide la search précedente

Ici je fais une RandomizedGrid search car j'ai beaucoup plus d'informations sur les zones d'intérêts où il faut chercher(notamment pour le lambda). Si je l'avais fait avant étant donné que j'avais aucun à priori sur la valeur de lambda la recherche au hasard aurait été plus difficile. 

In [None]:

hps0={"linearrr__lbd":uniform(loc=0.005,scale=5),"linearrr__weight":[False,True]}
searchLRR0= RandomizedSearchCV(RR,hps0,n_iter=500,scoring="accuracy",cv=5,verbose=1)
searchLRR0.fit(X_0train_forRR, Y_0train)

hps1={"linearrr__lbd":uniform(loc=0.05,scale=15),"linearrr__weight":[False,True],
     "preprocess":["passthrough",StandardScaler(),RobustScaler(),MinMaxScaler()]}
searchLRR1= RandomizedSearchCV(RR,hps1,n_iter=1000,scoring="accuracy",cv=5,verbose=1)
searchLRR1.fit(X_1train_forRR, Y_1train)


hps2={"linearrr__lbd":uniform(loc=0.005,scale=15),"linearrr__weight":[False,True],
     "preprocess":["passthrough"]}
searchLRR2= RandomizedSearchCV(RR,hps2,n_iter=500,scoring="accuracy",cv=5,verbose=1)
searchLRR2.fit(X_2train_forRR, Y_2train)


### Présentation des résultats 

In [None]:
présentation_résultat(searchLRR0,10)


In [None]:
présentation_résultat(searchLRR1,10)

In [None]:
présentation_résultat(searchLRR2,2)

### Recherche encore plus fine (sert pas trop à grand chose car on va garder ce modèle)

In [None]:
hps0={"linearrr__lbd":uniform(loc=0.8,scale=1.2),"linearrr__weight":[False]}
searchLRR0= RandomizedSearchCV(RR,hps0,n_iter=100,scoring="accuracy",cv=5,verbose=1)
searchLRR0.fit(X_0train_forRR, Y_0train)

hps1={"linearrr__lbd":uniform(loc=0.01,scale=1.5),"linearrr__weight":[False],
     "preprocess":["passthrough"]}
searchLRR1= RandomizedSearchCV(RR,hps1,n_iter=100,scoring="accuracy",cv=5,verbose=1)
searchLRR1.fit(X_1train_forRR, Y_1train)


hps2={"linearrr__lbd":uniform(loc=0.005,scale=0.1),"linearrr__weight":[False],
     "preprocess":["passthrough"]}
searchLRR2= RandomizedSearchCV(RR,hps2,n_iter=100,scoring="accuracy",cv=5,verbose=1)
searchLRR2.fit(X_2train_forRR, Y_2train)


### Présentation des résultats 

In [None]:
présentation_résultat(searchLRR0,10)


In [None]:
présentation_résultat(searchLRR1,10)

In [None]:
présentation_résultat(searchLRR2,10)

# Kernel Ridge regression 

### Crétion du kernel de manière efficiente voir ce lien pour comprendre https://stackoverflow.com/questions/47271662/what-is-the-fastest-way-to-compute-an-rbf-kernel-in-python 
Mais ce qu'il y a retenir c'est que $\| x-y \|^2=\|x\|^2 + \|y\|^2 -2x^Ty$ et d'utiliser à son avantage le broadcast de Numpy. Le ne.evaluate est là pour rendre l'opération plus rapide mais l'opération qui est faite et bien "exp(-g*(A+B-2*C))" Avec les A B C g correspondant. D'ailleurs la propriété précédente nous permet de manière générale de calculer la matrice $K(x_i,y_i)$ pour $y_1,\dots,y_k ~~x_1,\dots,x_n$  $y_i,x_i \in R^d$ La kernel matrice et un cas particulier où $n=k$ et $x_i=y_i$ pour tout $i$.
Cette fonction est contruite juste après la fonction qui construit la kernel matrice. Elle nous sera très utile en espérant que vous aviez déjà deviné pourquoi, c'est pour  calculer $\left(f(z_1),\dots,f(z_k)\right)$ oû $f(z_j)=\sum_{i=1}^{n}\alpha_i K(z_j,x_i)$ 


In [9]:
def rbf_kernel(X_train,gamma):
    X_norm = np.sum(X_train ** 2, axis = -1)
    K = ne.evaluate('exp(-g * (A + B - 2 * C))', {
        'A' : X_norm[:,None],
        'B' : X_norm[None,:],
        'C' : np.dot(X_train, X_train.T),
        'g' : gamma,
    })
    return K

In [10]:
def K_rbf_kernel(X_test,X_train,gamma):
   
    Xtt_norm = np.sum(X_test ** 2, axis = -1)
    Xtr_norm = np.sum(X_train ** 2, axis = -1)
    K = ne.evaluate('exp(-g * (A + B - 2 * C))', {
        'A' : Xtt_norm[:,None],
        'B' : Xtr_norm[None,:],
        'C' : np.dot(X_test, X_train.T),
        'g' : gamma,
    })
    return K
    

In [11]:
def poly_kernel(X,Y,degree,c0):
    return (X.dot(Y.T)+c0)**degree

In [12]:
def addbiais(X):
    return np.hstack((X,np.ones((X.shape[0],1))))
def addzeros(X):
    n,_=X.shape
    A=np.zeros((n+1,n+1))
    A[:n,:n]=X
    return(A)

    
    


### Création du solver Kernel ridge regresion

Comme vous l'avez remarqué je laisse la possibilité d'ajouter un paramètre de biais à l'estimateur ($f(x)=\sum_{i=1}^{n}K(xi,x)\,+b)$ mais qui n'est pas pénalisé!! car cela aurait aucun sens. Cela mène bien au système d'équation que je résouds dans la partie *bais* du solver qui suit, vérifier à la main si vous n'êtes pas convaincu

In [13]:
def Kernel_Ridge_Regression(X_train,y_train,lbd,weight,gamma,degree,c0,k,biais,kernel):
    if kernel=="rbf":
        K=rbf_kernel(X_train,gamma)
    elif kernel=="poly":
        K=poly_kernel(X_train,X_train,degree,c0)
    elif kernel=="spectrum":
        K=spectrum_kernel(X_train,k)
    elif kernel=="precomputed":
        K=X_train
    n=K.shape[0]
    w=weight
    if not(biais):
        if isinstance(weight,bool):
            A=(K+n*lbd*np.eye(n))
            alpha=solve(A,y_train,assume_a="sym")
            return alpha
        elif isinstance(weight,str):
                w1=(y_train==1).mean()
                w0=1-w1
                w=np.where(y_train==1,w1,w0)
        wi=(1/w)
        
        A=K+n*lbd*wi*np.eye(n)
        alpha=solve(A,y_train,assume_a="sym")
     
        return alpha
    else:
        
        Kb=addbiais(K)
        
        K0=addzeros(K)
    
        if isinstance(weight,bool):
            A=(Kb.T.dot(Kb)+lbd*n*K0)
            B=Kb.T.dot(y_train)
            alpha=solve(A,B,assume_a="sym")
            return alpha
        elif isinstance(weight,str):
                w1=(y_train==1).mean()
                w0=1-w1
                w=np.where(y_train==1,w1,w0)
        W=np.diag(w)
        A=(Kb.T.dot(W.dot(Kb))+lbd*n*K0)
        B=Kb.T.dot(W.dot(y_train),assume_a="sym")
        alpha=solve(A,B)
        return alpha
        
    
    

### Création de l'estimateur compatible avec l'Api de scikit 

In [14]:
class KernelRR(BaseEstimator,ClassifierMixin):
    def __init__(self,lbd=1,weight=False,gamma="auto",degree=2,c0=1,k=3,biais=False,kernel="rbf"):
        self.lbd=lbd
        self.weight=weight
        self.gamma=gamma
        self.degree=degree
        self.c0=c0
        self.k=k
        self.biais=biais
        self.kernel=kernel
    def fit(self,X,y):
        self.classes_ = np.unique(y)
        self.Xtr=X
        if isinstance(self.gamma,str) and self.kernel=="rbf":
            self.gamma=1/self.Xtr.shape[1]
        self.alpha=Kernel_Ridge_Regression(X,y,self.lbd,self.weight,self.gamma,self.degree,self.c0,self.k,self.biais,self.kernel)
        return self
    def decision_function(self,X):
        if self.kernel=="precomputed":
            return X.dot(self.alpha)
        if not(self.biais):
            if self.kernel=="rbf":
                return K_rbf_kernel(X,self.Xtr,self.gamma).dot(self.alpha) 
            elif self.kernel=="poly":
                return poly_kernel(X,self.Xtr,self.degree,self.c0).dot(self.alpha) 
            elif self.kernel=="spectrum":
                return K_spectrum_kernel(X,self.Xtr,self.k).dot(self.alpha) 
        else:
            if self.kernel=="rbf":
                return addbiais(K_rbf_kernel(X,self.Xtr,self.gamma)).dot(self.alpha)
            elif self.kernel=="poly":
                return addbiais(poly_kernel(X,self.Xtr,self.degree,self.c0)).dot(self.alpha)
            elif self.kernel=="spectrum":
                return addbiais(K_spectrum_kernel(X,self.Xtr,self.k)).dot(self.alpha)

    def predict(self,X,y=None):
        scores=self.decision_function(X)
        if len(scores.shape) == 1:
            indices = (scores > 0).astype(np.int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices]
   
    def get_params(self, deep=True):
    
        return {"lbd": self.lbd,"weight":self.weight,"gamma":self.gamma,"degree":self.degree,"c0":self.c0,"k":self.k,
                "biais":self.biais,"kernel":self.kernel}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

### Recherche des hyperparamètres pour KRR avec noyau gaussien 
Pour la transformation des données ici vu qu'on va ensuite appliqué un kernel qui va rédéfinir les distance entre le points la seul transformation qui à un sens est celle où l'on standardise les données car elle conserve les rapports de distance(et donc pas affecté le kernel). De plus point très important, le paramètre *gamma* du noyau et *lambda* de la régularisation peuvent ne pas jouer de pair dans la performance de l'estimateur, il est donc difficile d'optimiser à la fois *lambda* et *gamma*. Effectivement il est possible que pour un *gamma* haut on ait besoin d'un *lambda* bas pour performer et inversement et comme on ne veut pas se perdre dans différents chemins, il semble logique de fixer *gamma* puis d'optimer *lambda*. Cela tombe bien puisque sans rentrer dans les détails mais lorsque que les données sont standardisées un paramètre de *gamma* naturel est 1/(num_features).

In [15]:
KRR=make_pipeline(StandardScaler(),KernelRR(kernel="rbf"))
print(KRR)

hps0={'kernelrr__lbd':[10**-6,10**-5,10**-4,10**-3,10**-2,10**-1,1,10]}
searchKRR0= GridSearchCV(KRR,hps0,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKRR0.fit(X_0train, Y_0train)

hps1={"kernelrr__lbd":[10**-6,10**-5,10**-4,10**-3,10**-2,10**-1,1,10]}
searchKRR1= GridSearchCV(KRR,hps1,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKRR1.fit(X_1train, Y_1train)


hps2={"kernelrr__lbd":[10**-6,10**-5,10**-4,10**-3,10**-2,10**-1,1,10]}
searchKRR2= GridSearchCV(KRR,hps2,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKRR2.fit(X_2train, Y_2train)



Pipeline(steps=[('standardscaler', StandardScaler()), ('kernelrr', KernelRR())])
Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    7.3s finished


Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    4.5s finished


Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    5.0s finished


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('kernelrr', KernelRR())]),
             n_jobs=-1,
             param_grid={'kernelrr__lbd': [1e-06, 1e-05, 0.0001, 0.001, 0.01,
                                           0.1, 1, 10]},
             scoring='accuracy', verbose=1)

### Présentation des Résultats 
On remarque pour le training set 1 on a 10% en plus 20% en plus!!! pour le training set 2 et rien ne change pour le training set 0 par rapport au modèle linéraire. A Approfondir

In [16]:
présentation_résultat(searchKRR0,10)


NameError: name 'présentation_résultat' is not defined

In [None]:
présentation_résultat(searchKRR1,10)

In [None]:
présentation_résultat(searchKRR2,10)

In [None]:
KRR=make_pipeline(StandardScaler(),KernelRR(weight=False,gamma="auto",kernel="rbf"))
print(KRR)

hps0={"kernelrr__lbd":uniform(loc=10**-4,scale=5*10**-2)}
searchKRR0= RandomizedSearchCV(KRR,hps0,n_iter=500,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKRR0.fit(X_0train, Y_0train)

hps1={"kernelrr__lbd":uniform(loc=5*10**-5,scale=5*10**-2)}
searchKRR1=RandomizedSearchCV(KRR,hps1,n_iter=500,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKRR1.fit(X_1train, Y_1train)


hps2={"kernelrr__lbd":uniform(loc=5*10**-5,scale=5*10**-2)}
searchKRR2= RandomizedSearchCV(KRR,hps2,n_iter=100,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKRR2.fit(X_2train, Y_2train)


### Présentation des résultats

In [None]:
présentation_résultat(searchKRR0,10)


In [None]:
présentation_résultat(searchKRR1,10)

In [None]:
présentation_résultat(searchKRR2,10)

### Polynomial kernel

# Logistic Regression 

In [None]:
def sigmoid(v):
    return 1/(1+np.exp(-v))
def log_loss(v):
    return np.log(1+np.exp(-v))
def cross_loss(y,v):
    return -(y*np.log(v)+(1-y)*np.log(1-v))
from scipy.special import expit

## Solver Kernel Logistic Regression 

Le critère d'arrêt que j'ai mis c'est quand la différence deux itétéres consécutifs a une norme inférieure à $\epsilon$. Ici aussi je laisse la possibilité d'un biais.

In [55]:
def IRLS(X_train,y_train,lbd,ga,degree,c0,k,bs,ker,n_iter,eps=10**-6,method='slow'):
    n=y_train.shape[0]
  
    if ker=="rbf":
        K=rbf_kernel(X_train,ga)
    elif ker=="poly":
        K=poly_kernel(X_train,X_train,degree,c0)
    elif ker=="spectrum":
        K=spectrum_kernel(X_train,k)
    elif ker=="precomputed":
        K=X_train
    #alpha=Kernel_Ridge_Regression(K,y_train,lbd,False,1,bs,"precomputed")
    #alpha=np.zeros(n)
    #l=[]
  
    if bs :
        Kb=addbiais(K)
        K0=addzeros(K)
        alpha=np.zeros(n+1)
    else:
        alpha=np.zeros(n)
    for i in range(n_iter):
   
        alpha_old=alpha
       
        if bs:
            m=Kb.dot(alpha)
            #l.append(log_loss(y_train*m).mean()+lbd*alpha[:-1].dot(K.dot(alpha[:-1])))
        
        else:
            m=K.dot(alpha)
            #l.append(log_loss(y_train*m).mean()+lbd*alpha.dot(m))
        
        
        p=sigmoid(m)
       
        weight=p*(1-p)
       
        weight=np.where(weight<0.000001,0.000001,weight)
       
   
        u=np.where(sigmoid(y_train*m)<0.000001,0.000001,sigmoid(y_train*m))
        z = m + y_train/u
    
        if not(bs):
        
            S = np.diag(weight**-1)
            A=(K+2*lbd*n*S)
            alpha=solve(A,z,assume_a="sym")
            
            #print(np.linalg.norm(alpha_old-alpha))
            
            if np.linalg.norm(alpha_old-alpha)<eps:
                break
        else:
            S = np.diag(weight)
            A=(Kb.T.dot(S.dot(Kb))+2*lbd*n*K0)
            B=Kb.T.dot(S.dot(z))
            if method=="slow":
                alpha=lstsq(A,B)[0]
            else:
                alpha=solve(A,B,assume_a="sym")
            #print(np.linalg.norm(alpha_old-alpha))
            if np.linalg.norm(alpha_old-alpha)<eps:
                break
                
       
    return alpha #,l
        
#with np.printoptions(threshold=np.inf):

### Je construit l'estimateur compatible avec l'API de scikit 

In [54]:
class KernelLR(BaseEstimator,ClassifierMixin):
    def __init__(self,lbd=1,gamma='auto',degree=2,c0=1,k=3,biais=False,kernel="rbf",n_iter=15,method="slow"):
        self.lbd=lbd
        self.gamma=gamma
        self.degree=degree
        self.c0=c0
        self.k=k
        self.biais=biais
        self.kernel=kernel
        self.n_iter=n_iter
        self.method=method
    def fit(self,X,y):
        self.classes_ = np.unique(y)
        self.Xtr=X
        if isinstance(self.gamma,str) and self.kernel=="rbf":
            self.gamma=1/self.Xtr.shape[1]
        self.alpha=IRLS(X,y,self.lbd,self.gamma,self.degree,self.c0,self.k,self.biais,self.kernel,self.n_iter,method=self.method)
        return self
    def decision_function(self,X):
        if not(self.biais):
            if self.kernel=="precomputed":
                return X.dot(self.alpha)
            if self.kernel=="rbf":
                return K_rbf_kernel(X,self.Xtr,self.gamma).dot(self.alpha) 
            elif self.kernel=="poly":
                return poly_kernel(X,self.Xtr,self.degree,self.c0).dot(self.alpha) 
            elif self.kernel=="spectrum":
                return K_spectrum_kernel(X,self.Xtr,self.k).dot(self.alpha) 
        else:
            if self.kernel=="rbf":
                return addbiais(K_rbf_kernel(X,self.Xtr,self.gamma)).dot(self.alpha)
            elif self.kernel=="poly":
                return addbiais(poly_kernel(X,self.Xtr,self.degree,self.c0)).dot(self.alpha)
            elif self.kernel=="spectrum":
                return addbiais(K_spectrum_kernel(X,self.Xtr,self.k)).dot(self.alpha)
        

    def predict(self,X,y=None):
        scores=self.decision_function(X)
        if len(scores.shape) == 1:
            indices = (scores > 0).astype(np.int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices]
    def predict_proba_(self,X,y=None):
        p=sigmoid(self.decision_function(X)).reshape(-1,1)
        return hstack((p,1-p))
    def get_params(self, deep=True):
    
        return {"lbd": self.lbd,"gamma":self.gamma,"degree":self.degree,"c0":self.c0,"k":self.k,"biais":self.biais,
                "kernel":self.kernel,"n_iter":self.n_iter,"method":self.method}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

### Recherche des Hyperparamètres 

In [None]:
KLR=make_pipeline(StandardScaler(),KernelLR())
print(KLR)

hps0={'kernellr__lbd':[10**-6,10**-5,10**-4,10**-3,10**-2,10**-1,1,10]}
searchKLR0= GridSearchCV(KLR,hps0,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKLR0.fit(X_0train, Y_0train)

hps1={'kernellr__lbd':[10**-6,10**-5,10**-4,10**-3,10**-2,10**-1,1,10]}
searchKLR1= GridSearchCV(KLR,hps1,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKLR1.fit(X_1train, Y_1train)


hps2={'kernellr__lbd':[10**-6,10**-5,10**-4,10**-3,10**-2,10**-1,1,10]}
searchKLR2= GridSearchCV(KLR,hps2,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKLR2.fit(X_2train, Y_2train)


### Présentation des résultats 

In [None]:
présentation_résultat(searchKLR0,10)

In [None]:
présentation_résultat(searchKLR1,10)

In [None]:
présentation_résultat(searchKLR2,10)

In [None]:
hps0={"kernellr__lbd":uniform(loc=5*10**-6,scale=5*10**-3)}
searchKLR0= RandomizedSearchCV(KLR,hps0,n_iter=500,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKLR0.fit(X_0train, Y_0train)

hps1={"kernellr__lbd":uniform(loc=5*10**-7,scale=5*10**-3)}
searchKLR1=RandomizedSearchCV(KLR,hps1,n_iter=500,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKLR1.fit(X_1train, Y_1train)


hps2={"kernellr__lbd":uniform(loc=5*10**-7,scale=5*10**-3)}
searchKLR2= RandomizedSearchCV(KLR,hps2,n_iter=500,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKLR2.fit(X_2train, Y_2train)

In [None]:
présentation_résultat(searchKLR0,10)

In [None]:
présentation_résultat(searchKLR1,10)

In [None]:
présentation_résultat(searchKLR2,10)

# SVM 
J'utilise cvxopt pour résoudre le QP

In [57]:
def SVM(X_train,y_train,C,gamma,degree,c0,k,kernel):
    n=y_train.shape[0]
    if kernel=="rbf":
        K=rbf_kernel(X_train,gamma)
    elif kernel=="poly":
        K=poly_kernel(X_train,X_train,degree,c0)
    elif kernel=="spectrum":
        K=spectrum_kernel(X_train,k)
    elif kernel=="precomputed":
        K=X_train
    P=matrix(K,tc='d')
    q=matrix(-y_train,tc='d')
    g1=np.diag(y_train)
    G=matrix(np.vstack((g1,-g1)),tc='d')
    h=matrix(np.hstack((np.repeat(C,n),np.zeros(n))),tc='d')
    solvers.options['show_progress'] = False
    sol=solvers.qp(P,q,G,h)
    return np.array(sol['x']).reshape(-1,)

### Création de l'estimateur compatible avec l'API de scikit 

Si vous remarquez bien j'utilise tout alpha pour définir ma fonction f(x) alors que je pourrai garder seulement les alpha_i telle que alpha_i plus grand > threshold (car le solver nous fera jamais arrivé exatement à zéro quand alpha_i doit être égale à zéro) car comme on le sait une solution optimale de SVM est un vecteur sparse. Je ne l'ai pas fait car j'ai eu du mal à définir le threshold, on trouve une trace de ma tentative dans le code de l'estimateur

In [58]:
class KernelSVM(BaseEstimator,ClassifierMixin):
    def __init__(self,C=1,gamma='auto',degree="2",c0=1,k=3,kernel="rbf"):
        self.C=C
        self.gamma=gamma
        self.degree=degree
        self.c0=c0
        self.k=k
        self.kernel=kernel
    def fit(self,X,y):
        self.classes_ = np.unique(y)
        self.Xtr=X
        if isinstance(self.gamma,str) and self.kernel=="rbf":
            self.gamma=1/self.Xtr.shape[1]
        self.alpha=SVM(X,y,self.C,self.gamma,self.degree,self.c0,self.k,self.kernel)
        #idx=self.alpha>10**-5
        #self.Xtr=self.Xtr[idx]
        #self.alpha=self.alpha[idx]
        return self
    def decision_function(self,X):
        if self.kernel=="precomputed":
            return X.dot(self.alpha)
        elif self.kernel=="rbf":
            return K_rbf_kernel(X,self.Xtr,self.gamma).dot(self.alpha) 
        elif self.kernel=="poly":
            return poly_kernel(X,self.Xtr,self.degree,self.c0).dot(self.alpha) 
        elif self.kernel=="spectrum":
            return K_spectrum_kernel(X,self.Xtr,self.k).dot(self.alpha) 
            
           
    def predict(self,X,y=None):
        scores=self.decision_function(X)
        if len(scores.shape) == 1:
            indices = (scores > 0).astype(np.int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices]
    def get_params(self, deep=True):
    
        return {"C": self.C,"gamma":self.gamma,"degree":self.degree,"c0":self.c0,"k":self.k,"kernel":self.kernel}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

In [None]:
KSVM=make_pipeline(StandardScaler(),KernelSVM())
print(KSVM)

hps0={'kernelsvm__C':[10**-2,10**-1,1,10,50,100,500,1000]}
searchKSVM0= GridSearchCV(KSVM,hps0,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKSVM0.fit(X_0train, Y_0train)

hps1={'kernelsvm__C':[10**-2,10**-1,1,10,50,100,500,1000]}
searchKSVM1= GridSearchCV(KSVM,hps1,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKSVM1.fit(X_1train, Y_1train)


hps2={'kernelsvm__C':[10**-2,10**-1,1,10,50,100,500,1000]}
searchKSVM2= GridSearchCV(KSVM,hps2,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKSVM2.fit(X_2train, Y_2train)

In [None]:
présentation_résultat(searchKSVM0,10)

In [None]:
présentation_résultat(searchKSVM1,10)

In [None]:
présentation_résultat(searchKSVM2,10)

In [None]:

hps0={"kernelsvm__C":uniform(loc=0.5,scale=5)}
searchKSVM0= RandomizedSearchCV(KSVM,hps0,n_iter=100,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKSVM0.fit(X_0train, Y_0train)

hps1={"kernelsvm__C":uniform(loc=0.5,scale=5)}
searchKSVM1= RandomizedSearchCV(KSVM,hps1,n_iter=100,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKSVM1.fit(X_1train, Y_1train)

hps2={"kernelsvm__C":uniform(loc=0.1,scale=3)}
searchKSVM2= RandomizedSearchCV(KSVM,hps2,n_iter=100,scoring="accuracy",cv=5,verbose=1,n_jobs=-1)
searchKSVM2.fit(X_2train, Y_2train)

In [None]:
présentation_résultat(searchKSVM0,10)

In [None]:
présentation_résultat(searchKSVM1,10)

In [None]:
présentation_résultat(searchKSVM2,10)

In [None]:
KLR0=make_pipeline(StandardScaler(),KernelLR(lbd=0.00018))
KLR1=make_pipeline(StandardScaler(),KernelLR(lbd=0.00016))
KLR2=make_pipeline(StandardScaler(),KernelLR(lbd=5.89*(10**-5)))

X_0test=pd.read_csv("Data/Xte0_mat100.csv",sep=' ',header=None).to_numpy()
X_1test=pd.read_csv("Data/Xte1_mat100.csv",sep=' ',header=None).to_numpy()
X_2test=pd.read_csv("Data/Xte2_mat100.csv",sep=' ',header=None).to_numpy()


print(X_1train.shape)

In [211]:
def csv_file(models): #models is a list of 3 models
    Y_test=np.empty(0)
    for X_train, Y_train, X_test, model  in zip([X_0train,X_1train,X_2train], [Y_0train,Y_1train,Y_2train], [X_0test,X_1test,X_2test], models):
        model.fit(X_train, Y_train)
        Y_pred=model.predict(X_test)
        Y_test=np.concatenate((Y_test,np.where(Y_pred==-1,0,Y_pred)), axis=0)
    
    Y_test=Y_test.reshape(len(Y_test),1)
    with np.printoptions(threshold=np.inf):
        print(Y_test.shape)
    ids=np.arange(Y_test.shape[0])
    ids=ids.reshape(len(ids),1)
    
    df=pd.DataFrame(data=np.concatenate((ids,Y_test), axis=1), columns=['Id','Bound'],dtype=np.int)
    
    return df.to_csv('kernel_spectrum_try2.csv', index = False, header=True)

In [None]:
csv_file([KLR0,KLR1,KLR2])

# String kernel 


In [123]:
import time 
from tqdm import tqdm
from scipy.stats import rankdata
from scipy import sparse

In [215]:
X_0train=pd.read_csv('Data/Xtr0.csv', sep=',')['seq']
X_1train=pd.read_csv('Data/Xtr1.csv', sep=',')['seq']
X_2train=pd.read_csv('Data/Xtr2.csv', sep=',')['seq']

X_0test=pd.read_csv('Data/Xte0.csv', sep=',')['seq']
X_1test=pd.read_csv('Data/Xte1.csv', sep=',')['seq']
X_2test=pd.read_csv('Data/Xte2.csv', sep=',')['seq']

Y_0=pd.read_csv("Data/Ytr0.csv",sep=',')
Y_1=pd.read_csv("Data/Ytr1.csv",sep=',')
Y_2=pd.read_csv("Data/Ytr2.csv",sep=',')

Y_0train=np.where(Y_0["Bound"]==0,-1,Y_0["Bound"])
Y_1train=np.where(Y_1["Bound"]==0,-1,Y_1["Bound"])
Y_2train=np.where(Y_2["Bound"]==0,-1,Y_2["Bound"])


In [133]:
def K_spectrum_kernel(X,Y,k,alphabet="ACGT"):
    voc=product(alphabet, repeat=k)
    voc=[''.join(elt) for elt in voc]
    phi_X=np.vstack(X.apply(lambda x: [x[i:i+k] for i in range(0, len(x)-k+1)]).apply(lambda x: np.array([x.count(v) for v in voc])).to_numpy())
    phi_Y=np.vstack(Y.apply(lambda x: [x[i:i+k] for i in range(0, len(x)-k+1)]).apply(lambda x: np.array([x.count(v) for v in voc])).to_numpy())
    return phi_X.dot(phi_Y.T)
def spectrum_kernel(X,k,alphabet="ACGT"):
    voc=product(alphabet, repeat=k)
    voc=[''.join(elt) for elt in voc]
    phi_X=np.vstack(X.apply(lambda x: [x[i:i+k] for i in range(0, len(x)-k+1)]).apply(lambda x: np.array([x.count(v) for v in voc])).to_numpy())
   
    return phi_X.dot(phi_X.T)
    


In [None]:
print(spectrum_kernel(X_0train,8))

100%|██████████| 65536/65536 [00:00<00:00, 135031.15it/s]
100%|██████████| 65536/65536 [00:00<00:00, 107649.95it/s]
100%|██████████| 65536/65536 [00:00<00:00, 116545.49it/s]
100%|██████████| 65536/65536 [00:00<00:00, 135838.71it/s]
100%|██████████| 65536/65536 [00:00<00:00, 131883.68it/s]
100%|██████████| 65536/65536 [00:00<00:00, 129471.60it/s]
100%|██████████| 65536/65536 [00:00<00:00, 135528.48it/s]
100%|██████████| 65536/65536 [00:00<00:00, 133382.91it/s]
100%|██████████| 65536/65536 [00:00<00:00, 133276.79it/s]
100%|██████████| 65536/65536 [00:00<00:00, 133025.56it/s]
100%|██████████| 65536/65536 [00:00<00:00, 134912.32it/s]
100%|██████████| 65536/65536 [00:00<00:00, 138630.69it/s]
100%|██████████| 65536/65536 [00:00<00:00, 135062.93it/s]
100%|██████████| 65536/65536 [00:00<00:00, 131023.33it/s]
100%|██████████| 65536/65536 [00:00<00:00, 131368.73it/s]
100%|██████████| 65536/65536 [00:00<00:00, 127028.35it/s]
100%|██████████| 65536/65536 [00:00<00:00, 128435.74it/s]
100%|█████████

In [173]:
KSVM=KernelSVM(kernel="precomputed")
print(KSVM)

hps0={'C':[10**-2,10**-1,1,10,50,100,500,1000],'k':[2,3,4,5,6,7,8]}
searchKSVM0= Cross_val_spectrum(X_0train,Y_0train,KSVM,hps0)


hps1={'C':[10**-2,10**-1,1,10,50,100,500,1000],'k':[2,3,4,5,6,7,8]}
searchKSVM1= Cross_val_spectrum(X_1train,Y_1train,KSVM,hps1)



hps2={'C':[10**-2,10**-1,1,10,50,100,500,1000],'k':[2,3,4,5,6,7,8]}
searchKSVM2= Cross_val_spectrum(X_2train,Y_2train,KSVM,hps2)


 14%|█▍        | 1/7 [00:00<00:00,  6.35it/s]

KernelSVM(kernel='precomputed')


100%|██████████| 7/7 [06:14<00:00, 53.51s/it] 
56it [13:11, 14.13s/it]
100%|██████████| 7/7 [06:13<00:00, 53.42s/it] 
56it [35:18, 37.83s/it]
100%|██████████| 7/7 [06:59<00:00, 59.88s/it] 
56it [15:13, 16.31s/it]


In [183]:
présentation_résultat2(searchKSVM0,30)

Unnamed: 0,C,k,mean_test_score
0,0.01,8,0.6455
1,0.01,7,0.645
2,0.1,8,0.6405
3,1.0,8,0.6405
4,10.0,8,0.6405
5,50.0,8,0.6405
6,100.0,8,0.6405
7,500.0,8,0.6405
8,1000.0,8,0.6405
9,0.01,6,0.638


In [185]:
présentation_résultat2(searchKSVM1,40)

Unnamed: 0,C,k,mean_test_score
0,0.01,5,0.6435
1,0.01,6,0.638
2,0.01,7,0.6295
3,0.1,7,0.62
4,1.0,7,0.62
5,10.0,7,0.62
6,50.0,7,0.62
7,100.0,7,0.62
8,500.0,7,0.62
9,1000.0,7,0.62


In [186]:
présentation_résultat2(searchKSVM2,40)

Unnamed: 0,C,k,mean_test_score
0,0.01,7,0.7415
1,0.1,7,0.7335
2,1.0,7,0.731
3,10.0,7,0.731
4,50.0,7,0.731
5,100.0,7,0.731
6,500.0,7,0.731
7,1000.0,7,0.731
8,0.01,6,0.729
9,0.01,8,0.7275


In [200]:
hps0={'C':uniform(loc=0.005,scale=0.5),'k':[8]}
searchKSVM0= Randomized_Cross_val_spectrum(X_0train,Y_0train,KSVM,hps0,50)


hps1={'C':uniform(loc=0.005,scale=0.5),'k':[5]}
searchKSVM1= Randomized_Cross_val_spectrum(X_1train,Y_1train,KSVM,hps1,50)



hps2={'C':uniform(loc=0.005,scale=0.5),'k':[7]}
searchKSVM2= Randomized_Cross_val_spectrum(X_2train,Y_2train,KSVM,hps2,50)


100%|██████████| 1/1 [06:23<00:00, 383.41s/it]
0it [00:00, ?it/s]

{'C': array([0.46666505, 0.26907594, 0.12523631, 0.45466107, 0.26347195,
       0.19420895, 0.18226131, 0.49544762, 0.01318606, 0.41101619,
       0.45164923, 0.24666404, 0.05044877, 0.3151696 , 0.05091285,
       0.42096472, 0.11227579, 0.2445298 , 0.30944494, 0.40553232,
       0.49069645, 0.41870792, 0.4170422 , 0.41755719, 0.4080609 ,
       0.05694352, 0.21573527, 0.01444699, 0.3645368 , 0.22691003,
       0.21797331, 0.07831073, 0.01927725, 0.21226899, 0.43667885,
       0.02985245, 0.0565717 , 0.19886476, 0.11112374, 0.30510266,
       0.08007239, 0.47218621, 0.35868019, 0.24887759, 0.29805176,
       0.25165495, 0.39362325, 0.45773882, 0.07267951, 0.05861149]), 'k': array([8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
       8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
       8, 8, 8, 8, 8, 8])}


50it [09:22, 11.24s/it]
100%|██████████| 1/1 [00:05<00:00,  5.15s/it]
0it [00:00, ?it/s]

{'C': array([0.43176172, 0.26195195, 0.24518519, 0.41727215, 0.09133693,
       0.34584573, 0.4296129 , 0.27213185, 0.4455721 , 0.34425933,
       0.12402668, 0.38649431, 0.04172012, 0.40968717, 0.14548111,
       0.08202455, 0.48124638, 0.17726664, 0.41057927, 0.1769999 ,
       0.34431211, 0.04408077, 0.20784162, 0.39786877, 0.48186417,
       0.38823755, 0.42120227, 0.47118595, 0.06240904, 0.23616567,
       0.18784333, 0.23993454, 0.12151771, 0.49477509, 0.18250762,
       0.28094271, 0.20887174, 0.24207455, 0.40490859, 0.08339908,
       0.12515863, 0.06248199, 0.24669478, 0.26764686, 0.07934126,
       0.16136374, 0.1944452 , 0.30599783, 0.19357854, 0.08016435]), 'k': array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5])}


50it [10:43, 12.88s/it]
100%|██████████| 1/1 [01:19<00:00, 79.43s/it]
0it [00:00, ?it/s]

{'C': array([0.24811796, 0.19046197, 0.02332814, 0.37084938, 0.28486939,
       0.43829491, 0.26565659, 0.17937156, 0.06996736, 0.3658085 ,
       0.28177865, 0.06674257, 0.28349039, 0.25354877, 0.400276  ,
       0.49915773, 0.46203365, 0.43082258, 0.01423607, 0.42812112,
       0.4607758 , 0.38853215, 0.40922324, 0.25191396, 0.3117775 ,
       0.31132515, 0.39262195, 0.23339819, 0.3925347 , 0.1423535 ,
       0.1797383 , 0.40227858, 0.41858376, 0.05750593, 0.05113303,
       0.06096535, 0.29375129, 0.09998449, 0.32552638, 0.15315188,
       0.24206569, 0.41775379, 0.47600435, 0.43112248, 0.17612762,
       0.25724631, 0.27676258, 0.0929287 , 0.11508991, 0.18911116]), 'k': array([7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7])}


50it [18:04, 21.69s/it]


In [203]:
présentation_résultat2(searchKSVM0,30)

Unnamed: 0,C,k,mean_test_score
0,0.050448771307686,8,0.6435
1,0.0509128513033783,8,0.643
2,0.0298524502258161,8,0.6425
3,0.0131860596454099,8,0.641
4,0.0569435202737617,8,0.641
5,0.0144469927864557,8,0.641
6,0.0565717033598978,8,0.641
7,0.0586114863011459,8,0.641


In [207]:
présentation_résultat2(searchKSVM1,30)

Unnamed: 0,C,k,mean_test_score
0,0.0417201150862655,5,0.624
1,0.0440807747696007,5,0.6235
2,0.0624090423997785,5,0.6205
3,0.0624819944260057,5,0.62
4,0.0820245538771032,5,0.6155
5,0.1215177094235083,5,0.6155
6,0.1613637411943103,5,0.6145
7,0.1772666391963098,5,0.6145
8,0.1769998975410397,5,0.6145
9,0.0833990814115075,5,0.6145


In [208]:
présentation_résultat2(searchKSVM2,30)

Unnamed: 0,C,k,mean_test_score
0,0.0142360671633573,7,0.741
1,0.0233281375024019,7,0.735
2,0.0699673590736214,7,0.7345
3,0.0609653510218289,7,0.7345
4,0.0511330346645851,7,0.734
5,0.0667425702295247,7,0.734
6,0.0575059346071671,7,0.734
7,0.0999844942020891,7,0.7335
8,0.092928697077708,7,0.733
9,0.1150899081854529,7,0.733


In [213]:
model0=KernelSVM(C=0.01,k=8,kernel="spectrum")
model1=KernelSVM(C=0.01,k=5,kernel="spectrum")
model2=KernelSVM(C=0.01,k=7,kernel="spectrum")

In [216]:
csv_file([model0,model1,model2])

(3000, 1)


In [172]:
def Cross_val_spectrum(X_train,Y_train,model,hps,cv=5):
    CV=StratifiedKFold(cv)
    dic_K={k:spectrum_kernel(X_train,k) for k in tqdm(hps["k"])}
    list_hp=[]
    list_val_score=[]
    idx_k=list(hps.keys()).index("k")
    
    for i in tqdm(product(*hps.values())):
        
        dic_hp={keys:values for keys,values in zip(hps.keys(),i)}
        list_hp.append(dic_hp)
        
        model.set_params(**dic_hp)
        acc_mean=0
        for train_idx,val_idx in CV.split(X_train,Y_train):
            K=dic_K[i[idx_k]]
            model.fit(K[train_idx][:,train_idx],Y_train[train_idx])
            Y_pred=model.predict(K[val_idx][:,train_idx])
            acc_mean+=accuracy_score(Y_train[val_idx],Y_pred)
        list_val_score.append(acc_mean/cv)
    return {"params":list_hp,"mean_test_score":np.array(list_val_score),"rank_test_score":rankdata(list_val_score)}
            
            
    
        

In [198]:
def Randomized_Cross_val_spectrum(X_train,Y_train,model,hps,n_iter,cv=5):
    CV=StratifiedKFold(cv)
    dic_K={k:spectrum_kernel(X_train,k) for k in tqdm(hps["k"])}
    list_hp=[]
    list_val_score=[]
    idx_k=list(hps.keys()).index("k")
    for keys,values in hps.items():
        if isinstance(values,scipy.stats._distn_infrastructure.rv_frozen):
            hps[keys]=values.rvs(size=n_iter)
        else:
            hps[keys]=np.random.choice(values,n_iter)
    print(hps)
    for i in tqdm(zip(*hps.values())):
        
        dic_hp={keys:values for keys,values in zip(hps.keys(),i)}
        list_hp.append(dic_hp)
        
        model.set_params(**dic_hp)
        acc_mean=0
        for train_idx,val_idx in CV.split(X_train,Y_train):
            K=dic_K[i[idx_k]]
            model.fit(K[train_idx][:,train_idx],Y_train[train_idx])
            Y_pred=model.predict(K[val_idx][:,train_idx])
            acc_mean+=accuracy_score(Y_train[val_idx],Y_pred)
        list_val_score.append(acc_mean/cv)
    return {"params":list_hp,"mean_test_score":np.array(list_val_score),"rank_test_score":rankdata(list_val_score)}
            
            

In [199]:
hps={'C':uniform(loc=0.5,scale=2),'k':[3]}
CV=StratifiedKFold(5)
list_hp=[]
n_iter=10
list_val_score=[]
dic_K={k:np.arange(k) for k in hps["k"]}
idx_k=list(hps.keys()).index("k")
for keys,values in hps.items():
    if isinstance(values,scipy.stats._distn_infrastructure.rv_frozen):
        hps[keys]=values.rvs(size=n_iter)
    else:
        hps[keys]=np.random.choice(values,n_iter)
print(hps)

for i in zip(*hps.values()):
    dic_hp={keys:values for keys,values in zip(hps.keys(),i)}
    KSVM.set_params(**dic_hp)
    print(dic_hp)
    for train_idx,test_idx in CV.split(X_0train,Y_0train):
        x=5
       
   
    

{'C': array([2.29109288, 1.28619423, 0.54370751, 1.51972859, 1.29469697,
       1.70426481, 2.27696594, 0.52956819, 1.45780128, 1.24405329]), 'k': array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3])}
{'C': 2.2910928819369967, 'k': 3}
{'C': 1.2861942342546977, 'k': 3}
{'C': 0.5437075121323791, 'k': 3}
{'C': 1.5197285935541702, 'k': 3}
{'C': 1.2946969722600201, 'k': 3}
{'C': 1.7042648064952854, 'k': 3}
{'C': 2.276965939325671, 'k': 3}
{'C': 0.5295681908427132, 'k': 3}
{'C': 1.4578012788210766, 'k': 3}
{'C': 1.2440532860608564, 'k': 3}


In [193]:
type(uniform(loc=0.5,scale=1))
isinstance(uniform(loc=0.5,scale=1),scipy.stats._distn_infrastructure.rv_frozen)

True

In [192]:
import scipy

In [17]:
for i in product(*dic.values()):
    print(i)
    dac={keys:values for keys,values in zip(dic.keys(),i)}
    print(dac)

(1, 2)
{'a': 1, 'b': 2}
(1, 3)
{'a': 1, 'b': 3}
(3, 2)
{'a': 3, 'b': 2}
(3, 3)
{'a': 3, 'b': 3}
(4, 2)
{'a': 4, 'b': 2}
(4, 3)
{'a': 4, 'b': 3}
(5, 2)
{'a': 5, 'b': 2}
(5, 3)
{'a': 5, 'b': 3}


In [40]:
A=np.diag(np.arange(10))
idx=np.array([1,3,5])
A[idx]

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 3, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 5, 0, 0, 0, 0]])

In [182]:
def présentation_résultat2(search,n):
    mask=search['rank_test_score']<=n
    params=list(compress(search['params'], list(mask)))
    mean_test_score=search['mean_test_score'][mask]
    a={}
    for i in range(mean_test_score.size):
        k=''
        for key, value in params[i].items():
            k+=" "+key+" "+str(value)
        a.update({k:mean_test_score[i]})
        sortedDict = sorted(a.items(), key=lambda x: x[1],reverse=True)
    l=[]
    for i in sortedDict:
        u=i[0].split(sep=' ')
        del(u[0])
        lp=[]
        for j in u[1::2]:
            lp.append(j)
        lp.append(i[1])
        l.append(lp)
    head=list(params[0].keys())+["mean_test_score"]

    return(pd.DataFrame(l,columns=head))
        