In [141]:
!pip install sklearnkernels



In [142]:
from sklearnkernels.KSVM import KSVC
from sklearnkernels.KANN import KANNC
from sklearn.datasets import load_breast_cancer,load_digits, load_iris, load_wine,make_circles
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import  GridSearchCV, RandomizedSearchCV

In [143]:
# Output folder for the CSV files written by this notebook. File names are appended
# with plain string concatenation (path+filename), so the trailing "/" is required.
# NOTE(review): presumably a mounted Google Drive folder in Colab - confirm the drive is mounted before running.
path="/content/drive/MyDrive/Colab Notebooks/CACIED/ComparaciónFuncionesKernel/"

In [144]:
def fxor(n_samples=250, random_state=0):
  """Build a two-dimensional XOR classification problem.

  Points are drawn from a standard normal distribution; a point is labelled
  True when exactly one of its two coordinates is positive.

  Parameters
  ----------
  n_samples : int, default 250
      Number of points to generate.
  random_state : int, default 0
      Seed for the private ``RandomState`` used to draw the points.

  Returns
  -------
  X : ndarray of shape (n_samples, 2)
      Sampled coordinates.
  y : ndarray of bool, shape (n_samples,)
      XOR of the signs of the two coordinates.
  """
  # A private generator keeps the result independent of the global NumPy seed.
  rng = np.random.RandomState(random_state)
  X = rng.randn(n_samples, 2)
  y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)
  return X,y

import math

def make_spiral (n_points, n_turns, noise=.5, random_state=None):
  """Generate two interleaved spiral arms as a labelled point cloud.

  Each point is placed at a random angle ``a`` drawn uniformly from
  ``[0, n_turns * pi)`` with radius equal to ``a``. The second arm is the
  first one mirrored through the origin. One Gaussian noise value is drawn
  per point and added to both coordinates of that point in both arms.

  Parameters
  ----------
  n_points : int
      Number of points per arm (the result has ``2 * n_points`` rows).
  n_turns : float
      Upper bound of the angle range, expressed in multiples of pi.
  noise : float, default 0.5
      Standard deviation of the Gaussian noise.
  random_state : int or None, default None
      Seed for a private ``RandomState``. When None the global NumPy
      generator is used, so results then depend on ``np.random.seed``.

  Returns
  -------
  ndarray of shape (2 * n_points, 3)
      Columns are x, y and the class label. The first ``n_points`` rows are
      the arm labelled 1, the remaining rows the arm labelled 0.
  """
  rng = np.random if random_state is None else np.random.RandomState(random_state)

  # Noise is drawn before the angles; keeping this order fixed means a given
  # seed always produces the same sample.
  noises = rng.normal(0, noise, n_points)
  angles = rng.rand(n_points) * n_turns * np.pi

  # Noise-free coordinates of the second arm; the first arm is their negation.
  x = angles * np.cos(angles)
  y = angles * np.sin(angles)

  spiral1 = np.column_stack((-x + noises,  y + noises, np.ones(n_points)))
  spiral2 = np.column_stack(( x + noises, -y + noises, np.zeros(n_points)))
  return np.concatenate((spiral1, spiral2))


# Carga de datos

In [145]:
# Tabular datasets shipped with scikit-learn.
breast_cancer=load_breast_cancer()
digits=load_digits()
iris=load_iris()
wine=load_wine()
#------
# Synthetic two-dimensional datasets.
circles=make_circles(n_samples=500,noise=0.1, random_state=2,factor=0.6)
xor=fxor()
# make_spiral samples from NumPy's global generator; seed it so that the spiral
# dataset is reproducible like the other datasets in this cell.
# Note: this reseeds the global NumPy generator for any code that runs afterwards.
np.random.seed(2021)
spiral=make_spiral(500, 3, 0.1)

In [146]:
def getXY(dataset, name):
  """Split a dataset into its feature matrix and label vector.

  The layout of ``dataset`` is selected by ``name``:

  * "circles" / "xor": an indexable pair ``(X, y)``.
  * "spiral": a 2-D array whose first two columns are the features and
    whose last column holds the labels.
  * anything else: an object exposing ``.data`` and ``.target``.

  Returns the pair ``(X, y)``.
  """
  if name in ("circles", "xor"):
    features, labels = dataset[0], dataset[1]
  elif name == "spiral":
    features, labels = dataset[:, :2], dataset[:, -1]
  else:
    features, labels = dataset.data, dataset.target
  return features, labels

# Quick check of getXY on the wine dataset; the resulting shapes are displayed as the cell output.
X,y=getXY(wine,"wine")
X.shape,y.shape

((178, 13), (178,))

In [147]:
# Registry of every dataset used in the experiment. The keys are the names
# passed to getXY, and the insertion order is the order in which the
# datasets are processed.
dsdic={
  "breast_cancer":breast_cancer,
  "digits":digits,
  "iris":iris,
  "wine":wine,
  "circles":circles,
  "xor":xor,
  "spiral":spiral,
}


In [148]:
# One (display name, feature matrix, class count) entry per dataset.
# Class counts come from target_names for the scikit-learn loaders and from
# the number of distinct label values for the synthetic datasets.
_ds_overview=[
  ("breast cancer", breast_cancer.data, breast_cancer.target_names.shape[0]),
  ("digits", digits.data, digits.target_names.shape[0]),
  ("iris", iris.data, iris.target_names.shape[0]),
  ("wine", wine.data, wine.target_names.shape[0]),
  ("circles", circles[0], np.unique(circles[1]).shape[0]),
  ("xor", xor[0], np.unique(xor[1]).shape[0]),
  ("spiral", spiral[:,0:2], np.unique(spiral[:,-1]).shape[0]),
]

# nreg = number of rows, ndim = number of feature columns.
lst_ds=[
  {"name":label, "nreg":features.shape[0], "ndim":features.shape[1], "nclass":n_classes}
  for label, features, n_classes in _ds_overview
]
# Keep the per-dataset dictionaries reachable under their short names.
mbca,mdig,miri,mwin,mcir,mxor,mspi=lst_ds

df_ds=pd.DataFrame(lst_ds)
df_ds.to_csv(path+"explore_dataset.csv",)


# Exploración

In [149]:
# Reload the dataset overview from disk, keeping only the four descriptive columns
# (usecols leaves out the index column that to_csv writes by default).
df_ds=pd.read_csv(path+"explore_dataset.csv",usecols=["name","nreg","ndim","nclass"])
df_ds

Unnamed: 0,name,nreg,ndim,nclass
0,breast cancer,569,30,2
1,digits,1797,64,10
2,iris,150,4,3
3,wine,178,13,3
4,circles,500,2,2
5,xor,250,2,2
6,spiral,1000,2,2


# Experimento

In [150]:
def _gamma_search_space(kernel):
  """Return the search space of a kernel that is tuned over C and gamma only."""
  return {
    "svc__kernel": [kernel],
    "svc__C": np.logspace(0,5,10),
    "svc__gamma" : np.logspace(-4,4,20),
  }

# 'rquadratic' is the only kernel searched over coef0; its gamma stays at "auto".
rq_params={
  "svc__kernel": ['rquadratic'],
  "svc__C": np.logspace(0,5,10),
  "svc__coef0" : np.logspace(-4,4,20),
  "svc__gamma" : ["auto"],
}
rbf_params=_gamma_search_space('rbf')
tru_params=_gamma_search_space('tru')
can_params=_gamma_search_space('can')
rb_params=_gamma_search_space('radial_basic')
tri_params=_gamma_search_space('triangle')
hp_params=_gamma_search_space('hyperbolic')

params=[rq_params,rbf_params,tru_params,can_params,rb_params,tri_params,hp_params]

# The estimator step is called 'svc' in both pipelines so that the same
# "svc__*" parameter names address either classifier.
p_sts_ksvc = Pipeline([('sscaler', StandardScaler()), ('svc', KSVC())])
p_sts_kann = Pipeline([('sscaler', StandardScaler()), ('svc', KANNC())])

pipes=[
  {"name":"SScalerKANN","pipe":p_sts_kann},
  {"name":"SScalerKSVC","pipe":p_sts_ksvc},
]

In [151]:
def random_searchFit(X,y,filename):
  """Run a randomized hyper-parameter search for every pipeline/kernel pair.

  For each entry of the module-level ``pipes`` list and each search space in
  the module-level ``params`` list, a 5-fold ``RandomizedSearchCV`` is fitted
  on ``(X, y)``. One summary row per search is collected and the resulting
  table is written as CSV to ``path + filename``.

  Parameters
  ----------
  X : array-like
      Training features passed to ``RandomizedSearchCV.fit``.
  y : array-like
      Training labels passed to ``RandomizedSearchCV.fit``.
  filename : str
      Appended verbatim to the module-level ``path``; no extension is added.

  Returns
  -------
  None
      The function prints progress and writes the CSV as its side effects.
  """
  summary_rows=[]
  for pipe in pipes:
    print(pipe["name"])
    for param in params:
      print(param)
      clf=RandomizedSearchCV(pipe["pipe"],param,cv=5, random_state=2021, n_jobs=-1)
      clf.fit(X,y)
      print(clf.best_score_)

      results=clf.cv_results_
      # Use the index the search itself selected. np.argmax over
      # mean_test_score would return a NaN entry when a candidate failed to
      # fit, so the row would no longer describe the same candidate as
      # best_params_.
      best=clf.best_index_
      summary_rows.append({
          'Scaler':pipe["name"],
          'kernel':clf.best_params_['svc__kernel'],
          'mean_test_score':results['mean_test_score'][best],
          'std_test_score':results['std_test_score'][best],
          'mean_fit_time':results['mean_fit_time'][best],
          'std_fit_time':results['std_fit_time'][best],
          'mean_score_time':results['mean_score_time'][best],
          'std_score_time':results['std_score_time'][best],
          'best_param':clf.best_params_
          })

  df_bp=pd.DataFrame(summary_rows)
  df_bp.to_csv(path+filename)

In [152]:
# Run the full search on every registered dataset. 20% of each dataset is
# held out by the split; only the training part is passed to the search.
for dataset, loaded in dsdic.items():
  X,y=getXY(loaded,dataset)
  X_train,X_test,y_train,y_test=train_test_split(
    X,y,test_size=0.2,random_state=2021
  )
  print("Dataset",dataset)
  # The dataset name doubles as the name of the results file.
  random_searchFit(X_train,y_train,dataset)


Dataset breast_cancer
SScalerKANN
{'svc__kernel': ['rquadratic'], 'svc__C': array([1.00000000e+00, 3.59381366e+00, 1.29154967e+01, 4.64158883e+01,
       1.66810054e+02, 5.99484250e+02, 2.15443469e+03, 7.74263683e+03,
       2.78255940e+04, 1.00000000e+05]), 'svc__coef0': array([1.00000000e-04, 2.63665090e-04, 6.95192796e-04, 1.83298071e-03,
       4.83293024e-03, 1.27427499e-02, 3.35981829e-02, 8.85866790e-02,
       2.33572147e-01, 6.15848211e-01, 1.62377674e+00, 4.28133240e+00,
       1.12883789e+01, 2.97635144e+01, 7.84759970e+01, 2.06913808e+02,
       5.45559478e+02, 1.43844989e+03, 3.79269019e+03, 1.00000000e+04]), 'svc__gamma': ['auto']}
0.7296703296703296
{'svc__kernel': ['rbf'], 'svc__C': array([1.00000000e+00, 3.59381366e+00, 1.29154967e+01, 4.64158883e+01,
       1.66810054e+02, 5.99484250e+02, 2.15443469e+03, 7.74263683e+03,
       2.78255940e+04, 1.00000000e+05]), 'svc__gamma': array([1.00000000e-04, 2.63665090e-04, 6.95192796e-04, 1.83298071e-03,
       4.83293024e-03, 1