In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

In [2]:
# Final column order for the working frame: the twelve feature columns first,
# with the target column 'Class' placed last.
nombres = ['Mean', 'Variance', 'Standard Deviation', 'Entropy', 'Skewness', 'Kurtosis',
           'Contrast', 'Energy', 'ASM', 'Homogeneity', 'Dissimilarity', 'Correlation',
           'Class']

path = r'data_cerebro/Brain Tumor.csv'
# Read the CSV, discard the 'Image' and 'Coarseness' columns, then reorder the
# remaining columns according to `nombres`.
data = pd.read_csv(path).drop(columns=['Image', 'Coarseness'])[nombres]
data

Unnamed: 0,Mean,Variance,Standard Deviation,Entropy,Skewness,Kurtosis,Contrast,Energy,ASM,Homogeneity,Dissimilarity,Correlation,Class
0,6.535339,619.587845,24.891522,0.109059,4.276477,18.900575,98.613971,0.293314,0.086033,0.530941,4.473346,0.981939,0
1,8.749969,805.957634,28.389393,0.266538,3.718116,14.464618,63.858816,0.475051,0.225674,0.651352,3.220072,0.988834,0
2,7.341095,1143.808219,33.820234,0.001467,5.061750,26.479563,81.867206,0.031917,0.001019,0.268275,5.981800,0.978014,1
3,5.958145,959.711985,30.979219,0.001477,5.677977,33.428845,151.229741,0.032024,0.001026,0.243851,7.700919,0.964189,1
4,7.315231,729.540579,27.010009,0.146761,4.283221,19.079108,174.988756,0.343849,0.118232,0.501140,6.834689,0.972789,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3757,21.234512,1208.850174,34.768523,0.063774,2.082079,4.647310,158.437600,0.220666,0.048693,0.487131,5.211739,0.950972,0
3758,20.435349,1227.151440,35.030721,0.066763,2.144625,4.882034,161.158675,0.225931,0.051045,0.502712,5.083126,0.952749,0
3759,18.011520,1151.582765,33.934978,0.068396,2.308349,5.579498,167.130118,0.228930,0.052409,0.492269,5.103700,0.952181,0
3760,13.330429,945.732779,30.752769,0.087872,2.732822,7.757570,223.812932,0.261527,0.068397,0.480064,6.439784,0.940898,0


In [3]:
# Separate the feature matrix (every column except 'Class') from the target vector.
# The column is dropped via the `columns=` keyword: passing the axis positionally
# (`drop(['Class'], 1)`) was deprecated in pandas 1.3 and raises TypeError on pandas >= 2.0.
X = np.array(data.drop(columns=['Class']))
y = np.array(data['Class'])

In [4]:
# Hold out 30% of the samples as a test set for evaluating the models.
# The seed is fixed so that the split, and every score computed from it, is the
# same after a kernel restart; without it each run shuffles differently.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [5]:
# Preprocessing + classifier chain: scale to [0, 1], apply a standardized
# Yeo-Johnson power transform, scale to [0, 1] again, then fit a polynomial-kernel SVC.
pasos = [
    ('escalmaiento_1', MinMaxScaler(feature_range=(0, 1))),
    ('Transformacion', PowerTransformer(method='yeo-johnson', standardize=True)),
    ('escalmaiento_2', MinMaxScaler(feature_range=(0, 1))),
    ('SVC', SVC(C=6, kernel='poly')),
]
pipe = Pipeline(steps=pasos)

# Fit on the training split only; the test split is used purely for evaluation.
pipe.fit(x_train, y_train)
y_pred = pipe.predict(x_test)

# Report accuracy on both splits.
print(f"Accuracy (train): {pipe.score(x_train, y_train)}")
print(f"Accuracy (test): {pipe.score(x_test, y_test)}")

Accuracy (train): 0.9958222559817699
Accuracy (test): 0.9937998228520815


### Otra forma: evaluación con validación cruzada (K-Fold)

In [6]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [7]:
# Same preprocessing + SVC chain as a list of (name, estimator) steps.
estimadores = [
    ('escalmaiento_1', MinMaxScaler(feature_range=(0, 1))),
    ('Transformacion', PowerTransformer(method='yeo-johnson', standardize=True)),
    ('escalmaiento_2', MinMaxScaler(feature_range=(0, 1))),
    ('SVC', SVC(C=6, kernel='poly')),
]
model = Pipeline(estimadores)

# 10-fold cross-validation over the full data set; the pipeline is passed as the
# estimator, so the scalers and the transformer are fit by cross_val_score on each split.
kfold = KFold(n_splits=10)
results = cross_val_score(model, X, y, cv=kfold)

# Mean accuracy across folds, with the standard deviation in parentheses.
print(f"Accuracy: {results.mean()*100.0:,.2f}% ({results.std()*100.0:,.2f})")

Accuracy: 99.34% (0.40)
