In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC

In [2]:
data = pd.read_csv('data/pokemon_preprocessed.csv')

In [3]:
pokemon_df = data.drop(['Winner','Type 1_first','Type 1_second','Type 2_first','Type 2_second','#_x','#_y'],axis=1)
pokemon_df

Unnamed: 0,First_pokemon,Second_pokemon,Winner_mark,HP_first,Attack_first,Defense_first,Sp. Atk_first,Sp. Def_first,Speed_first,Generation_first,Legendary_first,HP_second,Attack_second,Defense_second,Sp. Atk_second,Sp. Def_second,Speed_second,Generation_second,Legendary_second
0,266,298,2,50,64,50,45,50,41,2,0,70,70,40,60,40,60,3,0
1,702,701,2,91,90,72,90,129,108,5,1,91,129,90,72,90,108,5,1
2,191,668,2,55,40,85,80,105,40,2,0,75,75,75,125,95,40,5,0
3,237,683,2,40,40,40,70,40,20,2,0,77,120,90,60,90,48,5,0
4,151,231,1,70,60,125,115,70,55,1,0,20,10,230,10,230,5,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,707,126,1,100,120,100,150,120,90,5,1,30,40,70,70,25,60,1,0
49996,589,664,1,60,85,40,30,45,68,5,0,35,55,40,45,40,60,5,0
49997,303,368,2,60,50,100,85,70,65,3,0,73,115,60,60,60,90,3,0
49998,109,89,1,40,30,50,55,55,100,1,0,25,35,70,95,55,45,1,0


In [4]:
pokemon_df.drop(['First_pokemon','Second_pokemon'],axis=1,inplace=True)

In [5]:
pokemon_df

Unnamed: 0,Winner_mark,HP_first,Attack_first,Defense_first,Sp. Atk_first,Sp. Def_first,Speed_first,Generation_first,Legendary_first,HP_second,Attack_second,Defense_second,Sp. Atk_second,Sp. Def_second,Speed_second,Generation_second,Legendary_second
0,2,50,64,50,45,50,41,2,0,70,70,40,60,40,60,3,0
1,2,91,90,72,90,129,108,5,1,91,129,90,72,90,108,5,1
2,2,55,40,85,80,105,40,2,0,75,75,75,125,95,40,5,0
3,2,40,40,40,70,40,20,2,0,77,120,90,60,90,48,5,0
4,1,70,60,125,115,70,55,1,0,20,10,230,10,230,5,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,1,100,120,100,150,120,90,5,1,30,40,70,70,25,60,1,0
49996,1,60,85,40,30,45,68,5,0,35,55,40,45,40,60,5,0
49997,2,60,50,100,85,70,65,3,0,73,115,60,60,60,90,3,0
49998,1,40,30,50,55,55,100,1,0,25,35,70,95,55,45,1,0


In [6]:
pokemon_df['HP_diff'] = pokemon_df['HP_first'] - pokemon_df['HP_second']
pokemon_df['Attack_diff'] = pokemon_df['Attack_first'] - pokemon_df['Attack_second']
pokemon_df['Defense_diff'] = pokemon_df['Defense_first'] - pokemon_df['Defense_second']
pokemon_df['Sp. Atk_diff'] = pokemon_df['Sp. Atk_first'] - pokemon_df['Sp. Atk_second']
pokemon_df['Sp. Def_diff'] = pokemon_df['Sp. Def_first'] - pokemon_df['Sp. Def_second']
pokemon_df['Speed_diff'] = pokemon_df['Speed_first'] - pokemon_df['Speed_second']
pokemon_df['Legendary_second'].astype(str)

0        0
1        1
2        0
3        0
4        0
        ..
49995    0
49996    0
49997    0
49998    0
49999    0
Name: Legendary_second, Length: 50000, dtype: object

In [10]:
pokemon_df['Generation_second'] = pokemon_df['Generation_second'].astype(str)

In [32]:
pokemon_df = pd.read_csv('data/pokemon_df_rf.csv')

In [33]:
pokemon_df

Unnamed: 0,Winner_mark,Generation_first,Legendary_first,Generation_second,Legendary_second,HP_diff,Attack_diff,Defense_diff,Sp. Atk_diff,Sp. Def_diff,Speed_diff
0,2,2,0,3,0,-20,-6,10,-15,10,-19
1,2,5,1,5,1,0,-39,-18,18,39,0
2,2,2,0,5,0,-20,-35,10,-45,10,0
3,2,2,0,5,0,-37,-80,-50,10,-50,-28
4,1,1,0,2,0,50,50,-105,105,-160,50
...,...,...,...,...,...,...,...,...,...,...,...
49995,1,5,1,1,0,70,80,30,80,95,30
49996,1,5,0,5,0,25,30,0,-15,5,8
49997,2,3,0,3,0,-13,-65,40,25,10,-25
49998,1,1,0,1,0,15,-5,-20,-40,0,55


In [34]:
pokemon_df = pd.get_dummies(pokemon_df, columns=['Generation_first','Generation_second','Legendary_first','Legendary_second'])

In [35]:
X = pokemon_df.drop('Winner_mark',axis=1)
y = pokemon_df['Winner_mark']

In [36]:
X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=y,random_state=1)

In [37]:
# scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [38]:
# linearsvc
svc = SVC(kernel='linear',random_state=1,C=0.01)

In [39]:
svc.fit(X_train_scaled,y_train)

SVC(C=0.01, kernel='linear', random_state=1)

In [40]:
pred_train = svc.predict(X_train_scaled)
pred_test = svc.predict(X_test_scaled)
accuracy_score(y_train,pred_train),accuracy_score(y_test,pred_test)

(0.9082933333333333, 0.91368)

In [41]:
rbf_svc = SVC(kernel='rbf',
             C=1,
             gamma = 0.01,
             probability=True,
             random_state=1)

In [42]:
rbf_svc.fit(X_train_scaled,y_train)

SVC(C=1, gamma=0.01, probability=True, random_state=1)

In [43]:
pred_train_rbf= rbf_svc.predict(X_train_scaled)
pred_test_rbf = rbf_svc.predict(X_test_scaled)

In [44]:
accuracy_score(y_train,pred_train_rbf), accuracy_score(y_test,pred_test_rbf)

(0.9098666666666667, 0.91192)

In [45]:
from sklearn.metrics import recall_score,precision_score
recall_score(y_train,pred_train_rbf),precision_score(y_train,pred_train_rbf)

(0.9068979153720129, 0.9026145628338488)

In [46]:
from sklearn.metrics import recall_score,precision_score, roc_auc_score ,average_precision_score
pos_proba = rbf_svc.predict_proba(X_train_scaled)[:,1]
roc_auc_score(y_train,pos_proba),average_precision_score(y_train,pos_proba)

(0.9295344558784943, 0.2940021367612162)

In [50]:
from sklearn.model_selection import train_test_split, GridSearchCV
param = {
    'kernel':['rbf','linear'],
    'C':[0.001,0.01,0.1,1],
    'gamma':[0.001,0.01,0.1,1,10],
}
svc = SVC(random_state=1,probability=True)
gs_svc = GridSearchCV(svc,
                     param_grid = param,
                     scoring = 'accuracy',
                     cv = 3,
                     n_jobs=-1)

In [51]:
gs_svc.fit(X_train_scaled,y_train)

GridSearchCV(cv=3, estimator=SVC(probability=True, random_state=1), n_jobs=-1,
             param_grid={'C': [0.001, 0.01, 0.1, 1],
                         'gamma': [0.001, 0.01, 0.1, 1, 10],
                         'kernel': ['rbf', 'linear']},
             scoring='accuracy')

In [52]:
gs_svc.best_params_

{'C': 1, 'gamma': 0.001, 'kernel': 'linear'}

In [53]:
pd.DataFrame(gs_svc.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,676.263652,0.324433,20.782194,0.066827,0.001,0.001,rbf,"{'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}",0.52792,0.528,0.528,0.527973,3.8e-05,34
1,313.159039,7.191226,6.744949,0.219101,0.001,0.001,linear,"{'C': 0.001, 'gamma': 0.001, 'kernel': 'linear'}",0.89432,0.89424,0.89624,0.894933,0.000925,20
2,671.725689,0.809226,20.576911,0.115483,0.001,0.01,rbf,"{'C': 0.001, 'gamma': 0.01, 'kernel': 'rbf'}",0.63696,0.62488,0.63048,0.630773,0.004936,32
3,339.284716,20.179149,9.040647,1.171559,0.001,0.01,linear,"{'C': 0.001, 'gamma': 0.01, 'kernel': 'linear'}",0.89432,0.89424,0.89624,0.894933,0.000925,20
4,767.480394,0.558935,21.306297,1.503122,0.001,0.1,rbf,"{'C': 0.001, 'gamma': 0.1, 'kernel': 'rbf'}",0.52792,0.528,0.528,0.527973,3.8e-05,34
5,352.701813,4.665465,7.615892,0.593072,0.001,0.1,linear,"{'C': 0.001, 'gamma': 0.1, 'kernel': 'linear'}",0.89432,0.89424,0.89624,0.894933,0.000925,20
6,744.75899,12.26704,22.109016,0.71826,0.001,1.0,rbf,"{'C': 0.001, 'gamma': 1, 'kernel': 'rbf'}",0.52792,0.528,0.528,0.527973,3.8e-05,34
7,336.700046,13.488006,6.584439,0.121813,0.001,1.0,linear,"{'C': 0.001, 'gamma': 1, 'kernel': 'linear'}",0.89432,0.89424,0.89624,0.894933,0.000925,20
8,691.346408,2.425049,28.863946,2.28311,0.001,10.0,rbf,"{'C': 0.001, 'gamma': 10, 'kernel': 'rbf'}",0.52792,0.528,0.528,0.527973,3.8e-05,34
9,324.638821,6.202079,7.033309,0.384668,0.001,10.0,linear,"{'C': 0.001, 'gamma': 10, 'kernel': 'linear'}",0.89432,0.89424,0.89624,0.894933,0.000925,20


In [54]:
pred_train_gs = gs_svc.predict(X_train_scaled)
pred_test_gs = gs_svc.predict(X_test_scaled)

In [55]:
accuracy_score(y_train,pred_train_gs),accuracy_score(y_test,pred_test_gs)

(0.9100266666666667, 0.91568)

In [56]:
data = pd.read_csv('data/type_data.csv')

In [57]:
data = pd.get_dummies(data, columns = ['Type 1_first','Type 1_second','Type 2_first','Type 2_second'])

In [58]:
X_type = data.drop('Winner_mark',axis=1)
y_type = data['Winner_mark']

In [59]:
X_type_train,X_type_test,y_type_train,y_type_test = train_test_split(X_type,y_type,stratify=y_type,random_state=1)

In [61]:
svc = SVC(kernel='linear',random_state=1,C=0.01)

In [62]:
svc.fit(X_type_train,y_type_train)

SVC(C=0.01, kernel='linear', random_state=1)

In [63]:
pred_type_train = svc.predict(X_type_train)
pred_type_test = svc.predict(X_type_test)
accuracy_score(y_type_train,pred_type_train),accuracy_score(y_type_test,pred_type_test)

(0.5365066666666667, 0.5348)

In [None]:
# rbf type 

In [64]:
rbf_svc = SVC(kernel='rbf',
             C=1,
             gamma = 0.01,
             probability=True,
             random_state=1)

In [65]:
rbf_svc.fit(X_type_train,y_type_train)

SVC(C=1, gamma=0.01, probability=True, random_state=1)

In [66]:
pred_type_train_rbf= rbf_svc.predict(X_type_train)
pred_type_test_rbf = rbf_svc.predict(X_type_test)

In [67]:
accuracy_score(y_type_train,pred_type_train_rbf), accuracy_score(y_type_test,pred_type_test_rbf)

(0.66648, 0.51192)

In [None]:
# pipeline

In [None]:
pipleline_svc = Pipeline([('scaler',StandardScaler()),('svc',SVC())],verbose=True)

In [None]:
param_grid = {
    'kernel':['rbf','linear'],
    'svc_C' : [0.001,0.01,0.1,1],
    'svc_gamma':[0.001,0.01,0.1,1,10]
}
gridsearch_svc = GridSearchCV(pipeline_svc,
                           param_grid = param_grid,
                           scoring = 'accuracy',
                           cv = 3,
                           n_jobs=-1)
gridsearch_svc.fit()
pred_train = gridsearch_svc.predict()
pred_test = gridsearch_svc.predict()

accuracy_score(y_train, pred_train), accuracy_score(y_test, pred_test)

In [90]:
#categorical

In [91]:
data_categorical = pd.read_csv('data/data_categorical.csv')

In [92]:
data_categorical = pd.get_dummies(data_categorical, columns = ['Type 1_first','Type 1_second','Type 2_first','Type 2_second','Legendary_first','Legendary_second','Generation_first','Generation_second'])

In [93]:
X_cate = data_categorical.drop('Winner_mark',axis=1)
y_cate = data_categorical['Winner_mark']

In [97]:
X_cate_train,X_cate_test,y_cate_train,y_cate_test = train_test_split(X_cate,y_cate,stratify=y_cate,random_state=1)

In [98]:
scaler_cate = StandardScaler()

In [99]:
X_cate_train_scaled = scaler_num.fit_transform(X_cate_train)
X_cate_test_scaled = scaler_num.fit_transform(X_cate_test)

In [100]:
svc = SVC(kernel='linear',random_state=1,C=0.01)

In [101]:
svc.fit(X_cate_train,y_cate_train)

SVC(C=0.01, kernel='linear', random_state=1)

In [102]:
pred_cate_train = svc.predict(X_cate_train)
pred_cate_test = svc.predict(X_cate_test)
accuracy_score(y_cate_train,pred_cate_train),accuracy_score(y_cate_test,pred_cate_test)

(0.6578133333333334, 0.66064)

In [68]:
# numerical

In [81]:
data_numerical = pd.read_csv('data/pokemon_df_rf.csv')

In [82]:
data_numerical = data_numerical.drop(['Generation_first','Generation_second','Legendary_first','Legendary_second'],axis=1)

In [83]:
X_num = data_numerical.drop('Winner_mark',axis=1)
y_num = data_numerical['Winner_mark']

In [84]:
X_num_train,X_num_test,y_num_train,y_num_test = train_test_split(X_num,y_num,stratify=y_num,random_state=1)

In [85]:
scaler_num = StandardScaler()

In [86]:
X_num_train_scaled = scaler_num.fit_transform(X_num_train)
X_num_test_scaled = scaler_num.fit_transform(X_num_test)

In [87]:
svc = SVC(kernel='linear',random_state=1,C=0.01)

In [88]:
svc.fit(X_num_train,y_num_train)

SVC(C=0.01, kernel='linear', random_state=1)

In [89]:
pred_num_train = svc.predict(X_num_train)
pred_num_test = svc.predict(X_num_test)
accuracy_score(y_num_train,pred_num_train),accuracy_score(y_num_test,pred_num_test)

(0.91184, 0.91696)

In [None]:
#numerical+legendary

In [103]:
data_numerical_lg = pd.read_csv('data/data_numerical_lg.csv')

In [104]:
X_num_lg = data_numerical_lg.drop('Winner_mark',axis=1)
y_num_lg = data_numerical_lg['Winner_mark']

In [105]:
X_num_lg_train,X_num_lg_test,y_num_lg_train,y_num_lg_test = train_test_split(X_num_lg,y_num_lg,stratify=y_num,random_state=1)

In [106]:
scaler_num_lg = StandardScaler()

In [107]:
X_num_lg_train_scaled = scaler_num_lg.fit_transform(X_num_lg_train)
X_num_lg_test_scaled = scaler_num_lg.fit_transform(X_num_lg_test)

In [108]:
svc = SVC(kernel='linear',random_state=1,C=0.01)

In [109]:
svc.fit(X_num_lg_train,y_num_lg_train)

SVC(C=0.01, kernel='linear', random_state=1)

In [110]:
pred_num_lg_train = svc.predict(X_num_lg_train)
pred_num_lg_test = svc.predict(X_num_lg_test)
accuracy_score(y_num_lg_train,pred_num_lg_train),accuracy_score(y_num_lg_test,pred_num_lg_test)

(0.9113333333333333, 0.91664)

In [None]:
# numerical_lg_Type1

In [118]:
from sklearn.pipeline import Pipeline

In [133]:
data_num_lg_type1 = pd.read_csv('data/data_num_lg_type1.csv')
data_num_lg_type1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   Type 1_first      50000 non-null  int64
 1   Type 1_second     50000 non-null  int64
 2   Legendary_first   50000 non-null  int64
 3   Legendary_second  50000 non-null  int64
 4   HP_diff           50000 non-null  int64
 5   Attack_diff       50000 non-null  int64
 6   Defense_diff      50000 non-null  int64
 7   Sp. Atk_diff      50000 non-null  int64
 8   Sp. Def_diff      50000 non-null  int64
 9   Speed_diff        50000 non-null  int64
 10  Winner_mark       50000 non-null  int64
dtypes: int64(11)
memory usage: 4.2 MB


In [125]:
data_num_lg_type1 = pd.get_dummies(data_num_lg_type1,columns=['Type 1_first','Type 1_second'])

In [130]:
data_num_lg_type1

Unnamed: 0,Legendary_first,Legendary_second,HP_diff,Attack_diff,Defense_diff,Sp. Atk_diff,Sp. Def_diff,Speed_diff,Winner_mark,Type 1_first_0,...,Type 1_second_8,Type 1_second_9,Type 1_second_10,Type 1_second_11,Type 1_second_12,Type 1_second_13,Type 1_second_14,Type 1_second_15,Type 1_second_16,Type 1_second_17
0,0,0,-20,-6,10,-15,10,-19,2,0,...,0,1,0,0,0,0,0,0,0,0
1,1,1,0,-39,-18,18,39,0,2,0,...,0,0,0,0,0,0,0,1,0,0
2,0,0,-20,-35,10,-45,10,0,2,0,...,0,0,0,0,0,0,1,0,0,0
3,0,0,-37,-80,-50,10,-50,-28,2,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,50,50,-105,105,-160,50,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,1,0,70,80,30,80,95,30,1,0,...,0,0,0,0,0,0,0,0,0,1
49996,0,0,25,30,0,-15,5,8,1,0,...,0,0,0,0,0,0,0,0,0,0
49997,0,0,-13,-65,40,25,10,-25,2,0,...,0,0,0,0,1,0,0,0,0,0
49998,0,0,15,-5,-20,-40,0,55,1,0,...,0,0,0,0,0,0,0,0,0,0


In [134]:
X_nlt1 = data_num_lg_type1.drop('Winner_mark',axis=1)
y_nlt1 = data_num_lg_type1['Winner_mark']

In [135]:
X_nlt1_train, X_nlt1_test,y_nlt1_train,y_nlt1_test = train_test_split(X_nlt1,y_nlt1,stratify=y_nlt1,random_state=1)

In [136]:
pipeline_svc_proba = Pipeline([('scaler',StandardScaler()),('svc',SVC(probability=True))],verbose=True)
pipeline_svc_proba.steps

[('scaler', StandardScaler()), ('svc', SVC(probability=True))]

In [137]:
pipeline_svc_proba.fit(X_nlt1_train,y_nlt1_train)

[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ............... (step 2 of 2) Processing svc, total= 4.5min


Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC(probability=True))],
         verbose=True)

In [138]:
pred_nlt1_train = pipeline_svc_proba.predict(X_nlt1_train)
pred_nlt1_test = pipeline_svc_proba.predict(X_nlt1_test)

In [132]:
# 원핫인코딩 했을때
accuracy_score(y_nlt1_train,pred_nlt1_train),accuracy_score(y_nlt1_test,pred_nlt1_test)

(0.9227466666666667, 0.91296)

In [139]:
# 원핫인코딩 안했을때
accuracy_score(y_nlt1_train,pred_nlt1_train),accuracy_score(y_nlt1_test,pred_nlt1_test)

(0.93016, 0.92616)

In [None]:
# numerical_lg_Type2

In [140]:
data_num_lg_type2 = pd.read_csv('data/data_num_lg_type2.csv')
data_num_lg_type2

Unnamed: 0,Type 2_first,Type 2_second,Legendary_first,Legendary_second,HP_diff,Attack_diff,Defense_diff,Sp. Atk_diff,Sp. Def_diff,Speed_diff,Winner_mark
0,10,1,0,0,-20,-6,10,-15,10,-19,2
1,5,5,1,1,0,-39,-18,18,39,0,2
2,7,12,0,0,-20,-35,10,-45,10,0,2
3,12,12,0,0,-37,-80,-50,10,-50,-28,2
4,18,16,0,0,50,50,-105,105,-160,50,1
...,...,...,...,...,...,...,...,...,...,...,...
49995,6,12,1,0,70,80,30,80,95,30,1
49996,12,12,0,0,25,30,0,-15,5,8,1
49997,7,12,0,0,-13,-65,40,25,10,-25,2
49998,12,17,0,0,15,-5,-20,-40,0,55,1


In [None]:
data_num_lg_type2= pd.get_dummies(data_num_lg_type2,columns=['Type 2_first','Type 2_second'])

In [141]:
X_nlt2 = data_num_lg_type2.drop('Winner_mark',axis=1)
y_nlt2 = data_num_lg_type2['Winner_mark']

In [142]:
X_nlt2_train, X_nlt2_test,y_nlt2_train,y_nlt2_test = train_test_split(X_nlt2,y_nlt2,stratify=y_nlt2,random_state=1)

In [143]:
pipeline_svc_proba = Pipeline([('scaler',StandardScaler()),('svc',SVC(probability=True))],verbose=True)
pipeline_svc_proba.steps

[('scaler', StandardScaler()), ('svc', SVC(probability=True))]

In [144]:
pipeline_svc_proba.fit(X_nlt2_train,y_nlt2_train)

[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ............... (step 2 of 2) Processing svc, total= 3.6min


Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC(probability=True))],
         verbose=True)

In [145]:
pred_nlt2_train = pipeline_svc_proba.predict(X_nlt2_train)
pred_nlt2_test = pipeline_svc_proba.predict(X_nlt2_test)

In [146]:
# 원핫인코딩 안함
accuracy_score(y_nlt2_train,pred_nlt2_train),accuracy_score(y_nlt2_test,pred_nlt2_test)

(0.9289066666666667, 0.9248)

In [None]:
# speed+legendary

In [147]:
data_num_lg_type2 = pd.read_csv('data/data_num_lg_type2.csv')
data_num_lg_type2

Unnamed: 0,Type 2_first,Type 2_second,Legendary_first,Legendary_second,HP_diff,Attack_diff,Defense_diff,Sp. Atk_diff,Sp. Def_diff,Speed_diff,Winner_mark
0,10,1,0,0,-20,-6,10,-15,10,-19,2
1,5,5,1,1,0,-39,-18,18,39,0,2
2,7,12,0,0,-20,-35,10,-45,10,0,2
3,12,12,0,0,-37,-80,-50,10,-50,-28,2
4,18,16,0,0,50,50,-105,105,-160,50,1
...,...,...,...,...,...,...,...,...,...,...,...
49995,6,12,1,0,70,80,30,80,95,30,1
49996,12,12,0,0,25,30,0,-15,5,8,1
49997,7,12,0,0,-13,-65,40,25,10,-25,2
49998,12,17,0,0,15,-5,-20,-40,0,55,1


In [150]:
speed_leg = data_num_lg_type2[['Speed_diff','Legendary_first','Legendary_second','Winner_mark']]

In [151]:
X_sl = speed_leg.drop('Winner_mark',axis=1)
y_sl = speed_leg['Winner_mark']

In [153]:
X_sl_train,X_sl_test,y_sl_train,y_sl_test = train_test_split(X_sl,y_sl,stratify=y_sl,random_state=1)

In [154]:
pipeline_svc_proba = Pipeline([('scaler',StandardScaler()),('svc',SVC(probability=True))],verbose=True)
pipeline_svc_proba.steps

[('scaler', StandardScaler()), ('svc', SVC(probability=True))]

In [155]:
pipeline_svc_proba.fit(X_sl_train,y_sl_train)

[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ............... (step 2 of 2) Processing svc, total= 1.7min


Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC(probability=True))],
         verbose=True)

In [156]:
pred_sl_train = pipeline_svc_proba.predict(X_sl_train)
pred_sl_test = pipeline_svc_proba.predict(X_sl_test)

In [157]:
accuracy_score(y_sl_train,pred_sl_train),accuracy_score(y_sl_test,pred_sl_test)

(0.9333066666666666, 0.93752)