In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC

In [2]:
data = pd.read_csv('data/pokemon_preprocessed.csv')

In [3]:
pokemon_df = data.drop(['Winner','Type 1_first','Type 1_second','Type 2_first','Type 2_second','#_x','#_y'],axis=1)
pokemon_df

Unnamed: 0,First_pokemon,Second_pokemon,Winner_mark,HP_first,Attack_first,Defense_first,Sp. Atk_first,Sp. Def_first,Speed_first,Generation_first,Legendary_first,HP_second,Attack_second,Defense_second,Sp. Atk_second,Sp. Def_second,Speed_second,Generation_second,Legendary_second
0,266,298,2,50,64,50,45,50,41,2,0,70,70,40,60,40,60,3,0
1,702,701,2,91,90,72,90,129,108,5,1,91,129,90,72,90,108,5,1
2,191,668,2,55,40,85,80,105,40,2,0,75,75,75,125,95,40,5,0
3,237,683,2,40,40,40,70,40,20,2,0,77,120,90,60,90,48,5,0
4,151,231,1,70,60,125,115,70,55,1,0,20,10,230,10,230,5,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,707,126,1,100,120,100,150,120,90,5,1,30,40,70,70,25,60,1,0
49996,589,664,1,60,85,40,30,45,68,5,0,35,55,40,45,40,60,5,0
49997,303,368,2,60,50,100,85,70,65,3,0,73,115,60,60,60,90,3,0
49998,109,89,1,40,30,50,55,55,100,1,0,25,35,70,95,55,45,1,0


In [4]:
pokemon_df.drop(['First_pokemon','Second_pokemon'],axis=1,inplace=True)

In [5]:
pokemon_df

Unnamed: 0,Winner_mark,HP_first,Attack_first,Defense_first,Sp. Atk_first,Sp. Def_first,Speed_first,Generation_first,Legendary_first,HP_second,Attack_second,Defense_second,Sp. Atk_second,Sp. Def_second,Speed_second,Generation_second,Legendary_second
0,2,50,64,50,45,50,41,2,0,70,70,40,60,40,60,3,0
1,2,91,90,72,90,129,108,5,1,91,129,90,72,90,108,5,1
2,2,55,40,85,80,105,40,2,0,75,75,75,125,95,40,5,0
3,2,40,40,40,70,40,20,2,0,77,120,90,60,90,48,5,0
4,1,70,60,125,115,70,55,1,0,20,10,230,10,230,5,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,1,100,120,100,150,120,90,5,1,30,40,70,70,25,60,1,0
49996,1,60,85,40,30,45,68,5,0,35,55,40,45,40,60,5,0
49997,2,60,50,100,85,70,65,3,0,73,115,60,60,60,90,3,0
49998,1,40,30,50,55,55,100,1,0,25,35,70,95,55,45,1,0


In [6]:
pokemon_df['HP_diff'] = pokemon_df['HP_first'] - pokemon_df['HP_second']
pokemon_df['Attack_diff'] = pokemon_df['Attack_first'] - pokemon_df['Attack_second']
pokemon_df['Defense_diff'] = pokemon_df['Defense_first'] - pokemon_df['Defense_second']
pokemon_df['Sp. Atk_diff'] = pokemon_df['Sp. Atk_first'] - pokemon_df['Sp. Atk_second']
pokemon_df['Sp. Def_diff'] = pokemon_df['Sp. Def_first'] - pokemon_df['Sp. Def_second']
pokemon_df['Speed_diff'] = pokemon_df['Speed_first'] - pokemon_df['Speed_second']
pokemon_df['Legendary_second'].astype(str)

0        0
1        1
2        0
3        0
4        0
        ..
49995    0
49996    0
49997    0
49998    0
49999    0
Name: Legendary_second, Length: 50000, dtype: object

In [10]:
pokemon_df['Generation_second'] = pokemon_df['Generation_second'].astype(str)

In [11]:
pokemon_df = pd.get_dummies(pokemon_df, columns=['Generation_first','Generation_second','Legendary_first','Legendary_second'])

In [12]:
pokemon_df

Unnamed: 0,Winner_mark,HP_first,Attack_first,Defense_first,Sp. Atk_first,Sp. Def_first,Speed_first,HP_second,Attack_second,Defense_second,...,Generation_second_1,Generation_second_2,Generation_second_3,Generation_second_4,Generation_second_5,Generation_second_6,Legendary_first_0,Legendary_first_1,Legendary_second_0,Legendary_second_1
0,2,50,64,50,45,50,41,70,70,40,...,0,0,1,0,0,0,1,0,1,0
1,2,91,90,72,90,129,108,91,129,90,...,0,0,0,0,1,0,0,1,0,1
2,2,55,40,85,80,105,40,75,75,75,...,0,0,0,0,1,0,1,0,1,0
3,2,40,40,40,70,40,20,77,120,90,...,0,0,0,0,1,0,1,0,1,0
4,1,70,60,125,115,70,55,20,10,230,...,0,1,0,0,0,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,1,100,120,100,150,120,90,30,40,70,...,1,0,0,0,0,0,0,1,1,0
49996,1,60,85,40,30,45,68,35,55,40,...,0,0,0,0,1,0,1,0,1,0
49997,2,60,50,100,85,70,65,73,115,60,...,0,0,1,0,0,0,1,0,1,0
49998,1,40,30,50,55,55,100,25,35,70,...,1,0,0,0,0,0,1,0,1,0


In [13]:
X = pokemon_df.drop('Winner_mark',axis=1)
y = pokemon_df['Winner_mark']

In [14]:
X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=y,random_state=1)

In [15]:
# scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
# linearsvc
svc = SVC(kernel='linear',random_state=1,C=0.01)

In [17]:
svc.fit(X_train_scaled,y_train)

SVC(C=0.01, kernel='linear', random_state=1)

In [18]:
pred_train = svc.predict(X_train_scaled)
pred_test = svc.predict(X_test_scaled)
accuracy_score(y_train,pred_train),accuracy_score(y_test,pred_test)

(0.90992, 0.9144)

In [20]:
rbf_svc = SVC(kernel='rbf',
             C=1,
             gamma = 0.01,
             probability=True,
             random_state=1)

In [21]:
rbf_svc.fit(X_train_scaled,y_train)

SVC(C=1, gamma=0.01, probability=True, random_state=1)

In [22]:
pred_train_rbf= rbf_svc.predict(X_train_scaled)
pred_test_rbf = rbf_svc.predict(X_test_scaled)

In [23]:
accuracy_score(y_train,pred_train_rbf), accuracy_score(y_test,pred_test_rbf)

(0.9116266666666667, 0.91224)

In [27]:
from sklearn.metrics import recall_score,precision_score
recall_score(y_train,pred_train_rbf),precision_score(y_train,pred_train_rbf)

(0.910513530309022, 0.9030649408864235)

In [26]:
from sklearn.metrics import recall_score,precision_score, roc_auc_score ,average_precision_score
pos_proba = rbf_svc.predict_proba(X_train_scaled)[:,1]
roc_auc_score(y_train,pos_proba),average_precision_score(y_train,pos_proba)

(0.9363329870372088, 0.2925676338215185)