Import necessary packages.

In [41]:
import numpy as np
import pandas as pd
import matplotlib as plt 
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from math import sqrt


Import our two datasets.

In [2]:
pokemon = pd.read_csv('https://raw.githubusercontent.com/linr5/pokemon/master/pokemon.csv')
combats = pd.read_csv('https://raw.githubusercontent.com/linr5/pokemon/master/combats.csv')

Create new dataframe that takes into consideration the winrate of each Pokemon.

In [3]:
# start creating fields for total battles and total battles won
num_first_battles = combats["First_pokemon"].value_counts() # counts qty as first pokemon
num_second_battles = combats["Second_pokemon"].value_counts() # counts qty as second pokemon
num_total_battles = num_first_battles + num_second_battles #adds together 1st+2nd pokemon qty
num_total_won = combats["Winner"].value_counts() # counts qty as winner

# create new dataframe that displays winrate stats
battle_stats = pd.DataFrame({"total_battles": num_total_battles, "total_won": num_total_won}, columns=["total_battles", "total_won"])

# create new variable/column for winrate by dividing total battles won by total battles appeared in
# add this to our new battle_stats dataframe
battle_stats["winrate"] = battle_stats["total_won"]/battle_stats["total_battles"] * 100

# identify pokemon that always lost and fill NaN with 0
battle_stats.fillna(0, inplace=True)

# create ID column for pokemon then rearrange so ID is leftmost
battle_stats["ID"] = battle_stats.index
cols = list(battle_stats)
cols.insert(0, cols.pop(cols.index('ID')))
battle_stats = battle_stats.loc[:, cols]

# output results of our new battle stats
battle_stats.sort_values(by=['winrate'], ascending=False).head(3)

Unnamed: 0,ID,total_battles,total_won,winrate
155,155,129,127.0,98.449612
513,513,119,116.0,97.478992
704,704,125,121.0,96.8


For later purposes, we will create dictionaries. These will be used so that we can readily add the corresponding values to new columns we create based on matching Pokemon ID. Think of VLOOKUP and how it's used in Excel-- this is similar.

In [4]:
# Create new dictionaries that we will use to fill values for columns in our new dataframe
# This new dataframe will be called poke_data

namesDict = dict(zip(pokemon['ID'], pokemon['Name']))
hpDict = dict(zip(pokemon['ID'], pokemon['HP']))
attackDict = dict(zip(pokemon['ID'], pokemon['Attack']))
defenseDict = dict(zip(pokemon['ID'], pokemon['Defense']))
spattackDict = dict(zip(pokemon['ID'], pokemon['SpAtk']))
spdefenseDict = dict(zip(pokemon['ID'], pokemon['SpDef']))
speedDict = dict(zip(pokemon['ID'], pokemon['Speed']))
generationDict = dict(zip(pokemon['ID'], pokemon['Generation']))
legendaryDict = dict(zip(pokemon['ID'], pokemon['Legendary']))
type1Dict = dict(zip(pokemon['ID'], pokemon['Type 1']))
type2Dict = dict(zip(pokemon['ID'], pokemon['Type 2']))

Next up, we will create a new dataframe based off of our Pokemon dataframe. For this one particularly, it will only take into account quantitative variables like our stats. We will save the qualitative ones (i.e. Type 1, Type 2, Generation, and Legendary) for later.

In [5]:
# create new dataframe that merges pokemon df with battle_stats df
poke_corr = pd.merge(pokemon, battle_stats, right_index=True, left_on='ID')

# drop some unnecessary columns and rename ID column
poke_corr.drop(["ID_x", "Name", "Type 1", "Type 2", "Generation", "Legendary",
               "ID_y", "total_battles", "total_won"], axis=1, inplace=True)

# create a new dictionary for winrate that we will use later for our poke_data df
winrateDict = dict(zip(poke_corr['ID'], poke_corr['winrate']))

# output correlation map
poke_corr.corr()

Unnamed: 0,ID,HP,Attack,Defense,SpAtk,SpDef,Speed,winrate
ID,1.0,0.103511,0.105296,0.095125,0.089596,0.081203,0.009309,0.0612
HP,0.103511,1.0,0.420464,0.248069,0.366261,0.38557,0.183813,0.261602
Attack,0.105296,0.420464,1.0,0.442344,0.398339,0.265884,0.385933,0.502825
Defense,0.095125,0.248069,0.442344,1.0,0.221042,0.508399,0.011475,0.114565
SpAtk,0.089596,0.366261,0.398339,0.221042,1.0,0.503077,0.473344,0.481445
SpDef,0.081203,0.38557,0.265884,0.508399,0.503077,1.0,0.254372,0.302422
Speed,0.009309,0.183813,0.385933,0.011475,0.473344,0.254372,1.0,0.938055
winrate,0.0612,0.261602,0.502825,0.114565,0.481445,0.302422,0.938055,1.0


Now that we have a better idea of our independent variables, let's move onto constructing the features we will use in our models. For now, we will include all of the stats as independent variables. We will create these features by separating out the stats of the winning Pokemon and the losing Pokemon.

In [6]:
# copy combats dataset into poke_data df
poke_data = combats.copy()

# add feature fields for first pokemon
poke_data["Winrate_1"] = poke_data["First_pokemon"].replace(winrateDict) #first pokemon winrate
poke_data["Attack_1"] = poke_data["First_pokemon"].replace(attackDict) # first pokemon attack
poke_data["SpAtk_1"] = poke_data["First_pokemon"].replace(spattackDict) # first pokemon attack
poke_data["Speed_1"] = poke_data["First_pokemon"].replace(speedDict) # first pokemon speed
poke_data["HP_1"] = poke_data["First_pokemon"].replace(hpDict) # first pokemon speed
poke_data["Def_1"] = poke_data["First_pokemon"].replace(defenseDict) # first pokemon defense
poke_data["SpDef_1"] = poke_data["First_pokemon"].replace(spdefenseDict) # first pokemon sp defense
poke_data["isLegendary_1"] = poke_data["First_pokemon"].replace(legendaryDict) #1st pokemon legend

# add feature fields for second pokemon
poke_data["Winrate_2"] = poke_data["Second_pokemon"].replace(winrateDict) # second pokemon winrate
poke_data["Attack_2"] = poke_data["Second_pokemon"].replace(attackDict) # second pokemon attack
poke_data["SpAtk_2"] = poke_data["Second_pokemon"].replace(spattackDict) # second pokemon attack
poke_data["Speed_2"] = poke_data["Second_pokemon"].replace(speedDict) # second pokemon speed
poke_data["HP_2"] = poke_data["Second_pokemon"].replace(hpDict) # 2nd pokemon speed
poke_data["Def_2"] = poke_data["Second_pokemon"].replace(defenseDict) # 2nd pokemon defense
poke_data["SpDef_2"] = poke_data["Second_pokemon"].replace(spdefenseDict) # 2nd pokemon sp defense
poke_data["isLegendary_2"] = poke_data["Second_pokemon"].replace(legendaryDict) #2nd pokemon legend

Now, we will create another feature called FirstWin that will determine if the first Pokemon won or not. We will then drop the unnecessary fields like First_pokemon, Second_pokemon, and Winner. 

To do this, we'll first create a function that will return True/False if the first Pokemon won.

In [7]:
# define function that takes in a df and returns T/F values in a column if first Pokemon won
def firstwin(poke_data):
    if poke_data["First_pokemon"] == poke_data["Winner"]:
        return True
    return False

In [8]:
# create new column that defines if the first Pokemon won or not
# then apply the function we just created
poke_data["FirstWin"] = poke_data.apply(firstwin, axis=1)

# delete unnecessary columns for Pokemon IDs and Winner ID

# create new df for poke_data
poke_data1 = poke_data.copy()
poke_data1.drop(["First_pokemon", "Second_pokemon", "Winner"], axis=1, inplace=True)
poke_data1.head(3)

Unnamed: 0,Winrate_1,Attack_1,SpAtk_1,Speed_1,HP_1,Def_1,SpDef_1,isLegendary_1,Winrate_2,Attack_2,SpAtk_2,Speed_2,HP_2,Def_2,SpDef_2,isLegendary_2,FirstWin
0,22.727273,64,45,41,50,50,50,False,42.477876,70,60,60,70,40,40,False,False
1,89.6,90,90,108,91,72,129,True,85.365854,129,72,108,91,90,90,True,False
2,23.622047,40,80,40,55,85,105,False,15.671642,75,125,40,75,75,95,False,False


Now, let's prepare our train and test datasets.

In [9]:
# prepare train and test
X = poke_data.drop("FirstWin", axis=1)
y = poke_data["FirstWin"]
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, 
                                                    test_size=0.2, random_state=1)

We will use Gaussian Naive Bayes as a baseline model.

In [10]:
gaussnb = GaussianNB()
gaussnb.fit(X_train, y_train)
gaussnb_score = round(gaussnb.score(X_test, y_test) * 100,2)
print("Gaussian Naive Bayes Score: " + str(gaussnb_score))

Gaussian Naive Bayes Score: 82.82


Next, let's try using regression and see how accurate it is. For our purposes, we will try out Random Forest Regressor and Logistic Regression.

In [11]:
regr = RandomForestRegressor(random_state=0, n_estimators=10)
regr.fit(X_train, y_train)
rfreg_score = round(regr.score(X_train, y_train)*100,2)

logreg = LogisticRegression()
logreg.fit(X_train, y_train)
logreg_score = round(logreg.score(X_test, y_test)*100,2)

print("Random Forest Regression: " + str(rfreg_score))
print("Logistic Regression: " + str(logreg_score))

Random Forest Regression: 97.8
Logistic Regression: 88.85


Next, we can try out classification instead of regression. We will try out both Decision Tree Classifier and KNN Classifier.

In [12]:
dectree = DecisionTreeClassifier()
dectree.fit(X_train, y_train)
dectree_score = round(dectree.score(X_test, y_test)*100,2)

knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X_train, y_train)
knn_score = round(knn.score(X_test, y_test) * 100, 2)

print("Decision Tree Classifier: " + str(dectree_score))
print("KNeighbors Classifier: " + str(knn_score))

Decision Tree Classifier: 94.7
KNeighbors Classifier: 97.93


Now that we have our results, we can try some model tuning. Let's see what happens when we exclude the "weaker" independent variables like HP, defense, and special defense.

In [13]:
# create new df based on copy of previous
poke_data2 = poke_data.copy()

# remove features we no longer want in our model
poke_data2.drop(["HP_1", "Def_1", "SpDef_1", "HP_2", "Def_2", "SpDef_2"],
               axis=1, inplace=True)

# output results
poke_data2.head(3)

Unnamed: 0,First_pokemon,Second_pokemon,Winner,Winrate_1,Attack_1,SpAtk_1,Speed_1,isLegendary_1,Winrate_2,Attack_2,SpAtk_2,Speed_2,isLegendary_2,FirstWin
0,266,298,298,22.727273,64,45,41,False,42.477876,70,60,60,False,False
1,702,701,701,89.6,90,90,108,True,85.365854,129,72,108,True,False
2,191,668,668,23.622047,40,80,40,False,15.671642,75,125,40,False,False


In [14]:
# prepare train and test
X1 = poke_data2.drop("FirstWin", axis=1)
y1 = poke_data2["FirstWin"]
X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, train_size=0.8, 
                                                    test_size=0.2, random_state=1)

In [15]:
gaussnb = GaussianNB()
gaussnb.fit(X1_train, y1_train)
gaussnb_score = round(gaussnb.score(X1_test, y1_test) * 100,2)
print("Gaussian Naive Bayes Score: " + str(gaussnb_score))

Gaussian Naive Bayes Score: 84.61


In [16]:
# test results of regression
regr = RandomForestRegressor(random_state=0, n_estimators=10)
regr.fit(X1_train, y1_train)
rfreg_score = round(regr.score(X1_train, y1_train)*100,2)

logreg = LogisticRegression()
logreg.fit(X1_train, y1_train)
logreg_score = round(logreg.score(X1_test, y1_test)*100,2)

print("Random Forest Regression: " + str(rfreg_score))
print("Logistic Regression: " + str(logreg_score))

Random Forest Regression: 98.0
Logistic Regression: 88.89


In [17]:
# test results of classification
dectree = DecisionTreeClassifier()
dectree.fit(X1_train, y1_train)
dectree_score = round(dectree.score(X1_test, y1_test)*100,2)

knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X1_train, y1_train)
knn_score = round(knn.score(X1_test, y1_test) * 100, 2)

print("Decision Tree Classifier: " + str(dectree_score))
print("KNeighbors Classifier: " + str(knn_score))

Decision Tree Classifier: 94.99
KNeighbors Classifier: 98.48


Now, let's try out our next modeling approach that will drop the legendary features. We want to see if this improves accuracy or not.

In [18]:
# make a copy of the previous model into a new df
# we now want to experiment with dropping the legendary feature
poke_data3 = poke_data2.copy()
poke_data3.drop(["isLegendary_1", "isLegendary_2"],axis=1,inplace=True)
poke_data3.head(3)

Unnamed: 0,First_pokemon,Second_pokemon,Winner,Winrate_1,Attack_1,SpAtk_1,Speed_1,Winrate_2,Attack_2,SpAtk_2,Speed_2,FirstWin
0,266,298,298,22.727273,64,45,41,42.477876,70,60,60,False
1,702,701,701,89.6,90,90,108,85.365854,129,72,108,False
2,191,668,668,23.622047,40,80,40,15.671642,75,125,40,False


In [19]:
# prepare train and test
X2 = poke_data3.drop("FirstWin", axis=1)
y2 = poke_data3["FirstWin"]
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, train_size=0.8, 
                                                    test_size=0.2, random_state=1)

In [20]:
# gaussian baseline model
gaussnb = GaussianNB()
gaussnb.fit(X2_train, y2_train)
gaussnb_score = round(gaussnb.score(X2_test, y2_test) * 100,2)
print("Gaussian Naive Bayes Score: " + str(gaussnb_score))

Gaussian Naive Bayes Score: 86.06


In [21]:
# random forest regression model
regr = RandomForestRegressor(random_state=0, n_estimators=10)
regr.fit(X2_train, y2_train)
rfreg_score = round(regr.score(X2_train, y2_train)*100,2)

logreg = LogisticRegression()
logreg.fit(X2_train, y2_train)
logreg_score = round(logreg.score(X2_test, y2_test)*100,2)

print("Random Forest Regression: " + str(rfreg_score))
print("Logistic Regression: " + str(logreg_score))

Random Forest Regression: 98.06
Logistic Regression: 88.85


In [22]:
# test results of classification
# decision tree classifier
dectree = DecisionTreeClassifier()
dectree.fit(X2_train, y2_train)
dectree_score = round(dectree.score(X2_test, y2_test)*100,2)

# KNN classifier
knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X2_train, y2_train)
knn_score = round(knn.score(X2_test, y2_test) * 100, 2)

print("Decision Tree Classifier: " + str(dectree_score))
print("KNeighbors Classifier: " + str(knn_score))


Decision Tree Classifier: 95.09
KNeighbors Classifier: 98.48


Now that we are finished with that model, let's create a new scenario in which we both drop the legendary feature and account for type advantage.

In [33]:
# create a new df to now test the effects of adjusted attack
poke_data4 = poke_data2.copy()

# add in primary types
# these will only be used to populate the typeAdvantage field
# will be deleted later on when we test the model
poke_data4["Type1_1"] = poke_data4["First_pokemon"].replace(type1Dict)
poke_data4["Type1_2"] = poke_data4["Second_pokemon"].replace(type1Dict)
poke_data4.head(3)

Unnamed: 0,First_pokemon,Second_pokemon,Winner,Winrate_1,Attack_1,SpAtk_1,Speed_1,isLegendary_1,Winrate_2,Attack_2,SpAtk_2,Speed_2,isLegendary_2,FirstWin,Type1_1,Type1_2
0,266,298,298,22.727273,64,45,41,False,42.477876,70,60,60,False,False,Rock,Grass
1,702,701,701,89.6,90,90,108,True,85.365854,129,72,108,True,False,Grass,Rock
2,191,668,668,23.622047,40,80,40,False,15.671642,75,125,40,False,False,Fairy,Psychic


In [24]:
# define a function that returns the following:
# not too effective (0.5), super effective (2.0), normal damage (1.0)
def typeAdvantage(poke_data):
    if poke_data["Type1_1"] == "Bug":
        if poke_data["Type1_2"] == "Dark" or poke_data["Type1_2"] == "Grass" or poke_data["Type1_2"] == "Psychic":
            return 2.0
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Dark":
        if poke_data["Type1_2"] == "Ghost" or poke_data["Type1_2"] == "Psychic":
            return 2.0
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Dragon":
        if poke_data["Type1_2"] == "Dragon":
            return 2.0
        elif poke_data["Type1_2"] == "Fairy":
            return 0.5
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Electric":
        if poke_data["Type1_2"] == "Flying" or poke_data["Type1_2"] == "Water":
            return 2.0
        elif poke_data["Type1_2"] == "Ground":
            return 0.5
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Fairy":
        if poke_data["Type1_2"] == "Dark" or poke_data["Type1_2"] == "Dragon" or poke_data["Type1_2"] == "Fighting":
            return 2.0
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Fighting":
        if poke_data["Type1_2"] == "Dark" or poke_data["Type1_2"] == "Ice" or poke_data["Type1_2"] == "Normal" or poke_data["Type1_2"] == "Rock" or poke_data["Type1_2"] == "Steel":
            return 2.0
        elif poke_data["Type1_2"] == "Ghost":
            return 0.5
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Fire":
        if poke_data["Type1_2"] == "Bug" or poke_data["Type1_2"] == "Grass" or poke_data["Type1_2"] == "Ice" or poke_data["Type1_2"] == "Steel":
            return 2.0
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Flying":
        if poke_data["Type1_2"] == "Bug" or poke_data["Type1_2"] == "Fighting" or poke_data["Type1_2"] == "Grass":
            return 2.0
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Ghost":
        if poke_data["Type1_2"] == "Ghost" or poke_data["Type1_2"] == "Psychic":
            return 2.0
        elif poke_data["Type1_2"] == "Normal":
            return 0.5
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Grass":
        if poke_data["Type1_2"] == "Ground" or poke_data["Type1_2"] == "Rock" or poke_data["Type1_2"] == "Water":
            return 2.0
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Ground":
        if poke_data["Type1_2"] == "Electric" or poke_data["Type1_2"] == "Fire" or poke_data["Type1_2"] == "Poison" or poke_data["Type1_2"] == "Rock" or poke_data["Type1_2"] == "Steel":
            return 2.0
        elif poke_data["Type1_2"] == "Flying":
            return 0.5
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Ice":
        if poke_data["Type1_2"] == "Dragon" or poke_data["Type1_2"] == "Flying" or poke_data["Type1_2"] == "Grass" or poke_data["Type1_2"] == "Ground":
            return 2.0
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Normal":
        if poke_data["Type1_2"] == "Ghost":
            return 0.5
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Poison":
        if poke_data["Type1_2"] == "Fairy" or poke_data["Type1_2"] == "Grass":
            return 2.0
        elif poke_data["Type1_2"] == "Steel":
            return 0.5
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Psychic":
        if poke_data["Type1_2"] == "Fighting" or poke_data["Type1_2"] == "Poison":
            return 2.0
        elif poke_data["Type1_2"] == "Dark":
            return 0.5
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Rock":
        if poke_data["Type1_2"] == "Bug" or poke_data["Type1_2"] == "Fire" or poke_data["Type1_2"] == "Flying" or poke_data["Type1_2"] == "Ice":
            return 2.0
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Steel":
        if poke_data["Type1_2"] == "Fairy" or poke_data["Type1_2"] == "Ice" or poke_data["Type1_2"] == "Rock":
            return 2.0
        else:
            return 1.0
    elif poke_data["Type1_1"] == "Water":
        if poke_data["Type1_2"] == "Fire" or poke_data["Type1_2"] == "Ground" or poke_data["Type1_2"] == "Rock":
            return 2.0
        else:
            return 1.0
    return 1.0


In [34]:
# apply function to typeAdvantage column
poke_data4["typeAdvantage"] = poke_data4.apply(typeAdvantage, axis=1)
poke_data4.head(3)

Unnamed: 0,First_pokemon,Second_pokemon,Winner,Winrate_1,Attack_1,SpAtk_1,Speed_1,isLegendary_1,Winrate_2,Attack_2,SpAtk_2,Speed_2,isLegendary_2,FirstWin,Type1_1,Type1_2,typeAdvantage
0,266,298,298,22.727273,64,45,41,False,42.477876,70,60,60,False,False,Rock,Grass,1.0
1,702,701,701,89.6,90,90,108,True,85.365854,129,72,108,True,False,Grass,Rock,2.0
2,191,668,668,23.622047,40,80,40,False,15.671642,75,125,40,False,False,Fairy,Psychic,1.0


In [35]:
# create a new featuer column
# multiply attack of Pokemon 1 by the type advance multiplier
poke_data4["adjAttack_1"] = poke_data4["Attack_1"] * poke_data4["typeAdvantage"]

In [36]:
# delete unnecessary columns
poke_data4.drop(["Attack_1", "Type1_1", "Type1_2",
                "typeAdvantage"], axis=1, inplace=True)


In [27]:
# move some columns around
cols = list(poke_data4)
cols.insert(0, cols.pop(cols.index('adjAttack_1')))
poke_data4 = poke_data4.loc[:, cols]
poke_data4.head(3)

Unnamed: 0,adjAttack_1,Winrate_1,SpAtk_1,Speed_1,isLegendary_1,Winrate_2,Attack_2,SpAtk_2,Speed_2,isLegendary_2,FirstWin
0,64.0,22.727273,45,41,False,42.477876,70,60,60,False,False
1,180.0,89.6,90,108,True,85.365854,129,72,108,True,False
2,40.0,23.622047,80,40,False,15.671642,75,125,40,False,False


In [37]:
# prepare train and test
X3 = poke_data4.drop("FirstWin", axis=1)
y3 = poke_data4["FirstWin"]
X3_train, X3_test, y3_train, y3_test = train_test_split(X3, y3, train_size=0.8, 
                                                    test_size=0.2, random_state=1)

In [40]:
# gaussian naive model for baseline
gaussnb = GaussianNB()
gaussnb.fit(X3_train, y3_train)
gaussnb_score = round(gaussnb.score(X3_test, y3_test) * 100,2)
print("Gaussian Naive Bayes Score: " + str(gaussnb_score))

Gaussian Naive Bayes Score: 84.87


In [38]:
# random forest regression
regr = RandomForestRegressor(random_state=0, n_estimators=10)
regr.fit(X3_train, y3_train)
rfreg_score = round(regr.score(X3_train, y3_train)*100,2)

# logistic regression
logreg = LogisticRegression()
logreg.fit(X3_train, y3_train)
logreg_score = round(logreg.score(X3_test, y3_test)*100,2)

print("Random Forest Regression: " + str(rfreg_score))
print("Logistic Regression: " + str(logreg_score))

Random Forest Regression: 98.12
Logistic Regression: 88.53


In [39]:
# decision tree classifier
dectree = DecisionTreeClassifier()
dectree.fit(X3_train, y3_train)
dectree_score = round(dectree.score(X3_test, y3_test)*100,2)

# knn classifier
knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X3_train, y3_train)
knn_score = round(knn.score(X3_test, y3_test) * 100, 2)

print("Decision Tree Classifier: " + str(dectree_score))
print("KNeighbors Classifier: " + str(knn_score))


Decision Tree Classifier: 95.72
KNeighbors Classifier: 98.48
