In [None]:

# Print the full path of every file found under the Kaggle input directory.
import os

for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))


Inspired By:

https://www.kaggle.com/rautaki0127/pokemon-data-science-challenge

# Libraries

In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from wordcloud import WordCloud
from pandas_profiling import ProfileReport
from sklearn.model_selection import train_test_split

%matplotlib inline

# Import Combat DF

In [None]:
# Load the battle log. Later cells read its First_pokemon, Second_pokemon and Winner columns.
combats = pd.read_csv('../input/pokemon-challenge/combats.csv')
combats.head()

# Import Pokemon Stats

In [None]:
# Load the per-Pokemon stats table; '#' is the id column used to join against the battle log.
pokemon = pd.read_csv('../input/pokemon-challenge/pokemon.csv')
# Pull out the rows with ids 266 and 298 for a quick look.
pokemon266_298 = pokemon[pokemon['#'].isin([266,298])]
pokemon266_298

# Create new combat df with names

In [None]:
# id -> name lookup, used to turn the id-based battle log into a readable one.
names_dict = pokemon.set_index('#')['Name'].to_dict()
cols = ['First_pokemon', 'Second_pokemon', 'Winner']
combatsName = combats.loc[:, cols].replace(names_dict)
combatsName.head()

# Analyse base stats for Pokemon



Add up all stats to see if there is a correlation between base stats and winning

In [None]:
# Total base stats per Pokemon: the sum of its six individual stats.
pokemon['stats_sum'] = (
    pokemon['HP']
    + pokemon['Attack']
    + pokemon['Defense']
    + pokemon['Sp. Atk']
    + pokemon['Sp. Def']
    + pokemon['Speed']
)

statsSum = pokemon['stats_sum']
print(statsSum.describe())

# Histogram of the totals.
fig = go.Figure()
fig = fig.add_trace(go.Histogram(x=statsSum))
fig.update_traces(marker_color='#F40D0D', selector={'type': 'histogram'})
fig.update_layout(
    title_text='Total Stats Summary',
    xaxis_title_text='Stat Total',
    yaxis_title_text='Count of Pokemon',
    bargap=0.2,  # gap between neighbouring bars
)
fig.show()

The histogram shows a bimodal distribution (two peaks). Next we need to check the difference in total base stats between the two Pokemon in each fight.

In [None]:
# Swap every Pokemon id in the battle log for that Pokemon's stat total.
statsSumDict = dict(zip(pokemon['#'], pokemon['stats_sum']))
combatStatsSum = combats[cols].replace(statsSumDict)

# Absolute gap in stat totals between the two combatants of each battle.
diffStats = (combatStatsSum['First_pokemon'] - combatStatsSum['Second_pokemon']).abs()
print(diffStats.describe())

In [None]:
statsSum = pokemon['stats_sum']

fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Total Stats Summary', 'Difference of Pokemon stats Probablility Density'),
)

# Left panel: distribution of stat totals across all Pokemon.
fig.add_trace(go.Histogram(x=statsSum, marker_color='green'), row=1, col=1)
fig.update_xaxes(title_text="Total Stats", row=1, col=1)
fig.update_yaxes(title_text='Count of Pokemon', row=1, col=1)

# Right panel: per-battle stat gap, normalised to a probability density.
fig.add_trace(go.Histogram(x=diffStats, marker_color='#F40D0D'), row=1, col=2)
fig.update_traces(histnorm='probability density', row=1, col=2)
fig.update_xaxes(title_text="Total Stats Difference", row=1, col=2)
fig.update_yaxes(title_text='Density', row=1, col=2)

# Figure-level layout; the axis titles here apply to the first (left) axes pair.
layoutOptions = dict(
    title_text='Total Stats Summary',
    xaxis_title_text='Stat Total',
    yaxis_title_text='Count of Pokemon',
    bargap=0.2,  # gap between neighbouring bars
)
fig.update_layout(**layoutOptions)
fig.show()

print(diffStats.describe())

The distribution above shows two peaks (a bimodal distribution) in our "Total Stats" figure.

The second figure shows that, within each fight, the difference between the two Pokemon's total stats is above 100 points in about 50% of the battles.

This leads to the question:

# Did the pokemon with higher stats win against the pokemon with lower overall stats?

In [None]:
# The loser's stat total is whichever combatant's total is not the winner's:
# the first Pokemon's when it differs from the winner's, otherwise the second's.
firstStats = combatStatsSum['First_pokemon']
firstDiffersFromWinner = firstStats != combatStatsSum['Winner']
combatStatsSum['Loser'] = firstStats.where(firstDiffersFromWinner, combatStatsSum['Second_pokemon'])

# Positive values: the winner had the higher total; negative values: an upset.
diffWinLoseStats = combatStatsSum['Winner'] - combatStatsSum['Loser']
print(diffWinLoseStats.describe())

fig = go.Figure()
fig.add_trace(go.Histogram(x=diffWinLoseStats, marker_color='blue'))
fig.update_layout(
    title_text='Difference of Win/Loss overall Stats',
    xaxis_title_text='Difference Total',
    yaxis_title_text='Count of Pokemon',
    bargap=0.2,  # gap between neighbouring bars
)
fig.show()




The chart above shows that about two-thirds of the time the Pokemon with the higher total stats defeats the one with the lower total. In the remaining third, the lower-stat Pokemon beat the higher-stat one. Let's look into those battles next.

# Check the combo of battles where lower beat the higher by < -50

In [None]:
# Attach the winner-minus-loser stat gap to every battle and keep the upsets:
# battles whose winner had a stat total more than 50 points below the loser's.
combatStatsSum['diff'] = diffWinLoseStats
betteredStatsSum = combatStatsSum[combatStatsSum['diff'] < -50]
print('Number of battles the lesser total stats won: ' + str(len(betteredStatsSum)))

# Recover the Pokemon ids for those battles from the matching rows of `combats`.
# combatStatsSum was built from `combats[cols]` without reindexing, so the row
# labels line up. Mapping stat totals back to ids through a reversed dict is lossy
# (different Pokemon can share the same total) and would show the wrong names.
# It would also overwrite statsSumDict, which stays the id -> total mapping here.
betteredID = combats.loc[betteredStatsSum.index, cols]
betteredName = betteredID.replace(names_dict)
betteredName.join(combatStatsSum['diff']).head()

There are 9499 battles in which the Pokemon with the lower total stats won despite a stat deficit of more than 50 points.

In [None]:
# Share of all battles that were upsets by more than 50 stat points.
# Formatted as a real percentage so the number matches the "%" in the label.
print('The % of battles lost by higher stat pokemon > 50: {:.2%}'.format(len(betteredStatsSum) / len(combats)))

# Check the biggest difference

In [None]:
# Re-attach the stat gap and sort ascending, so the most negative gaps (largest upsets) come first.
betteredName.join(combatStatsSum['diff']).sort_values(by='diff').head()

The biggest upset was a 562-point difference between the lower-stat winner and the higher-stat loser.

Reference: https://www.kaggle.com/rautaki0127/pokemon-data-science-challenge#63-pokemon's-name-is-NaN.-Searching-by-Internet,-#63-pokemon-is-%22Primeape%22.

# Check the Type Factors in battles

In [None]:
# Distinct primary types, in order of first appearance.
type1Values = pokemon['Type 1'].unique()
print('There are {} Types'.format(len(type1Values)))
list(type1Values)

In [None]:
# Count the distinct (Type 1, Type 2) pairs that occur in the stats table.
typeCols = ['Type 1', 'Type 2']
print('There are {} unique type combos.'.format(len(pokemon[typeCols].drop_duplicates())))

In [None]:
# Give single-type Pokemon an explicit 'None' secondary type so they are kept in the cross-tab.
pokemon['Type 2'] = pokemon['Type 2'].fillna('None')
typeCross = pd.crosstab(pokemon['Type 1'], pokemon['Type 2'])

# Stacked bars: one bar per Type 1, split by Type 2, largest total first.
fig = px.bar(typeCross, title="Pokemon Type Combos by Type 1")
fig.update_xaxes(categoryorder='total descending')
fig.update_layout(yaxis_title='Count of Combos')
fig.show()

In [None]:
# Same cross-tab with the axes swapped: one bar per Type 2, split by Type 1.
typeCross2 = pd.crosstab(pokemon['Type 2'], pokemon['Type 1'])

fig = px.bar(typeCross2, title="Pokemon Type Combos by Type 2")
fig.update_xaxes(categoryorder='total descending')
fig.update_layout(yaxis_title='Count of Combos')
fig.show()

Single Type Chart

In [None]:
# Pokemon whose secondary type is the 'None' placeholder; the Type 2 column is then redundant.
singleType = pokemon.loc[pokemon['Type 2'] == 'None'].drop(columns='Type 2')

fig = px.histogram(singleType['Type 1'], title="Pokemon Count by Single Type")
fig.update_xaxes(categoryorder='total descending')
fig.update_layout(yaxis_title='Count of Combos')
fig.show()

# Type Adv/Disadv Chart

Found a website that shows a chart of type advantages: https://pokemondb.net/type

Data Legend-

    0 = No effect
    
    .5 = Not Very Effective
    
    1 = Normal
    
    2 = Super-Effective
 
 Create dict() for each type

In [None]:
# Type-effectiveness chart. Each dict is named after the ATTACKING type and is keyed
# by the DEFENDING type; the value is the damage multiplier
# (0 = no effect, 0.5 = not very effective, 1 = normal, 2 = super-effective).
# Corrected against the reference chart: Steel -> Fairy is 2 (was 0.5),
# Psychic -> Fighting is 2 (was 1) and Psychic -> Ground is 1 (was 2).
Normal = {"Normal": 1, "Fighting": 1, "Poison": 1, "Ground": 1, "Flying": 1, "Bug": 1, "Rock": 0.5, "Ghost": 0, "Steel": 0.5, "Fire": 1, "Water": 1, "Electric": 1, "Grass": 1, "Ice": 1, "Psychic": 1, "Dragon": 1, "Dark": 1, "Fairy": 1}
Fighting = {"Normal": 2, "Fighting": 1, "Poison": 0.5, "Ground": 1, "Flying": 0.5, "Bug": 0.5, "Rock": 2, "Ghost": 0, "Steel": 2, "Fire": 1, "Water": 1, "Electric": 1, "Grass": 1, "Ice": 2, "Psychic": 0.5, "Dragon": 1, "Dark": 2, "Fairy": 0.5}
Poison = {"Normal": 1, "Fighting": 1, "Poison": 0.5, "Ground": 0.5, "Flying": 1, "Bug": 1, "Rock": 0.5, "Ghost": 0.5, "Steel": 0, "Fire": 1, "Water": 1, "Electric": 1, "Grass": 2, "Ice": 1, "Psychic": 1, "Dragon": 1, "Dark": 1, "Fairy": 2}
Ground = {"Normal": 1, "Fighting": 1, "Poison": 2, "Ground": 1, "Flying": 0, "Bug": 0.5, "Rock": 2, "Ghost": 1, "Steel": 2, "Fire": 2, "Water": 1, "Electric": 2, "Grass": 0.5, "Ice": 1, "Psychic": 1, "Dragon": 1, "Dark": 1, "Fairy": 1}
Flying = {"Normal": 1, "Fighting": 2, "Poison": 1, "Ground": 1, "Flying": 1, "Bug": 2, "Rock": 0.5, "Ghost": 1, "Steel": 0.5, "Fire": 1, "Water": 1, "Electric": 0.5, "Grass": 2, "Ice": 1, "Psychic": 1, "Dragon": 1, "Dark": 1, "Fairy": 1}
Bug = {"Normal": 1, "Fighting": 0.5, "Poison": 0.5, "Ground": 1, "Flying": 0.5, "Bug": 1, "Rock": 1, "Ghost": 0.5, "Steel": 0.5, "Fire": 0.5, "Water": 1, "Electric": 1, "Grass": 2, "Ice": 1, "Psychic": 2, "Dragon": 1, "Dark": 2, "Fairy": 0.5}
Rock = {"Normal": 1, "Fighting": 0.5, "Poison": 1, "Ground": 0.5, "Flying": 2, "Bug": 2, "Rock": 1, "Ghost": 1, "Steel": 0.5, "Fire": 2, "Water": 1, "Electric": 1, "Grass": 1, "Ice": 2, "Psychic": 1, "Dragon": 1, "Dark": 1, "Fairy": 1}
Ghost = {"Normal": 0, "Fighting": 1, "Poison": 1, "Ground": 1, "Flying": 1, "Bug": 1, "Rock": 1, "Ghost": 2, "Steel": 1, "Fire": 1, "Water": 1, "Electric": 1, "Grass": 1, "Ice": 1, "Psychic": 2, "Dragon": 1, "Dark": 0.5, "Fairy": 1}
Steel = {"Normal": 1, "Fighting": 1, "Poison": 1, "Ground": 1, "Flying": 1, "Bug": 1, "Rock": 2, "Ghost": 1, "Steel": 0.5, "Fire": 0.5, "Water": 0.5, "Electric": 0.5, "Grass": 1, "Ice": 2, "Psychic": 1, "Dragon": 1, "Dark": 1, "Fairy": 2}
Fire = {"Normal": 1, "Fighting": 1, "Poison": 1, "Ground": 1, "Flying": 1, "Bug": 2, "Rock": 0.5, "Ghost": 1, "Steel": 2, "Fire": 0.5, "Water": 0.5, "Electric": 1, "Grass": 2, "Ice": 2, "Psychic": 1, "Dragon": 0.5, "Dark": 1, "Fairy": 1}
Water = {"Normal": 1, "Fighting": 1, "Poison": 1, "Ground": 2, "Flying": 1, "Bug": 1, "Rock": 2, "Ghost": 1, "Steel": 1, "Fire": 2, "Water": 0.5, "Electric": 1, "Grass": 0.5, "Ice": 1, "Psychic": 1, "Dragon": 0.5, "Dark": 1, "Fairy": 1}
Electric = {"Normal": 1, "Fighting": 1, "Poison": 1, "Ground": 0, "Flying": 2, "Bug": 1, "Rock": 1, "Ghost": 1, "Steel": 1, "Fire": 1, "Water": 2, "Electric": 0.5, "Grass": 0.5, "Ice": 1, "Psychic": 1, "Dragon": 0.5, "Dark": 1, "Fairy": 1}
Grass = {"Normal": 1, "Fighting": 1, "Poison": 0.5, "Ground": 2, "Flying": 0.5, "Bug": 0.5, "Rock": 2, "Ghost": 1, "Steel": 0.5, "Fire": 0.5, "Water": 2, "Electric": 1, "Grass": 0.5, "Ice": 1, "Psychic": 1, "Dragon": 0.5, "Dark": 1, "Fairy": 1}
Ice = {"Normal": 1, "Fighting": 1, "Poison": 1, "Ground": 2, "Flying": 2, "Bug": 1, "Rock": 1, "Ghost": 1, "Steel": 0.5, "Fire": 0.5, "Water": 0.5, "Electric": 1, "Grass": 2, "Ice": 0.5, "Psychic": 1, "Dragon": 2, "Dark": 1, "Fairy": 1}
Psychic = {"Normal": 1, "Fighting": 2, "Poison": 2, "Ground": 1, "Flying": 1, "Bug": 1, "Rock": 1, "Ghost": 1, "Steel": 0.5, "Fire": 1, "Water": 1, "Electric": 1, "Grass": 1, "Ice": 1, "Psychic": 0.5, "Dragon": 1, "Dark": 0, "Fairy": 1}
Dragon = {"Normal": 1, "Fighting": 1, "Poison": 1, "Ground": 1, "Flying": 1, "Bug": 1, "Rock": 1, "Ghost": 1, "Steel": 0.5, "Fire": 1, "Water": 1, "Electric": 1, "Grass": 1, "Ice": 1, "Psychic": 1, "Dragon": 2, "Dark": 1, "Fairy": 0}
Dark = {"Normal": 1, "Fighting": 0.5, "Poison": 1, "Ground": 1, "Flying": 1, "Bug": 1, "Rock": 1, "Ghost": 2, "Steel": 1, "Fire": 1, "Water": 1, "Electric": 1, "Grass": 1, "Ice": 1, "Psychic": 2, "Dragon": 1, "Dark": 0.5, "Fairy": 0.5}
Fairy = {"Normal": 1, "Fighting": 2, "Poison": 0.5, "Ground": 1, "Flying": 1, "Bug": 1, "Rock": 1, "Ghost": 1, "Steel": 0.5, "Fire": 0.5, "Water": 1, "Electric": 1, "Grass": 1, "Ice": 1, "Psychic": 1, "Dragon": 2, "Dark": 2, "Fairy": 1}

# Attacking type -> its per-defender multiplier dict.
typeRelation = {"Normal": Normal, "Fighting": Fighting, "Poison": Poison, "Ground": Ground, "Flying": Flying, "Bug": Bug, "Rock": Rock, "Ghost": Ghost, "Steel": Steel, "Fire": Fire, "Water": Water, "Electric": Electric, "Grass": Grass, "Ice": Ice, "Psychic": Psychic, "Dragon": Dragon, "Dark": Dark, "Fairy": Fairy}

In [None]:
# Columns come from the outer (attacking-type) keys, rows from the inner (defending-type) keys.
print('Row is Deffender, Column is Attacker')
dfTypeRelation = pd.DataFrame(typeRelation)

# Heatmap of the multipliers, with the attacker labels along the top edge.
fig = px.imshow(dfTypeRelation, color_continuous_scale="cividis")
fig.update_xaxes(side='top')
fig.show()

# Find Pokemon with Most Wins and add to word cloud

In [None]:
# Name-based battle log and the 20 most frequent winners.
combatNames = combats[cols].replace(names_dict)
winCounts = combatNames['Winner'].value_counts()
print(winCounts.iloc[:20])

# One comma-separated string of every winner, fed to the word cloud generator.
winners = combatNames['Winner'].tolist()
winnersSTR = list(map(str, winners))
winnersTXT = ','.join(winnersSTR)
wc = WordCloud(background_color='black', margin=2).generate(winnersTXT)

plt.figure(figsize=(20, 20))
plt.axis('off')
plt.imshow(wc)

# Create a win/loss ratio DF

In [None]:
# Appearances per Pokemon name as the first and as the second combatant.
firstNum = combatNames['First_pokemon'].value_counts()
secondNum = combatNames['Second_pokemon'].value_counts()

# fill_value=0 keeps a Pokemon that only ever appears on one side of a battle;
# a plain `+` would turn its battle count (and therefore its ratio) into NaN.
totalBattles = firstNum.add(secondNum, fill_value=0)

# 'win' stays NaN for a Pokemon that never won; that case is handled further down.
battleWin = pd.DataFrame({'battle': totalBattles, 'win': combatNames['Winner'].value_counts()}, columns=['battle', 'win'])
battleWin['ratio'] = battleWin['win']/battleWin['battle']
battleWin.sort_values(by=['ratio'], ascending=False).head(20)

# Prediction Time

Check for NaN or Null left in the df

In [None]:
# Print column dtypes and non-null counts for both frames to spot missing values.
pokemon.info()
battleWin.info()

There is a pokemon that does not have a win or a ratio

In [None]:
# Rows whose win count is missing.
battleWin[battleWin['win'].isnull()]

Shuckle has 135 battles but no wins, so its win count and ratio are NaN. Let's change those NaNs to zeros.

In [None]:
# Shuckle never won: replace its missing win count and ratio with 0.
for column in ('win', 'ratio'):
    battleWin.loc["Shuckle", column] = 0
battleWin.loc[battleWin.index == 'Shuckle']

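Now we need to find the pokemon that have a ratio > 1. A real win ratio can never exceed 1, so a larger value marks a pokemon whose ratio was never filled in from the battle data.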

In [None]:
# Map Pokemon name -> id so the name-indexed battleWin rows can be tied back to `pokemon`.
id_dict = dict(zip(pokemon['Name'], pokemon['#']))
battleWin["Name"] = battleWin.index
battleWin["#"] = battleWin["Name"].replace(id_dict)
# Copy the win count onto `pokemon` by replacing each id with its win count.
# Series.replace leaves values without a matching key unchanged, so a Pokemon whose id
# is not in battleWin keeps its own id number in "win" (and in "ratio" below), not NaN.
# NOTE(review): the later `ratio > 1` / `ratio <= 1` filters appear to rely on this —
# confirm before switching to .map.
ratio_dict = dict(zip(battleWin['#'], battleWin['win']))
pokemon["win"] = pokemon["#"].replace(ratio_dict)
# Same approach for the win ratio; `ratio_dict` is reused for the id -> ratio mapping.
ratio_dict = dict(zip(battleWin['#'], battleWin['ratio']))
pokemon["ratio"] = pokemon["#"].replace(ratio_dict)

pokemon.head()

In [None]:
# A ratio above 1 is not a real win ratio; these rows never had one filled in.
hasNoRatio = pokemon['ratio'].gt(1)
noBattle = pokemon.loc[hasNoRatio]
print('There are {} pokemon who have NaN ratio (above 1)'.format(noBattle.shape[0]))

noBattle[['#','Name', 'ratio']]

Check for NaN Names

In [None]:
# Re-check non-null counts, then list the rows whose Name is missing.
pokemon.info()
pokemon[pokemon['Name'].isnull()]

Pokemon 63 is missing its name; a quick internet search shows it is "Primeape".

In [None]:
# Pokemon #63 has no name in the source data; it is Primeape (the original spelling
# 'Primape' was a typo). Select the row by its '#' id instead of a hard-coded row
# label, so the fix still hits the right row if the frame is ever reindexed.
pokemon.loc[pokemon['#'] == 63, 'Name'] = 'Primeape'
pokemon[pokemon['Name'] == 'Primeape'][['#', 'Name', 'ratio']]

In [None]:
# Keep only Pokemon with a genuine win ratio (at most 1).
battlePokemon = pokemon.loc[pokemon['ratio'].le(1)]

# Win ratio against stat total, coloured by primary type, sized by win count, with an OLS trend line.
fig = px.scatter(
    battlePokemon,
    x='stats_sum',
    y='ratio',
    color='Type 1',
    size='win',
    trendline='ols',
    hover_name='Name',
)
fig.show()

In [None]:
# Preview the filtered frame used for the scatter plots.
battlePokemon.head()

# Graphs by Type 1

In [None]:
# One facet per primary type, three facets per row.
fig = px.scatter(
    battlePokemon,
    x='stats_sum',
    y='ratio',
    color='Type 1',
    size='win',
    trendline='ols',
    hover_name='Name',
    facet_col='Type 1',
    facet_col_wrap=3,
    width=1500,
    height=3000,
)
fig.show()

# Graphs By Type 2

In [None]:
# One facet per secondary type, three facets per row.
fig = px.scatter(
    battlePokemon,
    x='stats_sum',
    y='ratio',
    color='Type 2',
    size='win',
    trendline='ols',
    hover_name='Name',
    facet_col='Type 2',
    facet_col_wrap=3,
    width=1500,
    height=3000,
)
fig.show()

# Single Type Pokemon Graphs

In [None]:
# Restrict to Pokemon whose secondary type is the 'None' placeholder (single-type Pokemon).
singleType = battlePokemon[battlePokemon['Type 2'] == 'None']
singleType

There are 377 Pokemon with only 1 type.

In [None]:
# Win ratio against stat total for single-type Pokemon only.
fig = px.scatter(
    singleType,
    x='stats_sum',
    y='ratio',
    color='Type 1',
    size='win',
    trendline='ols',
    hover_name='Name',
)
fig.show()

# Graphs by Type

In [None]:
# Single-type Pokemon, one facet per type, three facets per row.
fig = px.scatter(
    singleType,
    x='stats_sum',
    y='ratio',
    color='Type 1',
    size='win',
    trendline='ols',
    hover_name='Name',
    facet_col='Type 1',
    facet_col_wrap=3,
    width=1500,
    height=3000,
)
fig.show()

# Prep Dataframe for Predictions

Add specs to battle dataframe

In [None]:
# Work on a copy and collapse the two type columns into one "Type1/Type2" string.
pokemonCOPY = pokemon.copy()
typePairs = pokemonCOPY[['Type 1', 'Type 2']]
pokemonCOPY['Type'] = typePairs.apply(lambda pair: '/'.join(pair), axis=1)
pokemonCOPY = pokemonCOPY.drop(columns=['Type 1', 'Type 2'])
print(pokemonCOPY.head())

In [None]:
combatAddData = combats.copy()

# id -> value lookups, one per feature used for prediction.
pokemonIds = pokemonCOPY['#']
typeDict = dict(zip(pokemonIds, pokemonCOPY['Type']))
hpDict = dict(zip(pokemonIds, pokemonCOPY['HP']))
attackDict = dict(zip(pokemonIds, pokemonCOPY['Attack']))
defenseDict = dict(zip(pokemonIds, pokemonCOPY['Defense']))
spattackDict = dict(zip(pokemonIds, pokemonCOPY['Sp. Atk']))
spdefenseDict = dict(zip(pokemonIds, pokemonCOPY['Sp. Def']))
speedDict = dict(zip(pokemonIds, pokemonCOPY['Speed']))
statsSumDict = dict(zip(pokemonIds, pokemonCOPY['stats_sum']))
winSumDict = dict(zip(pokemonIds, pokemonCOPY['win']))
ratioDict = dict(zip(pokemonIds, pokemonCOPY['ratio']))

# (column suffix, lookup) pairs in the order the feature columns are created.
# The 'DP_DEF' suffix is kept as is because the test-set cell uses the same column names.
featureLookups = [
    ('Type', typeDict),
    ('HP', hpDict),
    ('ATK', attackDict),
    ('DEF', defenseDict),
    ('SP_ATK', spattackDict),
    ('DP_DEF', spdefenseDict),
    ('Speed', speedDict),
    ('STATS_SUM', statsSumDict),
    ('RATIO', ratioDict),
]

# Add the first Pokemon's features, then the second's, by replacing its id with each value.
for side in ('First_pokemon', 'Second_pokemon'):
    for suffix, lookup in featureLookups:
        combatAddData[side + '_' + suffix] = combatAddData[side].replace(lookup)

In [None]:
# Preview the type-effectiveness table that the next function indexes as [attacker][defender].
dfTypeRelation.head()

Function to build a column for type relation/damage

In [None]:
def calcRelationType(combatAddData):
    """Return the combined type multiplier of the first Pokemon against the second.

    Parameters
    ----------
    combatAddData : one battle row, indexable by column name, holding
        'First_pokemon_Type' and 'Second_pokemon_Type' as "Type1/Type2"
        strings. A missing secondary type is spelled "None".

    Returns
    -------
    The product of dfTypeRelation[attacking][defending] over every pairing of
    the first Pokemon's types with the second Pokemon's types, skipping
    "None" secondary types.
    """
    firstParts = combatAddData['First_pokemon_Type'].split('/')
    firstPrimary, firstSecondary = firstParts[0], firstParts[1]
    secondParts = combatAddData['Second_pokemon_Type'].split('/')
    secondPrimary, secondSecondary = secondParts[0], secondParts[1]

    # The primary type always takes part; the secondary only when it is a real type.
    attackTypes = [firstPrimary]
    if firstSecondary != "None":
        attackTypes.append(firstSecondary)
    defendTypes = [secondPrimary]
    if secondSecondary != "None":
        defendTypes.append(secondSecondary)

    relation = 1
    for attacking in attackTypes:
        for defending in defendTypes:
            relation = relation * dfTypeRelation[attacking][defending]
    return relation

# Type multiplier of the first Pokemon against the second, computed row by row.
combatAddData['Relation'] = combatAddData.apply(calcRelationType, axis=1)

In [None]:
# Target label: 1 when the first Pokemon is the winner, otherwise 0.
firstIsWinner = combatAddData['First_pokemon'] == combatAddData['Winner']
combatAddData['First Win'] = firstIsWinner.astype('int64')

# Remove the id and type-string columns that are no longer needed.
dropCols = ["First_pokemon", "Second_pokemon", "Winner", "First_pokemon_Type", "Second_pokemon_Type"]
combatAddData = combatAddData.drop(columns=dropCols)
combatAddData.head()

# Summarize and Split final dataset

In [None]:
# Column summary of the model-ready frame, followed by a pandas_profiling report of it.
combatAddData.info()
ProfileReport(combatAddData)

In [None]:
# Features are everything except the label; 80% of the rows go to training.
X = combatAddData.drop(columns='First Win')
y = combatAddData['First Win']
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=1)

for label, part in (("X_train", X_train), ("X_test", X_test), ("y_train", y_train), ("y_test", y_test)):
    print(label + ".shape = " + str(part.shape))

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit on the training split, then report test accuracy as a percentage with two decimals.
logreg = LogisticRegression(max_iter=2000)
logreg.fit(X_train, y_train)
logregScore = logreg.score(X_test, y_test)
accLog = round(logregScore * 100, 2)
accLog

# KNN Classification

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbours classifier; test accuracy as a percentage with two decimals.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
knnScore = knn.score(X_test, y_test)
accKnn = round(knnScore * 100, 2)
accKnn

# Gaussian Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes; test accuracy as a percentage with two decimals.
gaussian = GaussianNB()
gaussian.fit(X_train, y_train)
gaussianScore = gaussian.score(X_test, y_test)
accGaussian = round(gaussianScore * 100, 2)
accGaussian

# Perceptron 

In [None]:
# Perceptron: test accuracy as a percentage with two decimals.
from sklearn.linear_model import Perceptron

perceptron = Perceptron()
perceptron.fit(X_train, y_train)
perceptronScore = perceptron.score(X_test, y_test)
accPerceptron = round(perceptronScore * 100, 2)
accPerceptron

# Decision Tree

In [None]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier

# random_state is fixed (same seed as the train/test split) so that the score is
# reproducible on a fresh Restart & Run All.
decisionTree = DecisionTreeClassifier(random_state=1)
decisionTree.fit(X_train, y_train)
# Test accuracy as a percentage with two decimals.
accDecisionTree = round(decisionTree.score(X_test, y_test) * 100, 2)
accDecisionTree

# Random Forest

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

# random_state is fixed (same seed as the train/test split) so that the score, the
# feature importances and the final predictions are reproducible across runs.
randomForest = RandomForestClassifier(n_estimators=100, random_state=1)
randomForest.fit(X_train, y_train)
# Test accuracy as a percentage with two decimals.
accRandomForest = round(randomForest.score(X_test, y_test) * 100, 2)
accRandomForest

Create accuracy dataframe

In [None]:
# Collect the test accuracy of every model and rank them, best first.
modelNames = ['Logistice Regression', 'KNN', 'Naive Bayes', 'Perceptron', 'Decision Tree', 'Random Forest']
modelScores = [accLog, accKnn, accGaussian, accPerceptron, accDecisionTree, accRandomForest]
models = pd.DataFrame({'Models': modelNames, 'Score': modelScores})

models.sort_values(by='Score', ascending=False)

The highest score is from the Random Forest. Let's look at its feature importances to see which features matter most.

In [None]:
# Pair each feature column with the random forest's importance for it, most important first.
effective = pd.DataFrame({
    "feature_name": X.columns.tolist(),
    "feature_importance": randomForest.feature_importances_,
})
effective.sort_values("feature_importance", ascending=False)

# Test Predictions

In [None]:
# Load the battles to predict and work on a copy, so the original ids stay available in `tests`.
tests = pd.read_csv('../input/pokemon-challenge/tests.csv')
testAddData = tests.copy()
testAddData.head()

In [None]:

# (column suffix, lookup) pairs in the same order used for the training features,
# so the test frame ends up with the same columns in the same order.
testFeatureLookups = [
    ('Type', typeDict),
    ('HP', hpDict),
    ('ATK', attackDict),
    ('DEF', defenseDict),
    ('SP_ATK', spattackDict),
    ('DP_DEF', spdefenseDict),
    ('Speed', speedDict),
    ('STATS_SUM', statsSumDict),
    ('RATIO', ratioDict),
]

# Add the first Pokemon's features, then the second's, by replacing its id with each value.
for side in ('First_pokemon', 'Second_pokemon'):
    for suffix, lookup in testFeatureLookups:
        testAddData[side + '_' + suffix] = testAddData[side].replace(lookup)

In [None]:
# Type multiplier of the first Pokemon against the second, computed row by row.
testAddData["Relation"] = testAddData.apply(calcRelationType, axis=1)

# Remove the id and type-string columns so that only model features remain.
dropCols2 = ["First_pokemon", "Second_pokemon", "First_pokemon_Type", "Second_pokemon_Type"]
testAddData = testAddData.drop(columns=dropCols2)

In [None]:
# Predict, for every test battle, whether the first Pokemon wins (1) or not (0).
y_predict = randomForest.predict(testAddData)
data = {"First_pokemon": tests["First_pokemon"], "Second_pokemon": tests["Second_pokemon"], "First_win": y_predict}
submission = pd.DataFrame(data=data, columns=["First_pokemon", "Second_pokemon", "First_win"])

# Turn the 0/1 flag into the id of the predicted winner.
winner = pd.DataFrame(submission.apply(lambda x: x["First_pokemon"] if x["First_win"]==1 else x["Second_pokemon"], axis=1), columns=["Winner"])
final = pd.concat([submission, winner], axis=1)

# Drop First_win and show names instead of ids. names_dict is rebuilt here so it
# includes the name that was filled in for the previously unnamed Pokemon.
names_dict = dict(zip(pokemon['#'], pokemon['Name']))
finalCols = ['First_pokemon', 'Second_pokemon', 'Winner']
# Build the display frame from `final` (the predictions), not from `combats`
# (the training battles), which is what was shown before.
combatName = final[finalCols].replace(names_dict)

combatName


In [None]:
# Preview the feature frame that was passed to the model.
testAddData.head()