In [None]:
import numpy as np
import pandas as pd

Using the data provided, we will attempt to build a quick model that predicts the winner of a pokemon battle
based on stats such as Attack, Defense, Special Attack, Special Defense and so on.

In [None]:
# Per-pokemon attributes; the rest of the notebook reads '#', 'Name', 'Type 1',
# 'Type 2', 'Generation' and 'Legendary' plus the battle stats from this frame.
poke = pd.read_csv('../input/pokemon-challenge/pokemon.csv')
# One row per battle; the rest of the notebook reads 'First_pokemon',
# 'Second_pokemon' and 'Winner' from this frame.
combats = pd.read_csv('../input/pokemon-challenge/combats.csv')

In [None]:
# Preview the first five battles.
combats.head(n=5)

In [None]:
# Preview the first five pokemon records.
poke.head(n=5)

To take full advantage of the types of the pokemon given to us, we must be able to tell which pokemon is weak against the other.

In [None]:
# Type chart keyed by type name, so a type's row can be looked up directly
# (e.g. weaknesses.loc[type_name, 'weaknesses']).
weaknesses = (
    pd.read_json("../input/pokemon-weaknesses/types.json")
    .set_index('name')
)
weaknesses.head()

Credits to filipekiss for this json file of the weaknesses of each pokemon type. It can be found here:
https://github.com/filipekiss/pokemon-type-chart/blob/master/types.json

In [None]:
# Index the pokemon table by its '#' id so the ids stored in combats can be
# used as row labels. The guard keeps the cell safe to re-run: once '#' has
# become the index it is no longer a column, and set_index('#') would raise
# KeyError on a second execution.
if '#' in poke.columns:
    poke = poke.set_index('#')

In [None]:
# Left-join each battle with the attributes of its first combatant
# (poke is indexed by the '#' id, which 'First_pokemon' refers to).
first_poke = combats.join(
    other=poke,
    how='left',
    on='First_pokemon',
)

In [None]:
# Left-join each battle with the attributes of its second combatant
# (poke is indexed by the '#' id, which 'Second_pokemon' refers to).
second_poke = combats.join(
    other=poke,
    how='left',
    on='Second_pokemon',
)

I've chosen to drop Generation because in the end, battle stats are what matters.
I dropped the Legendary column because I believe that the stats will already reflect how special a legendary is. Therefore I felt as if it was rather redundant.

In [None]:
# Columns that are not used as model features: the battle bookkeeping columns
# that came from combats, plus the pokemon's name, generation and legendary flag.
unused_columns = ['First_pokemon', 'Second_pokemon', 'Winner', 'Name', 'Generation', 'Legendary']

# Re-assign instead of dropping in place, and ignore columns that are already
# gone, so re-running this cell does not raise KeyError.
first_poke = first_poke.drop(columns=unused_columns, errors='ignore')
second_poke = second_poke.drop(columns=unused_columns, errors='ignore')

First we obtain the winners column.
Note that -1 assigned to the result of the first pokemon winning is just for visual ease later on.
As we'll see in a bit, negative values in our final dataset mean that the first pokemon is at an advantage.

In [None]:

# Target vector: -1.0 when the first pokemon won the battle, +1.0 otherwise.
# The columns are compared by name in one vectorised step; the previous row
# loop relied on positional Series access (row[0], row[2]), which is deprecated
# for label-indexed rows, and on the index label doubling as an array position.
winners = np.where(combats['First_pokemon'] == combats['Winner'], -1.0, 1.0)

In [None]:
# Preview the first combatant's remaining feature columns.
first_poke.head(n=5)

The technique we'll be using to handle the advantages and disadvantages of pokemon type is as follows:
* negative values correspond to pokemon 1's advantage and positive values correspond to pokemon 2's advantage
* weakness advantage and strength advantage will be awarded a point in favour of the advantaged
* immunity advantage will be awarded two points in favour of the advantaged

For example:
-3 could indicate that pokemon 1's first type is immune to pokemon 2's first type (-2 points) and that it is strong versus pokemon 2's second type (-1 point).
    

In [None]:
def _net_disadvantage(own_types, foe_types, chart_rows):
    """Return the type-chart points scored against the owner of ``own_types``.

    Every (own type, foe type) pair in which both types are present is scored
    from the own type's chart row:

    * foe type listed under 'weaknesses'           -> +1
    * otherwise, foe type listed under 'immunes'   -> +2
    * foe type listed under 'strengths'            -> -1

    Missing (NaN) types, such as an absent 'Type 2', are skipped.

    Parameters
    ----------
    own_types, foe_types : iterable
        Type names (or NaN) of the two pokemon.
    chart_rows : dict
        Mapping of type name to a mapping with the keys 'weaknesses',
        'immunes' and 'strengths', each holding a collection of type names.

    Returns
    -------
    int
        Positive when the owner is disadvantaged, negative when advantaged.

    Raises
    ------
    KeyError
        If a present own type has no row in ``chart_rows``.
    """
    points = 0
    for own in own_types:
        if pd.isnull(own):
            continue
        row = chart_rows[own]
        for foe in foe_types:
            if pd.isnull(foe):
                continue
            if foe in row['weaknesses']:
                points += 1
            elif foe in row['immunes']:
                points += 2
            if foe in row['strengths']:
                points -= 1
    return points


def compute_type_matchup(combats, poke, chart):
    """Build the per-battle type matchup feature.

    Negative values mean the first pokemon has the type advantage, positive
    values mean the second pokemon has it.

    Parameters
    ----------
    combats : pandas.DataFrame
        Battles, with 'First_pokemon' and 'Second_pokemon' id columns.
    poke : pandas.DataFrame
        Pokemon table indexed by id, with 'Type 1' and 'Type 2' columns.
    chart : pandas.DataFrame
        Type chart indexed by type name, with 'weaknesses', 'immunes' and
        'strengths' columns.

    Returns
    -------
    numpy.ndarray
        Float array with one score per row of ``combats``, in row order.
    """
    # Look the chart up once as plain dicts instead of indexing the frame
    # several times per battle.
    chart_rows = chart[['weaknesses', 'immunes', 'strengths']].to_dict('index')
    type_columns = ['Type 1', 'Type 2']
    # .loc replaces the .ix indexer, which was removed in pandas 1.0.
    first_types = poke.loc[combats['First_pokemon'], type_columns].to_numpy()
    second_types = poke.loc[combats['Second_pokemon'], type_columns].to_numpy()

    scores = np.zeros(len(combats))
    for position, (types_1, types_2) in enumerate(zip(first_types, second_types)):
        # Points against pokemon 1 push the score up and points against
        # pokemon 2 push it down. This also means a strength of pokemon 1 now
        # lowers the score (the old loop added +1 for strengths of either
        # side), matching the "negative = pokemon 1's advantage" convention.
        scores[position] = (
            _net_disadvantage(types_1, types_2, chart_rows)
            - _net_disadvantage(types_2, types_1, chart_rows)
        )
    return scores


type_matchup = compute_type_matchup(combats, poke, weaknesses)

In [None]:
# Show the first five scores so they line up row-for-row with the .head()
# previews of first_poke / second_poke used to sanity-check them.
type_matchup[:5]

Let's validate this array by comparing it against the battles shown below. Looks like it worked, with Grass being extremely effective against
Rock and Ground (3 points!).

In [None]:
# Plain-text preview of both combatants' feature frames, first pokemon first.
for battle_side in (first_poke, second_poke):
    print(battle_side.head())

In [None]:
# Keep only the numeric columns here; the remaining columns are handled separately.
# Positions 2-7 are presumably the six battle stats left after the earlier
# column drop - TODO confirm against first_poke.head().
stat_slice = slice(2, 8)
first_poke_num = first_poke.iloc[:, stat_slice]
second_poke_num = second_poke.iloc[:, stat_slice]
first_poke_num.head()

In [None]:
# Collapse the two stat frames into one: second pokemon minus first pokemon,
# so a negative difference means the first pokemon has the higher stat.
poke_num = second_poke_num.sub(first_poke_num)
poke_num.head()

In [None]:
# Assemble the modelling frame: the target first, then the type feature,
# then the stat differences.
label_col = pd.Series(winners, name='winners')
matchup_col = pd.Series(type_matchup, name='type_matchup')
poke_df = pd.concat([label_col, matchup_col, poke_num], axis=1)

In [None]:
# Inspect the first 20 rows of the assembled modelling frame.
poke_df.head(n=20)

Visually we can see that more negative values mean that pokemon 1 is more likely to win and vice versa.
Let's use a Random Forest Classifier as it's easy to train and will hopefully be effective with our prepped data.

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [None]:
# Fixed seed so the splits and the forest - and therefore the reported
# accuracy - are reproducible across kernel restarts.
SEED = 42

forest = RandomForestClassifier(random_state=SEED)
# 60% train; the remaining 40% is halved into test and validation sets.
train_set, holdout_set = train_test_split(poke_df, test_size=0.4, random_state=SEED)
test_set, valid_set = train_test_split(holdout_set, test_size=0.5, random_state=SEED)

In [None]:
# 'winners' is the first column of poke_df and serves as the target;
# every other column is a feature.
forest.fit(
    X=train_set.drop(columns='winners'),
    y=train_set['winners'],
)

In [None]:
# Score on the test split ('winners' is the target, the rest are features).
forest.score(
    X=test_set.drop(columns='winners'),
    y=test_set['winners'],
)

Over 94% accuracy on the held-out test set with only several cells of code! Pretty good.