In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib.ticker import PercentFormatter
from scipy import stats
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler

In [None]:
# Both input CSVs sit in the same folder, so the folder is spelled out once.
DATA_DIR = 'C:/Users/teren/Documents/Python/Practice/Pokemon Analysis'

# Pokemon stats table: the '#' column becomes 'Number' and the row labels are cast to str.
df = pd.read_csv(DATA_DIR + '/Pokemon.csv').rename(index=str, columns={"#": "Number"})

# One row per fight (First_pokemon, Second_pokemon, Winner are used further down).
combat = pd.read_csv(DATA_DIR + "/combats.csv")

In [None]:
# Report the (rows, columns) shape of each loaded table.
# The label previously read "Dimenstions"; the spelling is corrected in the printed output.
print(f"Dimensions of Pokemon: {df.shape}")
print(f"Dimensions of Combat: {combat.shape}")

In [None]:
# The combat CSV has a unique ID for mega evolution Pokemon as well, unlike the Pokemon sheet.
# 'Combat ID' numbers the rows 1..len(df) in file order and is the key used to merge the
# combat data with the Pokemon data.
# Guarded so the cell can be re-run: DataFrame.insert raises ValueError when the column
# already exists.
if 'Combat ID' not in df.columns:
    df.insert(0, 'Combat ID', range(1, 1 + len(df)))
df.head()

In [None]:
# Flag mega evolutions: 1 when the name marks a mega form, 0 otherwise.
# - np.where is called directly; the pd.np alias was removed in pandas 2.0.
# - The literal "Mega " (with trailing space) is matched, because the bare substring "Mega"
#   also matches ordinary names such as "Meganium".
#   NOTE(review): assumes every mega name contains "Mega " followed by the species name - confirm against Pokemon.csv.
# - na=False makes a missing name count as "not mega" instead of propagating NaN into np.where.
df['Mega'] = np.where(df['Name'].str.contains("Mega ", regex=False, na=False), 1, 0)

# Competitive viability with mega evolutions excluded.
# np.select applies the first rule that matches each row; rows matching no rule get the default (0).
conditions1 = [
    df['Legendary'].eq(0) & df['Total'].gt(600) & df['Mega'].eq(0),  # non-legendary, non-mega, Total > 600
    df['Mega'].eq(1),                                                # any mega evolution
    df['Total'].le(600),                                             # everything else with Total <= 600
]
values1 = [1, 0, 1]
df['Competitive_NoMega'] = np.select(conditions1, values1, default=0)

# Classify fast, hard-hitting Pokemon ("sweepers") by their dominant attacking stat.
# Every rule requires Speed >= 100; the stronger of Attack / Sp. Atk (which must itself be >= 100)
# decides between physical and special, and a tie gives a mixed sweeper.
conditions2 = [
    (df['Attack'] >= 100) & (df['Speed'] >= 100) & (df['Attack'] > df['Sp. Atk']),
    (df['Sp. Atk'] >= 100) & (df['Speed'] >= 100) & (df['Sp. Atk'] > df['Attack']),
    (df['Attack'] >= 100) & (df['Speed'] >= 100) & (df['Sp. Atk'] == df['Attack']),
]
values2 = ['Phy Sweeper', 'Spec Sweeper', 'Mixed Sweeper']
# The default must be a string: with string choices, np.select's implicit integer default (0)
# has no common dtype on recent NumPy releases and raises TypeError. "0" is the value older
# NumPy produced by casting that default, so existing column contents are unchanged.
df['Battle_Type'] = np.select(conditions2, values2, default="0")

# Competitive viability with mega evolutions allowed.
# A row is flagged 1 when it is non-legendary with Total > 600, or when Total <= 600;
# np.select leaves every other row at the default of 0.
conditions3 = [
    df['Legendary'].eq(0) & df['Total'].gt(600),
    df['Total'].le(600),
]
values3 = [1, 1]
df['Competitive_Mega'] = np.select(conditions3, values3, default=0)

In [None]:
# Keep only the rows flagged as competitive under the no-mega rule, then preview them.
df_comp = df[df['Competitive_NoMega'].eq(1)]
df_comp.head()

In [None]:
# Building blocks for each Pokemon's win percentage, which is later added to the Pokemon dataset.

# Wins per Pokemon ID as a Series.
total_Wins = combat['Winner'].value_counts()

# The same tally as a DataFrame indexed by winner ID: after count(), each remaining column
# holds the number of non-null rows for that winner, i.e. its number of wins.
numberOfWins = combat.groupby(by='Winner').count()

# Appearances per Pokemon ID in each slot of a fight.
# NOTE(review): the names are swapped relative to the grouping keys (countByFirst groups by
# Second_pokemon and vice versa).
countBySecond = combat.groupby(by='First_pokemon').count()
countByFirst = combat.groupby(by='Second_pokemon').count()

In [None]:
# Preview the first five rows of the per-winner counts.
numberOfWins.head(5)

In [None]:
# Fights per Pokemon = appearances in either slot of a fight.
# Series.add(..., fill_value=0) keeps a Pokemon that only ever appeared in one slot; a plain
# "+" aligns the two indexes and turns such a Pokemon's total into NaN.
total_fights = countByFirst['Winner'].add(countBySecond['Winner'], fill_value=0)

# numberOfWins only has rows for Pokemon that won at least once. Extend it to everyone who
# fought so that a Pokemon with fights but no wins gets 0 wins (and a 0.0 win percentage)
# rather than having no row and looking as if it never fought.
all_fighters = numberOfWins.index.union(total_fights.index)
numberOfWins = (
    numberOfWins
    .reindex(all_fighters, fill_value=0)
    .rename_axis(numberOfWins.index.name)
    .sort_index()
)
numberOfWins['Total Fights'] = total_fights
# After groupby('Winner').count(), the First_pokemon column holds the win count.
numberOfWins['Win Percentage'] = numberOfWins['First_pokemon'] / numberOfWins['Total Fights']

# Merge the win statistics onto the Pokemon dataset; the left join keeps every Pokemon.
df2 = pd.merge(df, numberOfWins, left_on='Combat ID', right_index=True, how='left')

# Pokemon with no row in the win statistics have a NaN win percentage: these never recorded a fight.
df2[df2['Win Percentage'].isnull()]

In [None]:
# Summary statistics for the merged table; the count row shows which columns have missing values.
df2.describe()

In [None]:
# Spot-check the row(s) whose 'Number' is 382.
df2[df2["Number"].eq(382)]

In [None]:
# Ten Pokemon with the highest win percentage; rows without a finite win percentage are left out.
(
    df2.loc[np.isfinite(df2['Win Percentage'])]
    .sort_values(by='Win Percentage', ascending=False)
    .head(10)
)

In [None]:
# Ten Pokemon with the lowest win percentage; rows without a finite win percentage are left out.
(
    df2.loc[np.isfinite(df2['Win Percentage'])]
    .sort_values(by='Win Percentage', ascending=True)
    .head(10)
)

In [None]:
# Number of Pokemon per primary type, split by generation.
# Bars are ordered from the most to the least common type.
f, ax = plt.subplots(figsize=(14, 10))
sns.countplot(
    data=df2,
    x="Type 1",
    hue='Generation',
    order=df2["Type 1"].value_counts().index,
    ax=ax,
)
plt.xticks(rotation=90)  # type names are long; turn them vertical
ax.set_xlabel('Type 1')
ax.set_ylabel('Total ')
ax.set_title("Total Pokemon by Type 1 & Generation")

In [None]:
# Mean win percentage per primary type, best type first.
(
    df2.groupby('Type 1')[["Win Percentage"]]
    .mean()
    .sort_values(by="Win Percentage", ascending=False)
)

In [None]:
# How does Win Percentage vary between Pokemon types?
f, ax = plt.subplots(figsize=(14, 10))
ax = sns.boxplot(x='Type 1', y='Win Percentage', data=df2, ax=ax)
# This cell used to call ax.set_yticklabels(y_value), but y_value is never defined, so it
# raised NameError on a fresh kernel. 'Win Percentage' is a 0-1 fraction (wins / fights),
# so the ticks are rendered as percentages with a formatter, which stays correct if the
# axis is rescaled.
# NOTE(review): percent labels are the presumed intent of the missing y_value - confirm.
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1))

In [None]:
# Pairwise relationships between the base stats and win percentage.
# Rows with a missing value in any selected column are dropped before plotting.
col = ['Type 1', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Win Percentage']
sns.pairplot(df2[col].dropna())

In [None]:
# Correlation matrix of the selected stats and win percentage, drawn as an annotated heatmap.
col = ['Total', 'HP', 'Attack', 'Sp. Atk', 'Sp. Def', 'Speed', 'Win Percentage']
f, ax = plt.subplots(figsize=(12, 9))
sns.set(font_scale=1.25)
sns.heatmap(
    df2[col].corr(),
    annot=True,
    fmt='.2f',
    annot_kws={'size': 10},
    square=True,
    cbar=True,
)

In [None]:
# Logistic fit of win percentage against Speed across all Pokemon.
f, ax = plt.subplots(figsize=(12, 9))
sns.regplot(data=df2, x="Speed", y="Win Percentage", logistic=True, ax=ax)
ax.set_title("Speed vs Win Percentage")

In [None]:
# Logistic fits of win percentage against Speed, one fit per primary type.
# lmplot creates its own figure, so the size is set on the returned grid's figure.
speed_by_type = sns.lmplot(data=df2, x="Speed", y="Win Percentage", hue='Type 1', logistic=True)
speed_by_type.fig.set_size_inches(12, 10)

In [None]:
# Linear fit of win percentage against Total across all Pokemon.
f, ax = plt.subplots(figsize=(12, 9))
sns.regplot(data=df2, x="Total", y="Win Percentage", ax=ax)
ax.set_title("Total vs Win Percentage")

# The same relationship with a separate fit per generation; lmplot draws on its own figure,
# which is resized through the returned grid.
total_by_generation = sns.lmplot(data=df2, x="Total", y="Win Percentage", hue='Generation')
total_by_generation.fig.set_size_inches(12, 10)

In [None]:
# Number of Pokemon per primary type, split into legendary and non-legendary.
f, ax = plt.subplots(figsize=(12, 9))
sns.set_color_codes("pastel")
ax = sns.countplot(data=df2, x="Type 1", hue="Legendary")
plt.xticks(rotation=90)  # type names are long; turn them vertical
ax.set_xlabel('Type 1')
ax.set_ylabel('Total ')
ax.set_title("Total Pokemon by Type 1")

In [None]:
# One-way ANOVA on 'Total' across generations 1 through 6: one sample per generation,
# passed to f_oneway in generation order.
stats.f_oneway(
    *(df2.loc[df2['Generation'].eq(generation), 'Total'] for generation in range(1, 7))
)

In [None]:
# Subsets by battle type. The substring "Sweeper" matches every sweeper label (physical,
# special and mixed); the other two frames keep a single label each.
df_PSweeper = df.loc[df['Battle_Type'].str.contains("Phy Sweeper")]
df_SSweeper = df.loc[df['Battle_Type'].str.contains("Spec Sweeper")]
df_Sweeper = df.loc[df['Battle_Type'].str.contains("Sweeper")]

In [None]:
# Preview the generation 4 sweepers.
df_Sweeper.loc[df_Sweeper['Generation'] == 4].head()

In [None]:
# Distribution of 'Total' among sweepers, one box per generation.
sns.boxplot(data=df_Sweeper, x='Generation', y='Total')

In [None]:
# Distribution of 'Total' across all Pokemon: density-normalised histogram with a KDE overlay.
# sns.distplot is deprecated in seaborn; histplot with kde=True and stat="density" is the
# replacement that draws the same kind of figure.
sns.histplot(df2["Total"], kde=True, stat="density")

In [None]:
 # Bind a second name to the merged table: `dataset` and `df2` refer to the same DataFrame (no copy is made).
 dataset = df2