## MATH 120 Final Project Proposal

In [91]:
import os
import sys

# Environment bootstrap: detect Google Colab, fetch the project there if needed,
# and make the project's src/ directory importable.
# Detection works by attempting to import google.colab; an ImportError is
# treated as "running locally".
try:
    import google.colab
    IN_COLAB = True
    print("Running in Google Colab")

    # Clone the repository only when the project directory is not already present,
    # so re-running this cell does not attempt a second clone.
    if not os.path.exists('/content/MATH_120_Final_Project/'):
        !git clone https://github.com/marconoriega0703-sys/MATH_120_Final_Project.git

    # Change to the project directory so the relative paths used later resolve inside it
    os.chdir('/content/MATH_120_Final_Project/')

except ImportError:
    # No google.colab module available: keep the current working directory as-is.
    IN_COLAB = False
    print("Running locally")

# Add the src directory to the Python path (skipped if the entry is already there).
# The entry is the relative string 'src', so it is resolved against the working directory.
if 'src' not in sys.path:
    sys.path.append('src')

print(f"Current working directory: {os.getcwd()}")

Running in Google Colab
Current working directory: /content/MATH_120_Final_Project


## Packages

In [92]:
import pandas as pd
import plotly.express as px
from google.colab import files

In [93]:
# Load the raw Pokemon dataset; the path is relative to the current working directory.
pokemon = pd.read_csv("data_raw/Pokemon.csv")
# Preview the first rows as the cell output.
pokemon.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


## Cleaned Pokemon Data

In [94]:
# Clean the Pokemon data by building a new data frame without duplicate Pokedex ids.
# drop_duplicates() removes duplicate rows from the data.
# subset=["#"] means rows count as duplicates when they share the same "#" value.
# keep="first" keeps the first occurrence of each "#" value,
# which represents the original form in most cases.
cleaned_pokemon = pokemon.drop_duplicates(subset=["#"], keep="first")
cleaned_pokemon.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
5,5,Charmeleon,Fire,,405,58,64,58,80,65,80,1,False


## Download Cleaned Pokemon Data

In [95]:
# Write the cleaned data to a CSV file in the working directory
# (index=False leaves the data frame's row index out of the file).
cleaned_pokemon.to_csv("cleaned_Pokemon.csv", index=False)
# NOTE(review): `files` is imported from google.colab, so this download step
# presumably only works inside Colab — confirm before running locally.
files.download("cleaned_Pokemon.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Pokemon Class:

In [96]:
class Pokemon:
  """
  A class representing a single Pokemon, built from one row of the dataset.

  Attributes:
    name: The name of the Pokemon.
    type1: The primary type of the Pokemon.
    type2: The secondary type of the Pokemon, or None when the row has no
      secondary type (a missing/NaN value in the data is stored as None).
    total: The sum of all base stats.
    hp: Hit Points stat.
    attack: Attack stat.
    defense: Defense stat.
    sp_atk: Special Attack stat.
    sp_def: Special Defense stat.
    speed: Speed stat.
    generation: Generation of the Pokemon.
    legendary: Whether the Pokemon is legendary.
  """
  def __init__(self, pokemon_row):
    """
    Copies the fields of one dataset row onto the instance.

    pokemon_row: any mapping-like object indexable by the dataset's column
      names ('Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
      'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'), e.g. a
      pandas Series or a plain dict. A missing key raises KeyError.
    """
    self.name = pokemon_row['Name']
    self.type1 = pokemon_row['Type 1']

    # Single-typed Pokemon have a missing value in 'Type 2'. Store None so the
    # attribute matches the documented contract and find() does not print "nan".
    secondary_type = pokemon_row['Type 2']
    self.type2 = None if pd.isna(secondary_type) else secondary_type

    self.total = pokemon_row['Total']
    self.hp = pokemon_row['HP']
    self.attack = pokemon_row['Attack']
    self.defense = pokemon_row['Defense']
    self.sp_atk = pokemon_row['Sp. Atk']
    self.sp_def = pokemon_row['Sp. Def']
    self.speed = pokemon_row['Speed']
    self.generation = pokemon_row['Generation']
    self.legendary = pokemon_row['Legendary']

  def find(self):
    """
    Returns a multi-line, human-readable summary of the Pokemon,
    one "Label: value" pair per line.
    """
    return (f"Pokemon Name: {self.name}\n"
            f"Primary Type: {self.type1}\n"
            f"Secondary Type: {self.type2}\n"
            f"Total Stats: {self.total}\n"
            f"HP: {self.hp}\n"
            f"Attack: {self.attack}\n"
            f"Defense: {self.defense}\n"
            f"Special Attack: {self.sp_atk}\n"
            f"Special Defense: {self.sp_def}\n"
            f"Speed: {self.speed}\n"
            f"Generation: {self.generation}\n"
            f"Legendary: {self.legendary}")

In [97]:
def find_pokemon(pokedex_number):
  """
  Looks up a Pokemon by its Pokedex number (the "#" column of cleaned_pokemon).

  Returns the text summary produced by Pokemon.find() for the first row whose
  "#" equals pokedex_number, or the string "Pokemon not found" when no row
  in cleaned_pokemon has that number.
  """
  matching_rows = cleaned_pokemon[cleaned_pokemon['#'] == pokedex_number]

  # Guard clause: an empty selection means the number is not in the data.
  if matching_rows.empty:
    return "Pokemon not found"

  return Pokemon(matching_rows.iloc[0]).find()

In [98]:
# Demo: look up Pokedex number 23 and print the returned summary text.
print(find_pokemon(23))

Pokemon Name: Ekans
Primary Type: Poison
Secondary Type: nan
Total Stats: 288
HP: 35
Attack: 60
Defense: 44
Special Attack: 40
Special Defense: 54
Speed: 55
Generation: 1
Legendary: False


Darmanitan has a Zen Mode form that shares Pokedex ID #555 with its Standard Mode. This test case shows that the find_pokemon function retrieves only the original (first-listed) form of a Pokemon.

In [99]:
# Demo: Pokedex number 555; find_pokemon uses the first matching row in cleaned_pokemon.
print(find_pokemon(555))

Pokemon Name: DarmanitanStandard Mode
Primary Type: Fire
Secondary Type: nan
Total Stats: 480
HP: 105
Attack: 140
Defense: 55
Special Attack: 30
Special Defense: 55
Speed: 95
Generation: 5
Legendary: False


The number 0 is not assigned to any Pokemon. This test case shows that the find_pokemon function returns the custom "Pokemon not found" message, which is then printed.

In [100]:
# Demo: a Pokedex number with no matching row yields the "Pokemon not found" message.
print(find_pokemon(0))

Pokemon not found


## Distribution of Primary Typings

In [101]:
# Count how many Pokemon have each primary type, then order the counts
# from smallest to largest (sort_values() sorts ascending by default).
most_common_primary_type = cleaned_pokemon['Type 1'].value_counts().sort_values()

# Pie chart: one slice per primary type, sized by its count.
fig = px.pie(
    names=most_common_primary_type.index,
    values=most_common_primary_type.values,
    title='Distribution of Primary Typings'
)

fig.show()

## Distribution of Secondary Typings

In [102]:
# Count how many Pokemon have each secondary type; value_counts() leaves out
# missing values by default, so single-typed Pokemon are not counted here.
most_common_secondary_type = cleaned_pokemon['Type 2'].value_counts().sort_values()

# Pie chart: one slice per secondary type, sized by its count.
fig = px.pie(
    names=most_common_secondary_type.index,
    values=most_common_secondary_type.values,
    title='Distribution of Secondary Typings'
)

fig.show()

## Distribution of Pokemon Typings


In [103]:
# pd.concat stacks the values of the two type columns into a single Series.
# This is more appropriate than merge, which combines different data frames on common keys;
# Type 1 and Type 2 are columns of the same data frame.
# dropna() keeps missing secondary types (NaN) out of the combined Series, and so out of the pie chart.
typing = pd.concat([cleaned_pokemon['Type 1'], cleaned_pokemon['Type 2'].dropna()])

# Count occurrences of each type across both columns, ordered ascending.
most_common_type = typing.value_counts().sort_values()

fig = px.pie(
    names=most_common_type.index,
    values=most_common_type.values,
    title='Distribution of Pokemon Typings'
)

fig.show()

## Summary of the Visualizations: Distribution of Pokemon Typings

* Water is the most plentiful primary and overall typing.
* Flying is the most common secondary typing, but is also the least common primary typing.
* Bug is the least common secondary typing and Ice is the least common overall typing.

## Total Base Stats Trends among Pokemon Generations

In [104]:
# Mean of the 'Total' column per Generation; reset_index() turns the
# Generation group labels back into a regular column for plotting.
total_base_stats_trends_by_generation = cleaned_pokemon.groupby('Generation')['Total'].mean().reset_index()

# Line chart of the per-generation averages; markers=True draws a point at each generation.
# The labels dict renames the default 'x' / 'y' axis titles.
fig = px.line(
    x=total_base_stats_trends_by_generation['Generation'],
    y=total_base_stats_trends_by_generation['Total'],
    labels={'x': 'Generation', 'y': 'Average Total Base Stats'},
    title='Trends in Average Pokemon Base Stats: Generations 1-6',
    markers=True
)
fig.show()

## Summary of the Visualization: Total Base Stats Trends among Pokemon Generations

*  Average total base stats decrease slightly from Generation 1 to Generation 2 (a 0.899 decrease). The decrease from Generation 2 to Generation 3 is considerably larger (a 4.121 decrease).

*  Generation 4 is the peak of average total base stats, and it also shows the largest differences when compared with any of the other Generations in the dataset.

*  The average total base stats of the more recent Generations (4, 5, and 6) are substantially higher than those of the older Generations (1, 2, and 3).

## Average Base Stats of Legendary and Non-Legendary Pokemon

In [105]:
# Mean of the 'Total' column for each value of the 'Legendary' flag.
total_base_stats_by_legendary = cleaned_pokemon.groupby('Legendary')['Total'].mean().reset_index()
print('Average Total Base Stats by Legendary Status:')
# to_string(index=False) prints the table without the row index column.
print(total_base_stats_by_legendary.to_string(index=False))

Average Total Base Stats by Legendary Status:
 Legendary      Total
     False 404.161481
      True 620.217391


In [106]:
# Mean of 'Total' for every (Generation, Legendary) combination present in the data.
total_base_stats_trend_by_legendary_status = cleaned_pokemon.groupby(['Generation', 'Legendary'])['Total'].mean().reset_index()

# .map() translates the boolean True / False values into the display labels
# "Legendary" / "Non-Legendary", stored in a new 'Legendary Status' column.
total_base_stats_trend_by_legendary_status['Legendary Status'] = total_base_stats_trend_by_legendary_status['Legendary'].map({True: 'Legendary', False: 'Non-Legendary'})

# One line per Legendary Status (color=...), with a marker at each generation.
fig = px.line(
    x=total_base_stats_trend_by_legendary_status['Generation'],
    y=total_base_stats_trend_by_legendary_status['Total'],
    color=total_base_stats_trend_by_legendary_status['Legendary Status'],
    labels={'x': "Generation", 'y': 'Average Total Base Stats', 'color': 'Legendary Status'},
    title='Comparison of Average Total Base Stats: Legendary vs. Non-Legendary Pokemon Across Generation',
    markers=True
)
fig.show()

## Summary of the Visualization: Legendary vs. Non-Legendary Base Stats by Generation

* The average total base stats of Legendary Pokemon are greater than 600 in every Generation.

* In five of the six Generations, Legendary Pokemon maintain a lead of at least 200 stat points over their Non-Legendary counterparts. Generation 5 is the only Generation where Legendary Pokemon do not have a 200-point lead over Non-Legendaries (199.4795 stat lead).

* Globally, Legendaries have an average total base stat of 620.217, while the average total base stat of Non-Legendaries is 404.161.

Overall, Legendary Pokemon consistently have substantially better average total base stats than Non-Legendary Pokemon.

## Bar Charts That Find the Most Offensive and Defensive Pokemon Typings

In [107]:
def _typing_stat_frame(pokemon_dataset, stat_name, stat_values):
  """
  Shared helper: pairs every typing of every Pokemon with one stat value.

  pokemon_dataset: data frame with 'Type 1' and 'Type 2' columns.
  stat_name: name of the stat column in the returned data frame.
  stat_values: one value per row of pokemon_dataset, in the same row order.

  Returns a data frame with the columns ['Typing', stat_name]. Each Pokemon
  contributes one row for its primary typing and, right after it, one row for
  its secondary typing; missing (NaN/None) typings are skipped. The columns
  are present even when pokemon_dataset has no rows, so a later
  groupby('Typing') still works on an empty result.
  """
  typing_and_stat = []

  # Walk the two type columns and the stat in lockstep, row by row.
  for type1, type2, stat in zip(pokemon_dataset['Type 1'], pokemon_dataset['Type 2'], stat_values):
    for typing in (type1, type2):
      if pd.notna(typing):
        typing_and_stat.append({'Typing': typing, stat_name: stat})

  # Passing columns= fixes the column set/order, including for an empty list.
  return pd.DataFrame(typing_and_stat, columns=['Typing', stat_name])

def merge_speed(pokemon_dataset):
  """
  Returns a new dataset where each Pokemon's Typing is associated with a Speed stat value.
  A dual-typed Pokemon appears twice, once per typing, with the same Speed.
  """
  return _typing_stat_frame(pokemon_dataset, 'Speed', pokemon_dataset['Speed'])

def merge_attack(pokemon_dataset):
  """
  Returns a new dataset where each Pokemon's Typing is associated with an Attack stat value.
  The Attack value is the average of the Pokemon's Attack and Sp. Atk stats.
  """
  attack = (pokemon_dataset['Attack'] + pokemon_dataset['Sp. Atk']) / 2
  return _typing_stat_frame(pokemon_dataset, 'Attack', attack)

def merge_defense(pokemon_dataset):
  """
  Returns a new dataset where each Pokemon's Typing is associated with a Defense stat value.
  The Defense value is the average of the Pokemon's Defense and Sp. Def stats.
  """
  defense = (pokemon_dataset['Defense'] + pokemon_dataset['Sp. Def']) / 2
  return _typing_stat_frame(pokemon_dataset, 'Defense', defense)

## Pokemon Typings Based on Average Speed

In [108]:
# Build the (Typing, Speed) pairs, then average Speed within each typing.
merged_speed = merge_speed(cleaned_pokemon)
average_speed_by_type = merged_speed.groupby('Typing')['Speed'].mean().reset_index()

# Order the typings by their average Speed, lowest first.
average_speed_by_type_sorted = average_speed_by_type.sort_values(by='Speed', ascending=True)

# Horizontal bar chart (orientation='h'): the average goes on x, the typing on y.
fig = px.bar(
    x=average_speed_by_type_sorted['Speed'],
    y=average_speed_by_type_sorted['Typing'],
    orientation='h',
    labels={'x': 'Average Speed', 'y': 'Pokemon Typing'},
    title='Pokemon Typing Comparison: Average Base Speed'
)

fig.show()

## Pokemon Typings Based on Average Attack

In [109]:
# Build the (Typing, Attack) pairs, then average Attack within each typing.
merged_attack = merge_attack(cleaned_pokemon)
average_attack_by_type = merged_attack.groupby('Typing')['Attack'].mean().reset_index()

# Order the typings by their average Attack, lowest first.
average_attack_by_type_sorted = average_attack_by_type.sort_values(by='Attack', ascending=True)

# Horizontal bar chart (orientation='h'): the average goes on x, the typing on y.
fig = px.bar(
    x=average_attack_by_type_sorted['Attack'],
    y=average_attack_by_type_sorted['Typing'],
    orientation='h',
    labels={'x': 'Average Attack', 'y': 'Pokemon Typing'},
    title='Pokemon Typing Comparison: Average Base Attack'
)

fig.show()

## Pokemon Typings Based on Average Defense

In [110]:
# Build the (Typing, Defense) pairs, then average Defense within each typing.
merged_defense = merge_defense(cleaned_pokemon)
average_defense_by_type = merged_defense.groupby('Typing')['Defense'].mean().reset_index()

# Order the typings by their average Defense, lowest first.
average_defense_by_type_sorted = average_defense_by_type.sort_values(by='Defense', ascending=True)

# Horizontal bar chart (orientation='h'): the average goes on x, the typing on y.
fig = px.bar(
    x=average_defense_by_type_sorted['Defense'],
    y=average_defense_by_type_sorted['Typing'],
    orientation='h',
    labels={'x': 'Average Defense', 'y': 'Pokemon Typing'},
    title='Pokemon Typing Comparison: Average Base Defense'
)

fig.show()

## Assigning Playstyles to Pokemon Typings

Here is how the playstyles are assigned (both conditions listed under a playstyle must hold):

Offensive:
* If Attack > Defense
* If Speed > 70

Defensive:
* If Attack < Defense
* If Speed < 60

The playstyle 'Balanced' is assigned if neither the Offensive nor the Defensive playstyle conditions are satisfied.

In [111]:
def assign_playstyle(row, offensive_speed_threshold=70, defensive_speed_threshold=60):
  """
  Compares the Attack, Defense, and Speed of one Pokemon Typing and assigns it
  one of the following playstyles: Offensive, Defensive, or Balanced.

  row: mapping-like object with 'Attack', 'Defense' and 'Speed' entries
    (e.g. one row of the merged per-typing averages).
  offensive_speed_threshold: Speed must be strictly greater than this value
    for the Offensive playstyle (default 70).
  defensive_speed_threshold: Speed must be strictly less than this value
    for the Defensive playstyle (default 60).

  Returns:
    'Offensive' if Attack > Defense and Speed > offensive_speed_threshold,
    'Defensive' if Attack < Defense and Speed < defensive_speed_threshold,
    'Balanced' otherwise (including ties and speeds between the thresholds).
  """
  attack, defense, speed = row['Attack'], row['Defense'], row['Speed']

  if attack > defense and speed > offensive_speed_threshold:
    return 'Offensive'
  if attack < defense and speed < defensive_speed_threshold:
    return 'Defensive'
  return 'Balanced'

In [112]:
def merge_playstyle(speed, attack, defense):
  """
  Merges the average base stats of every typing into a single dataset
  and labels each typing with a playstyle.

  speed: data frame with 'Typing' and 'Speed' columns (one row per typing occurrence).
  attack: data frame with 'Typing' and 'Attack' columns.
  defense: data frame with 'Typing' and 'Defense' columns.

  Each input is averaged per Typing, the three averages are joined on
  'Typing' (default inner join, so only typings present in all three inputs
  are kept), and a new 'Playstyle' column is filled by applying
  assign_playstyle to every row.

  Returns a data frame with the columns
  Typing, Attack, Defense, Speed, Playstyle.
  """
  # Use the data frames passed in by the caller rather than notebook globals,
  # so the result depends only on the arguments.
  average_speed_by_type = speed.groupby('Typing')['Speed'].mean().reset_index()
  average_attack_by_type = attack.groupby('Typing')['Attack'].mean().reset_index()
  average_defense_by_type = defense.groupby('Typing')['Defense'].mean().reset_index()

  # Join order (attack, defense, then speed) determines the column order of the result.
  attack_and_defense = pd.merge(average_attack_by_type, average_defense_by_type, on='Typing')
  merge_stats = pd.merge(attack_and_defense, average_speed_by_type, on='Typing')
  merge_stats['Playstyle'] = merge_stats.apply(assign_playstyle, axis=1)

  return merge_stats

In [113]:
# Combine the per-typing speed, attack and defense data and label each typing with a playstyle.
playstyle = merge_playstyle(merged_speed, merged_attack, merged_defense)
# Display the resulting table as the cell output.
playstyle

Unnamed: 0,Typing,Attack,Defense,Speed,Playstyle
0,Bug,60.022727,65.19697,60.136364,Balanced
1,Dark,81.329545,65.863636,74.568182,Offensive
2,Dragon,90.789474,81.447368,76.578947,Offensive
3,Electric,75.642857,64.47619,81.809524,Offensive
4,Fairy,63.328571,71.914286,52.685714,Defensive
5,Fighting,78.909091,69.988636,69.5,Balanced
6,Fire,83.544643,68.169643,74.303571,Offensive
7,Flying,73.25,67.061111,83.2,Offensive
8,Ghost,75.942857,75.014286,59.228571,Balanced
9,Grass,71.369048,70.291667,58.75,Balanced


## Summary of Visualizations: Playstyles of Different Pokemon Typings

* Flying, Electric, and Dragon are the Typings with the highest base speed.

* Dragon, Fire, and Dark are the Typings with the highest base attack.

* Steel, Rock, and Dragon are the Typings with the highest base defense.

Dragon, Fire and Dark are the three typings that can be mostly defined as offensive as they have the highest attack stat out of any typings. Dragon, Fire, and Dark also make up 3 of the 5 fastest Pokemon typings. All 3 of these typings are also categorized as an offensive playstyle.

Steel and Rock are the two typings that can be mostly defined as defensive as they have the highest defense stat out of any typings. Steel and Rock also make up two of the three slowest typings, which suggests that they are designed to withstand hits. Both typings are also categorized as defensive playstyles.

## Conclusion

This analysis demonstrates the following coding techniques:

* Loading and cleaning a dataset
* Data merging and visualizations
* Applying classes and functions
* Statistical analysis