In [1]:
# import stuff
import pandas as p
import numpy as n
import seaborn as s
from matplotlib import pyplot as plt

# Data Set 1; Pokemon :)

This is the data set of all Pokemon as of 10/1/21

[csv link](https://www.kaggle.com/hamdallak/the-world-of-pokemons?select=pokemons+dataset.csv)

## Useful Stats:
- Size of set ~> 1045
- Columns ~> 11

In [2]:
# Cap DataFrame previews at 10 rows so displayed tables stay short.
p.set_option('display.max_rows', 10)

# Load the Pokemon table. The first line of the file (presumably its own
# header row) is skipped and the column names below are used instead.
pokemon = p.read_csv(
    'data/pokemon.csv',
    names=[
        'Name', 'Version', 'Primary Type', 'Secondary Type',
        'Attack', 'Defense', 'HP', 'Sp. Attack', 'Sp. Defense',
        'Speed', 'Total',
    ],
    header=None,
    skiprows=1,
    encoding='latin-1',
    low_memory=False,
)

In [3]:
# Preview the loaded table; display.max_rows is set to 10 in the loading cell,
# so only the first and last few rows are rendered.
pokemon

Unnamed: 0,Name,Version,Primary Type,Secondary Type,Attack,Defense,HP,Sp. Attack,Sp. Defense,Speed,Total
0,Bulbasaur,,GRASS,POISON,49,49,45,65,65,45,318
1,Ivysaur,,GRASS,POISON,62,63,60,80,80,60,405
2,Venusaur,,GRASS,POISON,82,83,80,100,100,80,525
3,Venusaur,Mega Venusaur,GRASS,POISON,100,123,80,122,120,80,625
4,Charmander,,FIRE,,52,43,39,60,50,65,309
...,...,...,...,...,...,...,...,...,...,...,...
1040,Glastrier,,ICE,,145,130,100,65,110,30,580
1041,Spectrier,,GHOST,,65,60,100,145,80,130,580
1042,Calyrex,,PSYCHIC,GRASS,80,80,100,80,80,80,500
1043,Calyrex,Ice Rider,PSYCHIC,ICE,165,150,100,85,130,50,680


A) The population we are sampling is the statistics of all pokemon.

B) The features being measured include name, version, types, and battle stats.

C) All features are discrete. They can further be classified as:

### Qualitative and Nominal:

   - Name
   - Version
   - Primary Type
   - Secondary Type

### Quantitative and Ordinal:

   - Attack
   - Defense
   - HP
   - Sp. Attack
   - Sp. Defense
   - Speed
   - Total

D) The features Version and Secondary Type have null values for some pokemon. These are still useful because not every pokemon has a different version or a second type. This also implies incompleteness.

E) The pokemon with different versions and those with second types need to be included; otherwise the data set would not cover all the pokemon.

F) Some features that could be added are the weaknesses of each pokemon and whether the pokemon evolves. If the pokemon does evolve then what level it evolves at.

In [4]:
# G) A good example of a pivot that works, but is ridiculous.
#
# Keep only the rows without a Version, then spread each pokemon's Total
# across one column per Primary Type. Every row of the result has exactly one
# filled cell, which is what makes this pivot a poor summary.

# Rows whose Version is null.
no_version = pokemon['Version'].isnull()

# Select the three needed columns in one step and give them the short names
# used by the pivot; the original row labels are kept as the index.
uni_poke = (
    pokemon.loc[no_version, ['Name', 'Primary Type', 'Total']]
    .rename(columns={'Primary Type': 'Type', 'Total': 'total'})
)

uni_poke.pivot(index='Name', columns='Type', values='total')

Type,BUG,DARK,DRAGON,ELECTRIC,FAIRY,FIGHTING,FIRE,FLYING,GHOST,GRASS,GROUND,ICE,NORMAL,POISON,PSYCHIC,ROCK,STEEL,WATER
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Abomasnow,,,,,,,,,,494.0,,,,,,,,
Abra,,,,,,,,,,,,,,,310.0,,,
Absol,,465.0,,,,,,,,,,,,,,,,
Accelgor,495.0,,,,,,,,,,,,,,,,,
Aerodactyl,,,,,,,,,,,,,,,,515.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zigzagoon,,,,,,,,,,,,,240.0,,,,,
Zoroark,,510.0,,,,,,,,,,,,,,,,
Zorua,,330.0,,,,,,,,,,,,,,,,
Zubat,,,,,,,,,,,,,,245.0,,,,


In [116]:
# An example of a more sensible summary: the mean Total of every type combination.
#
# The means are computed with a single groupby instead of comparing every
# pokemon against every type combination in nested Python loops.

type_cols = ['Primary Type', 'Secondary Type']

# Mean Total per (Primary Type, Secondary Type). dropna=False keeps the
# pokemon whose Secondary Type is NaN as groups of their own instead of
# silently dropping them.
mean_totals = (
    pokemon.groupby(type_cols, dropna=False)['Total']
    .mean()
    .rename('Mean Total')
    .reset_index()
)

# One row per unique type combination, ordered by Primary Type. reset_index()
# keeps the row label of each combination's first occurrence in an 'index'
# column; the left merge then attaches the matching mean to every row.
type_stats = (
    pokemon[type_cols]
    .drop_duplicates()
    .sort_values('Primary Type')
    .reset_index()
    # merge matches NaN keys with each other, so NaN Secondary Type rows get a mean too
    .merge(mean_totals, on=type_cols, how='left')
)

print(type_stats)

     index Primary Type Secondary Type  Mean Total
0      263          BUG       FIGHTING  560.000000
1      252          BUG          STEEL  509.714286
2      870          BUG          FAIRY  384.000000
3      262          BUG           ROCK  438.333333
4       59          BUG          GRASS  384.000000
..     ...          ...            ...         ...
187    272        WATER           ROCK  446.000000
188    215        WATER       ELECTRIC  395.000000
189    229        WATER          FAIRY  442.500000
190    103        WATER        PSYCHIC  480.000000
191    240        WATER         GROUND  433.900000

[192 rows x 4 columns]


In [118]:
# Print every type combination ordered by mean Total, weakest first, so the
# strongest combinations appear at the bottom. reset_index() keeps the previous
# row positions as an extra column.
print(
    type_stats
    .sort_values(by='Mean Total', ascending=True)
    .reset_index()
)

     level_0  index Primary Type Secondary Type  Mean Total
0          7    351          BUG          GHOST  236.000000
1         11     13          BUG            NaN  287.631579
2        116   1009          ICE            BUG  330.000000
3        132    539       POISON            BUG  330.000000
4        124     52       NORMAL          FAIRY  330.000000
..       ...    ...          ...            ...         ...
187       28    754       DRAGON            ICE  686.666667
188      129    806       POISON         DRAGON  712.250000
189       52   1028        FAIRY          STEEL  720.000000
190      142    936      PSYCHIC         DRAGON  754.000000
191      108    462       GROUND           FIRE  770.000000

[192 rows x 5 columns]
