# Pandas Review using a Pokemon Dataset

In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv('https://gist.githubusercontent.com/armgilles/\
194bcff35001e7eb53a2a8b441e8b2c6/raw/92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv')

### Practice Exercises

#### 1. Obtain the following information:
    
    - dimensions
    - dtypes
    - column names
    - summary statistics


In [2]:
df.shape

(800, 13)

In [3]:
df.dtypes

#              int64
Name          object
Type 1        object
Type 2        object
Total          int64
HP             int64
Attack         int64
Defense        int64
Sp. Atk        int64
Sp. Def        int64
Speed          int64
Generation     int64
Legendary       bool
dtype: object

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #           800 non-null    int64 
 1   Name        800 non-null    object
 2   Type 1      800 non-null    object
 3   Type 2      414 non-null    object
 4   Total       800 non-null    int64 
 5   HP          800 non-null    int64 
 6   Attack      800 non-null    int64 
 7   Defense     800 non-null    int64 
 8   Sp. Atk     800 non-null    int64 
 9   Sp. Def     800 non-null    int64 
 10  Speed       800 non-null    int64 
 11  Generation  800 non-null    int64 
 12  Legendary   800 non-null    bool  
dtypes: bool(1), int64(9), object(3)
memory usage: 75.9+ KB


In [5]:
df.describe()

Unnamed: 0,#,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,119.96304,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,180.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,330.0,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,450.0,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,515.0,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,780.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


#### 2. Change all the column names
      - lowercase letters
      - remove whitespace
      - remove periods

In [7]:
df.columns = [col.lower() for col in df.columns]

In [8]:
df.columns = df.columns.str.strip().str.replace('.','', regex=False)

In [9]:
df = df.rename(columns={'sp atk': 'special_atk', 'sp def': 'special_def', 'type 1': 'type_1', 'type 2': 'type_2'})
df

Unnamed: 0,#,name,type_1,type_2,total,hp,attack,defense,special_atk,special_def,speed,generation,legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


#### 3. What is the highest HP value present?

In [11]:
df['hp'].max()

255

In [12]:
df['hp'].nlargest()

261    255
121    250
217    190
351    170
655    165
Name: hp, dtype: int64

In [10]:
df[df.hp == df.hp.max()]

Unnamed: 0,#,name,type_1,type_2,total,hp,attack,defense,special_atk,special_def,speed,generation,legendary
261,242,Blissey,Normal,,540,255,10,10,75,135,55,2,False


#### 4. Which Pokemon possess(es) the highest HP value?
- note: the `name` column is the pokemon name

In [63]:
most_hp = df.hp.nlargest(1)

In [72]:
most_hp

261    255
Name: hp, dtype: int64

In [13]:
df.groupby('name').hp.max().nlargest(1)

name
Blissey    255
Name: hp, dtype: int64

#### 5. How many different types are represented in Type 1? What are they?


In [88]:
df['type_1'].describe()

count       800
unique       18
top       Water
freq        112
Name: type_1, dtype: object

In [85]:
df['type_1'].unique()

array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',
       'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',
       'Dragon', 'Dark', 'Steel', 'Flying'], dtype=object)

#### 6. Number of Pokemon whose Type 2 is Ghost

In [96]:
df.type_2.value_counts()

Flying      97
Ground      35
Poison      34
Psychic     33
Fighting    26
Grass       25
Fairy       23
Steel       22
Dark        20
Dragon      18
Water       14
Ghost       14
Ice         14
Rock        14
Fire        12
Electric     6
Normal       4
Bug          3
Name: type_2, dtype: int64

In [103]:
df.loc[df['type_2']== 'Ghost'].sort_values(by='name')

Unnamed: 0,#,name,type_1,type_2,total,hp,attack,defense,special_atk,special_def,speed,generation,legendary
750,681,AegislashBlade Forme,Steel,Ghost,520,60,150,50,150,50,60,6,False
751,681,AegislashShield Forme,Steel,Ghost,520,60,50,150,50,150,60,6,False
749,680,Doublade,Steel,Ghost,448,59,110,150,45,49,35,6,False
653,592,Frillish,Water,Ghost,335,55,40,50,65,85,40,5,False
530,478,Froslass,Ice,Ghost,480,70,80,70,80,70,110,4,False
683,622,Golett,Ground,Ghost,303,59,74,50,35,50,35,5,False
684,623,Golurk,Ground,Ghost,483,89,124,80,55,80,55,5,False
748,679,Honedge,Steel,Ghost,325,45,80,100,35,37,28,6,False
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
654,593,Jellicent,Water,Ghost,480,100,60,70,85,105,60,5,False


#### 7. Percentage of Pokemon whose Type 2 is Ghost

In [110]:
round((df.loc[df['type_2']== 'Ghost'].value_counts().sum()/ len(df))*100, 2)

1.75

#### 8. Number of Pokemon whose Attack is greater than Defense

In [119]:
df[df.attack > df.defense].value_counts().sum()

223

#### 9. What is the slowest speed for Grass type (either type 1 or type 2)

In [125]:
df.loc[df['type_1']== 'Grass']['speed'].min()

10

In [126]:
df.loc[df['type_2']== 'Grass']['speed'].min()

23

#### 10. Find the average speed by Generation

In [28]:
df.groupby('generation')['speed'].mean()

generation
1    72.584337
2    61.811321
3    66.925000
4    71.338843
5    68.078788
6    66.439024
Name: speed, dtype: float64

In [23]:
df

Unnamed: 0,#,name,type_1,type_2,total,hp,attack,defense,special_atk,special_def,speed,generation,legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


#### 11. How many LEGENDARY Pokemon are DRAGON type?

In [49]:
legendary = df.loc[df['legendary']==True]
legendary.loc[df['type_1']=='Dragon'].value_counts().sum()

12

In [47]:
legendary.loc[df['type_2']=='Dragon'].value_counts().sum()

4