# Pandas

In [7]:
import pandas as pd
import numpy as np

## Series and Data Frames

In [8]:
# Build a labelled Series and print only the entries whose value exceeds 20.
data = [10, 20, 30, 40, 50]
srs = pd.Series(data, index=list("abcde"))
print(srs.loc[srs.gt(20)])

c    30
d    40
e    50
dtype: int64


In [9]:
# Calorie totals keyed by day label; the dict keys become the Series index.
calories = {
    'day 01': 1750,
    'day 02': 2100,
    'day 03': 1700,
}
series = pd.Series(calories)

# Raise the 'day 03' entry by 500 through label-based access, then show it.
series.loc['day 03'] = series.loc['day 03'] + 500
print(series.loc['day 03'])

2200


In [10]:
# 5x4 demo frame holding the integers 1..20 in row-major order,
# labelled row1..row5 and col1..col4.
df = pd.DataFrame(
    data=np.arange(1, 21).reshape(5, 4),
    index=[f"row{r}" for r in range(1, 6)],
    columns=[f"col{c}" for c in range(1, 5)],
)

In [11]:
# Preview the first five rows (the whole frame here, since it has five rows).
df.head(n=5)

Unnamed: 0,col1,col2,col3,col4
row1,1,2,3,4
row2,5,6,7,8
row3,9,10,11,12
row4,13,14,15,16
row5,17,18,19,20


In [12]:
# Label-based lookup: every column of the row labelled 'row2', returned as a Series.
df.loc['row2', :]

col1    5
col2    6
col3    7
col4    8
Name: row2, dtype: int64

In [13]:
# Position-based lookup: first row, first two columns (end of the slice is exclusive).
df.iloc[0, :2]

col1    1
col2    2
Name: row1, dtype: int64

## Read CSV

In [14]:
# Load the Pokemon table from the CSV file; the path is resolved relative to
# the kernel's working directory. This rebinds `df`, replacing the demo frame.
df = pd.read_csv(filepath_or_buffer='pokemon.csv')

In [15]:
# Render every row and column as plain text (no truncation) and print it.
full_listing = df.to_string()
print(full_listing)

      No        Name     Type1     Type2  Height  Weight  Legendary
0      1   Bulbasaur     Grass    Poison     0.7     6.9          0
1      2     Ivysaur     Grass    Poison     1.0    13.0          0
2      3    Venusaur     Grass    Poison     2.0   100.0          0
3      4  Charmander      Fire       NaN     0.6     8.5          0
4      5  Charmeleon      Fire       NaN     1.1    19.0          0
5      6   Charizard      Fire    Flying     1.7    90.5          0
6      7    Squirtle     Water       NaN     0.5     9.0          0
7      8   Wartortle     Water       NaN     1.0    22.5          0
8      9   Blastoise     Water       NaN     1.6    85.5          0
9     10    Caterpie       Bug       NaN     0.3     2.9          0
10    11     Metapod       Bug       NaN     0.7     9.9          0
11    12  Butterfree       Bug    Flying     1.1    32.0          0
12    13      Weedle       Bug    Poison     0.3     3.2          0
13    14      Kakuna       Bug    Poison     0.6

In [16]:
# Keep only the three columns of interest, then print all rows untruncated.
print(df.loc[:, ['Name', 'Height', 'Weight']].to_string())

           Name  Height  Weight
0     Bulbasaur     0.7     6.9
1       Ivysaur     1.0    13.0
2      Venusaur     2.0   100.0
3    Charmander     0.6     8.5
4    Charmeleon     1.1    19.0
5     Charizard     1.7    90.5
6      Squirtle     0.5     9.0
7     Wartortle     1.0    22.5
8     Blastoise     1.6    85.5
9      Caterpie     0.3     2.9
10      Metapod     0.7     9.9
11   Butterfree     1.1    32.0
12       Weedle     0.3     3.2
13       Kakuna     0.6    10.0
14     Beedrill     1.0    29.5
15       Pidgey     0.3     1.8
16    Pidgeotto     1.1    30.0
17      Pidgeot     1.5    39.5
18      Rattata     0.3     3.5
19     Raticate     0.7    18.5
20      Spearow     0.3     2.0
21       Fearow     1.2    38.0
22        Ekans     2.0     6.9
23        Arbok     3.5    65.0
24      Pikachu     0.4     6.0
25       Raichu     0.8    30.0
26    Sandshrew     0.6    12.0
27    Sandslash     1.0    29.5
28     Nidoran♀     0.4     7.0
29     Nidorina     0.8    20.0
30    Ni

In [19]:
# Select the three columns first, then slice rows by index label.
# .loc slices include BOTH endpoints, so labels 2..101 yield 100 rows.
print(df[['Name', 'Height', 'Weight']].loc[2:101])

           Name  Height  Weight
2      Venusaur     2.0   100.0
3    Charmander     0.6     8.5
4    Charmeleon     1.1    19.0
5     Charizard     1.7    90.5
6      Squirtle     0.5     9.0
..          ...     ...     ...
97       Krabby     0.4     6.5
98      Kingler     1.3    60.0
99      Voltorb     0.5    10.4
100   Electrode     1.2    66.6
101   Exeggcute     0.4     2.5

[100 rows x 3 columns]


### Filtering using pandas

In [21]:
# Boolean-mask filter: keep the rows whose Height is at least 2.
tall_pokemons = df.loc[df["Height"].ge(2)]
print(tall_pokemons)

      No        Name    Type1    Type2  Height  Weight  Legendary
2      3    Venusaur    Grass   Poison     2.0   100.0          0
22    23       Ekans   Poison      NaN     2.0     6.9          0
23    24       Arbok   Poison      NaN     3.5    65.0          0
94    95        Onix     Rock   Ground     8.8   210.0          0
102  103   Exeggutor    Grass  Psychic     2.0   120.0          0
114  115  Kangaskhan   Normal      NaN     2.2    80.0          0
129  130    Gyarados    Water   Flying     6.5   235.0          0
130  131      Lapras    Water      Ice     2.5   220.0          0
142  143     Snorlax   Normal      NaN     2.1   460.0          0
145  146     Moltres     Fire   Flying     2.0    60.0          1
147  148   Dragonair   Dragon      NaN     4.0    16.5          0
148  149   Dragonite   Dragon   Flying     2.2   210.0          0
149  150      Mewtwo  Psychic      NaN     2.0   122.0          1


In [22]:
# Boolean-mask filter: keep the rows whose Weight is at least 100.
heavy_pokemons = df.loc[df["Weight"].ge(100)]
print(heavy_pokemons)

      No       Name     Type1    Type2  Height  Weight  Legendary
2      3   Venusaur     Grass   Poison     2.0   100.0          0
58    59   Arcanine      Fire      NaN     1.9   155.0          0
67    68    Machamp  Fighting      NaN     1.6   130.0          0
74    75   Graveler      Rock   Ground     1.0   105.0          0
75    76      Golem      Rock   Ground     1.4   300.0          0
86    87    Dewgong     Water      Ice     1.7   120.0          0
90    91   Cloyster     Water      Ice     1.5   132.5          0
94    95       Onix      Rock   Ground     8.8   210.0          0
102  103  Exeggutor     Grass  Psychic     2.0   120.0          0
110  111    Rhyhorn    Ground     Rock     1.0   115.0          0
111  112     Rhydon    Ground     Rock     1.9   120.0          0
129  130   Gyarados     Water   Flying     6.5   235.0          0
130  131     Lapras     Water      Ice     2.5   220.0          0
142  143    Snorlax    Normal      NaN     2.1   460.0          0
148  149  

In [24]:
# Boolean-mask filter: keep the rows whose Legendary flag equals 1.
legendary_pokemons = df.loc[df["Legendary"].eq(1)]
print(legendary_pokemons)

      No      Name     Type1   Type2  Height  Weight  Legendary
143  144  Articuno       Ice  Flying     1.7    55.4          1
144  145    Zapdos  Electric  Flying     1.6    52.6          1
145  146   Moltres      Fire  Flying     2.0    60.0          1
149  150    Mewtwo   Psychic     NaN     2.0   122.0          1


### Aggregation

In [25]:
# Column-wise mean; numeric_only=True skips the text columns (Name, Type1, Type2).
print(df.mean(axis=0, numeric_only=True))

No           75.500000
Height        1.200000
Weight       46.231333
Legendary     0.026667
dtype: float64


In [26]:
# Column-wise total over the numeric columns only.
print(df.sum(axis=0, numeric_only=True))

No           11325.0
Height         180.0
Weight        6934.7
Legendary        4.0
dtype: float64


In [28]:
# Smallest value in each numeric column.
print(df.min(axis=0, numeric_only=True))

No           1.0
Height       0.2
Weight       0.1
Legendary    0.0
dtype: float64


In [29]:
# Largest value in each numeric column.
print(df.max(axis=0, numeric_only=True))

No           150.0
Height         8.8
Weight       460.0
Legendary      1.0
dtype: float64


In [30]:
# Number of non-missing entries per column; a count below the row total
# (as for Type2 in the output) reveals missing values.
print(df.count(axis=0))

No           150
Name         150
Type1        150
Type2         67
Height       150
Weight       150
Legendary    150
dtype: int64


In [31]:
# Mean of a single column, returned as a scalar rather than a Series.
print(df.loc[:, "Height"].mean())

1.2


In [32]:
# Total of the Height column.
print(df.loc[:, "Height"].sum())

180.0


In [33]:
# Smallest value in the Height column.
print(df.loc[:, "Height"].min())

0.2


In [34]:
# Largest value in the Height column.
print(df.loc[:, "Height"].max())

8.8


## Data cleaning

In [46]:
# Remove the 'No' column from the frame.
# errors='ignore' keeps this cell safe to re-run: because the result is bound
# back to `df`, a second execution without it raises KeyError ('No' is already gone).
# NOTE(review): the saved output below also lacks 'Legendary', which this cell
# does not drop — presumably removed by an earlier run or a since-deleted cell;
# confirm and make that step explicit so Restart & Run All reproduces the output.
df = df.drop(columns=['No'], errors='ignore')
df

Unnamed: 0,Name,Type1,Type2,Height,Weight
0,Bulbasaur,Grass,Poison,0.7,6.9
1,Ivysaur,Grass,Poison,1.0,13.0
2,Venusaur,Grass,Poison,2.0,100.0
3,Charmander,Fire,,0.6,8.5
4,Charmeleon,Fire,,1.1,19.0
...,...,...,...,...,...
145,Moltres,Fire,Flying,2.0,60.0
146,Dratini,Dragon,,1.8,3.3
147,Dragonair,Dragon,,4.0,16.5
148,Dragonite,Dragon,Flying,2.2,210.0


In [47]:
# Replace missing Type2 entries with the literal string "None" (a text
# placeholder, not Python's None); other columns are left untouched.
df = df.fillna(value={"Type2": "None"})

# Print the whole cleaned frame as untruncated plain text.
print(df.to_string())

           Name     Type1     Type2  Height  Weight
0     Bulbasaur     Grass    Poison     0.7     6.9
1       Ivysaur     Grass    Poison     1.0    13.0
2      Venusaur     Grass    Poison     2.0   100.0
3    Charmander      Fire      None     0.6     8.5
4    Charmeleon      Fire      None     1.1    19.0
5     Charizard      Fire    Flying     1.7    90.5
6      Squirtle     Water      None     0.5     9.0
7     Wartortle     Water      None     1.0    22.5
8     Blastoise     Water      None     1.6    85.5
9      Caterpie       Bug      None     0.3     2.9
10      Metapod       Bug      None     0.7     9.9
11   Butterfree       Bug    Flying     1.1    32.0
12       Weedle       Bug    Poison     0.3     3.2
13       Kakuna       Bug    Poison     0.6    10.0
14     Beedrill       Bug    Poison     1.0    29.5
15       Pidgey    Normal    Flying     0.3     1.8
16    Pidgeotto    Normal    Flying     1.1    30.0
17      Pidgeot    Normal    Flying     1.5    39.5
18      Ratt