In [120]:
import pandas as pd

In [141]:
df = pd.read_csv('pokemon_data.csv')
df.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False


### 1. Reading data in pandas 

In [56]:
# Headers
print(list(df.columns))

print('----------------------------------------------')

# Read data from a column/columns
print(df['Name'][0:2])
# print(df['Name', 'Type 1'][0:2])
    
print('----------------------------------------------')

# Read data from a specific row
print(df.iloc[15:17])

print('----------------------------------------------')

# Read data from a specific location [Row, Column]
print(df.iloc[1, 1])
# print(df.iloc[1, 1:3])

print('----------------------------------------------')

# Read data with a textual splice - Use df.loc
# Can use &, | (AND or OR)  to stack conditions
print(df.loc[(df['Type 1'] == 'Fire') & (df['Type 2'] == 'Dragon')])

['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary']
----------------------------------------------
0    Bulbasaur
1      Ivysaur
Name: Name, dtype: object
----------------------------------------------
     #        Name Type 1  Type 2  HP  Attack  Defense  Sp. Atk  Sp. Def  \
15  12  Butterfree    Bug  Flying  60      45       50       90       80   
16  13      Weedle    Bug  Poison  40      35       30       20       20   

    Speed  Generation  Legendary  
15     70           1      False  
16     50           1      False  
----------------------------------------------
Ivysaur
----------------------------------------------
   #                       Name Type 1  Type 2  HP  Attack  Defense  Sp. Atk  \
7  6  CharizardMega Charizard X   Fire  Dragon  78     130      111      130   

   Sp. Def  Speed  Generation  Legendary  
7       85    100           1      False  


In [49]:
# Using df.iterows()
for index, row in df.iterrows():
    if index < 3:
        print(index, row['Name'])
    else:
        pass

0 Bulbasaur
1 Ivysaur
2 Venusaur


### 2. Sorting/Describing data

In [58]:
# Describe shows summary statistics
df.describe()

Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


In [65]:
# Use sort values for sorting
# Sort by 1 type
df.sort_values(by='HP', ascending=False)[0:5]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
261,242,Blissey,Normal,,255,10,10,75,135,55,2,False
121,113,Chansey,Normal,,250,5,5,35,105,50,1,False
217,202,Wobbuffet,Psychic,,190,33,58,33,58,33,2,False
351,321,Wailord,Water,,170,90,45,90,45,60,3,False
655,594,Alomomola,Water,,165,75,80,40,45,65,5,False


In [69]:
# Sorting by multiple columns
# Ascending can specify different columns too using 1,0 -> 1=True, 0=False
df.sort_values(by=['Type 1', 'HP'], ascending=[1,0])[0:5]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
520,469,Yanmega,Bug,Flying,86,76,86,116,56,95,4,False
698,637,Volcarona,Bug,Fire,85,60,65,135,105,100,5,False
231,214,Heracross,Bug,Fighting,80,125,75,40,95,85,2,False
232,214,HeracrossMega Heracross,Bug,Fighting,80,185,115,40,105,75,2,False
678,617,Accelgor,Bug,,80,70,40,100,60,145,5,False


### 3. Making changes to data

In [142]:
# Create a new column
df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
df.head(2)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405


In [78]:
# Create new column using iloc
# Add all rows, then columns 4 to 9, sum along x-axis
df['Total'] = df.iloc[:, 4:10].sum(axis=1)
df.head(2)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405


In [76]:
# Deleting a column
df = df.drop(columns=['Total'])
df.head(2)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False


In [80]:
# Re-order columns
columns = list(df.columns)
df = df[columns[0:4] + [columns[-1]] + columns[4:12]]
df.head(2)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False


### 4. Saving dataframe

In [82]:
# Saving dataframe into a csv file
# index=False removes the index from the csv file
# df.to_excel('output.xlsx', index=False)
# df.to_csv('output.txt', index=False, sep='\t') -> For tab separated text csv
df.to_csv('output.csv', index=False)

### 5. Filtering data

In [83]:
# Filtering data using loc
df.loc[(df['Type 1'] == 'Fire') & (df['Type 2'] == 'Dragon')]

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
7,6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False


In [85]:
# Can also filter/splice by this method
df[(df['Type 1'] == 'Fire') & (df['Type 2'] == 'Dragon')]

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
7,6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False


### 6. Resetting index

In [87]:
# Used to reset the index of the dataframe
# drop=True removes the old index column
# inplace=True makes the operation permanent without assigning a new variable
df.reset_index(drop=True)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


### 7.  Regular expression filtering

In [89]:
# Filtering 'Name' columns with name containg the string 'Mega'
# For the reverse filtering -> 'Name' without 'Mega' -> df.loc[~df['Name'].str.contains('Mega')]
df.loc[df['Name'].str.contains('Mega')][0:5]

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
7,6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False
8,6,CharizardMega Charizard Y,Fire,Flying,634,78,104,78,159,115,100,1,False
12,9,BlastoiseMega Blastoise,Water,,630,79,103,120,135,115,78,1,False
19,15,BeedrillMega Beedrill,Bug,Poison,495,65,150,40,15,80,145,1,False


In [114]:
import re

# flags=re.I -> Used to ignore cases
# regex=True -> turns on regular expression
# pattern -> starts with pi, then contains 0-4 a-z alphabets

df.loc[df['Name'].str.contains('^(pi)[a-z]{0,4}', regex=True, flags=re.I)]

  return func(self, *args, **kwargs)


Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
20,16,Pidgey,Normal,Flying,251,40,45,40,35,35,56,1,False
21,17,Pidgeotto,Normal,Flying,349,63,60,55,50,50,71,1,False
22,18,Pidgeot,Normal,Flying,479,83,80,75,70,70,101,1,False
23,18,PidgeotMega Pidgeot,Normal,Flying,579,83,80,80,135,80,121,1,False
30,25,Pikachu,Electric,,320,35,55,40,50,50,90,1,False
136,127,Pinsir,Bug,,500,65,125,100,55,70,85,1,False
137,127,PinsirMega Pinsir,Bug,Flying,600,65,155,120,65,90,105,1,False
186,172,Pichu,Electric,,205,20,40,15,35,35,60,2,False
219,204,Pineco,Bug,,290,50,65,90,35,35,15,2,False
239,221,Piloswine,Ice,Ground,450,100,100,80,60,60,50,2,False


### 8. Conditional changes

In [122]:
# Used to specify a condition, then make a change to all rows that meet condition
# In this case, we're changing all Type 1 = 'Fire' to Type 1 = 'Flamer'
df.loc[df['Type 1'] == 'Fire', 'Type 1'] = 'Flamer'
df.loc[df['Type 1'] == 'Flamer'][0:5]


# Alternative way to make conditional change using a for loop
""" 
for index, data in df.iterrows():
    if data['Type 1'] == 'Flamer':
        df['Type 1'] = 'Fire'
"""

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
4,4,Charmander,Flamer,,39,52,43,60,50,65,1,False
5,5,Charmeleon,Flamer,,58,64,58,80,65,80,1,False
6,6,Charizard,Flamer,Flying,78,84,78,109,85,100,1,False
7,6,CharizardMega Charizard X,Flamer,Dragon,78,130,111,130,85,100,1,False
8,6,CharizardMega Charizard Y,Flamer,Flying,78,104,78,159,115,100,1,False


In [145]:
# Making multiple conditional changes using loc
df.loc[df['Total'] > 500, ['Generation', 'Legendary']] = ['Test 1', 'Test 2']
df.head(5)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,Test 1,Test 2,525
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,Test 1,Test 2,625
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False,309


### 9. Aggregation (Group By)

In [162]:
df = pd.read_csv('pokemon_data.csv')

# Other agg functions sum()/count()
# Use .reset_index() to convert into a dataframe from groupby object
df.groupby(by=['Type 1']).mean().sort_values(by=['Attack'], ascending=False)

Unnamed: 0_level_0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Type 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Dragon,474.375,83.3125,112.125,86.375,96.84375,88.84375,83.03125,3.875,0.375
Fighting,363.851852,69.851852,96.777778,65.925926,53.111111,64.703704,66.074074,3.37037,0.0
Ground,356.28125,73.78125,95.75,84.84375,56.46875,62.75,63.90625,3.15625,0.125
Rock,392.727273,65.363636,92.863636,100.795455,63.340909,75.477273,55.909091,3.454545,0.090909
Steel,442.851852,65.222222,92.703704,126.37037,67.518519,80.62963,55.259259,3.851852,0.148148
Dark,461.354839,66.806452,88.387097,70.225806,74.645161,69.516129,76.16129,4.032258,0.064516
Fire,327.403846,69.903846,84.769231,67.769231,88.980769,72.211538,74.442308,3.211538,0.096154
Flying,677.75,70.75,78.75,66.25,94.25,72.5,102.5,5.5,0.5
Poison,251.785714,67.25,74.678571,68.821429,60.428571,64.392857,63.571429,2.535714,0.0
Water,303.089286,72.0625,74.151786,72.946429,74.8125,70.517857,65.964286,2.857143,0.035714


### 10. Working with large amounts of data

In [167]:
# Use chunksize to break dataframe into different batches
# chunksize=5 means load 5 rows

new_df = pd.DataFrame(columns=df.columns)

for df in pd.read_csv('pokemon_data.csv', chunksize=5):
    results = df.groupby(['Type 1']).mean()
    
    new_df = pd.concat([new_df, results])

new_df

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Fire,4.00,,,,39.00,52.00,43.00,60.00,50.00,65.00,1.0,False
Grass,2.25,,,,66.25,73.25,79.50,91.75,91.25,66.25,1.0,False
Fire,5.75,,,,73.00,95.50,81.25,119.50,87.50,95.00,1.0,False
Water,7.00,,,,44.00,48.00,65.00,50.00,64.00,43.00,1.0,False
Bug,10.50,,,,47.50,25.00,45.00,22.50,22.50,37.50,1.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...
Fairy,716.00,,,,126.00,131.00,95.00,131.00,98.00,99.00,6.0,True
Flying,714.50,,,,62.50,50.00,57.50,71.00,60.00,89.00,6.0,False
Fire,721.00,,,,80.00,110.00,120.00,130.00,90.00,70.00,6.0,True
Psychic,720.00,,,,80.00,135.00,60.00,160.00,130.00,75.00,6.0,True
