# Pandas Essentials

In [5]:
import pandas as pd
import numpy as np

#### Creating DataFrame from CSV file

In [6]:
# Load the Pokemon dataset from a CSV file into a DataFrame
# (the path is relative to the notebook's working directory)
df = pd.read_csv('pokemon.csv')
# Preview the first five rows
df.head()

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,1,65,65,45,grass,poison,6.9,1,0
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0
2,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,3,122,120,80,grass,poison,100.0,1,0
3,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,4,60,50,65,fire,,8.5,1,0
4,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,5,80,65,80,fire,,19.0,1,0


#### Checking number of rows and columns

In [7]:
# shape is a (number_of_rows, number_of_columns) tuple
df.shape

(801, 41)

#### Checking for column names

In [8]:
# columns holds the column labels as a pandas Index
df.columns

Index(['abilities', 'against_bug', 'against_dark', 'against_dragon',
       'against_electric', 'against_fairy', 'against_fight', 'against_fire',
       'against_flying', 'against_ghost', 'against_grass', 'against_ground',
       'against_ice', 'against_normal', 'against_poison', 'against_psychic',
       'against_rock', 'against_steel', 'against_water', 'attack',
       'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate',
       'classfication', 'defense', 'experience_growth', 'height_m', 'hp',
       'japanese_name', 'name', 'percentage_male', 'pokedex_number',
       'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg',
       'generation', 'is_legendary'],
      dtype='object')

#### Checking datatypes

In [9]:
# info() prints a summary of the frame: the index range, and for every
# column its non-null count and dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 801 entries, 0 to 800
Data columns (total 41 columns):
abilities            801 non-null object
against_bug          801 non-null float64
against_dark         801 non-null float64
against_dragon       801 non-null float64
against_electric     801 non-null float64
against_fairy        801 non-null float64
against_fight        801 non-null float64
against_fire         801 non-null float64
against_flying       801 non-null float64
against_ghost        801 non-null float64
against_grass        801 non-null float64
against_ground       801 non-null float64
against_ice          801 non-null float64
against_normal       801 non-null float64
against_poison       801 non-null float64
against_psychic      801 non-null float64
against_rock         801 non-null float64
against_steel        801 non-null float64
against_water        801 non-null float64
attack               801 non-null int64
base_egg_steps       801 non-null int64
base_happiness    

In [10]:
# dtypes lists only the data type of each column (a Series indexed by column name)
df.dtypes

abilities             object
against_bug          float64
against_dark         float64
against_dragon       float64
against_electric     float64
against_fairy        float64
against_fight        float64
against_fire         float64
against_flying       float64
against_ghost        float64
against_grass        float64
against_ground       float64
against_ice          float64
against_normal       float64
against_poison       float64
against_psychic      float64
against_rock         float64
against_steel        float64
against_water        float64
attack                 int64
base_egg_steps         int64
base_happiness         int64
base_total             int64
capture_rate          object
classfication         object
defense                int64
experience_growth      int64
height_m             float64
hp                     int64
japanese_name         object
name                  object
percentage_male      float64
pokedex_number         int64
sp_attack              int64
sp_defense    

#### Numerical variables in dataset

In [11]:
# Keep only the numeric (integer and float) columns; the 'number' selector
# matches every NumPy numeric dtype.
num_features = df.select_dtypes(include='number')
num_features.columns

Index(['against_bug', 'against_dark', 'against_dragon', 'against_electric',
       'against_fairy', 'against_fight', 'against_fire', 'against_flying',
       'against_ghost', 'against_grass', 'against_ground', 'against_ice',
       'against_normal', 'against_poison', 'against_psychic', 'against_rock',
       'against_steel', 'against_water', 'attack', 'base_egg_steps',
       'base_happiness', 'base_total', 'defense', 'experience_growth',
       'height_m', 'hp', 'percentage_male', 'pokedex_number', 'sp_attack',
       'sp_defense', 'speed', 'weight_kg', 'generation', 'is_legendary'],
      dtype='object')

#### Categorical Variables in dataset

In [12]:
# Object-dtype (string-like) columns are treated as the categorical features.
# The builtin `object` is used instead of the `np.object` alias, which NumPy
# deprecated in 1.20 and removed in 1.24 (it now raises AttributeError).
cat_features = df.select_dtypes(include=[object])
cat_features.columns

Index(['abilities', 'capture_rate', 'classfication', 'japanese_name', 'name',
       'type1', 'type2'],
      dtype='object')

#### Estimate Skewness

In [13]:
# Skewness of every numeric column.
# numeric_only=True is passed explicitly: since pandas 2.0 the default no
# longer silently skips the object (string) columns and the call would raise.
df.skew(numeric_only=True)

against_bug          1.756470
against_dark         1.721120
against_dragon       0.003172
against_electric     0.934840
against_fairy        2.475132
against_fight        1.257115
against_fire         1.610319
against_flying       1.770791
against_ghost        0.453467
against_grass        1.971150
against_ground       1.079249
against_ice          1.571536
against_normal      -2.289273
against_poison       1.373468
against_psychic      0.937089
against_rock         1.837769
against_steel        1.848439
against_water        2.080850
attack               0.530811
base_egg_steps       2.955754
base_happiness      -1.182299
base_total           0.174593
defense              1.121058
experience_growth    0.311128
height_m             5.080016
hp                   1.826591
percentage_male      0.066347
pokedex_number       0.000000
sp_attack            0.778371
sp_defense           0.867620
speed                0.438918
weight_kg            4.871044
generation           0.117207
is_legenda

#### Examine Kurtosis

In [14]:
# Kurtosis of every numeric column.
# numeric_only=True is passed explicitly: since pandas 2.0 the default no
# longer silently skips the object (string) columns and the call would raise.
df.kurt(numeric_only=True)

against_bug           4.997981
against_dark          5.410047
against_dragon        4.450350
against_electric      2.113157
against_fairy         9.832774
against_fight         2.681553
against_fire          3.748827
against_flying        5.061409
against_ghost         2.033599
against_grass         4.588665
against_ground        2.667510
against_ice           3.365955
against_normal        4.145825
against_poison        5.085349
against_psychic       3.624819
against_rock          4.577708
against_steel         6.119285
against_water         6.519262
attack                0.071337
base_egg_steps        7.581303
base_happiness        5.936989
base_total           -0.527958
defense               2.583359
experience_growth     2.852908
height_m             43.104657
hp                    8.334973
percentage_male       1.159101
pokedex_number       -1.200000
sp_attack             0.412486
sp_defense            1.525919
speed                -0.118668
weight_kg            31.735820
generati

#### Checking Statistical Characteristics of Numerical Columns

In [15]:
# describe() summarises each numeric column: count, mean, std, min,
# the 25%/50%/75% quartiles and max.
# A count lower than the number of rows means the column has missing values.

df.describe()

Unnamed: 0,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,against_grass,...,height_m,hp,percentage_male,pokedex_number,sp_attack,sp_defense,speed,weight_kg,generation,is_legendary
count,801.0,801.0,801.0,801.0,801.0,801.0,801.0,801.0,801.0,801.0,...,781.0,801.0,703.0,801.0,801.0,801.0,801.0,781.0,801.0,801.0
mean,0.996255,1.057116,0.968789,1.07397,1.068976,1.065543,1.135456,1.192884,0.985019,1.03402,...,1.163892,68.958801,55.155761,401.0,71.305868,70.911361,66.334582,61.378105,3.690387,0.087391
std,0.597248,0.438142,0.353058,0.654962,0.522167,0.717251,0.691853,0.604488,0.558256,0.788896,...,1.080326,26.576015,20.261623,231.373075,32.353826,27.942501,28.907662,109.354766,1.93042,0.282583
min,0.25,0.25,0.0,0.0,0.25,0.0,0.25,0.25,0.0,0.25,...,0.1,1.0,0.0,1.0,10.0,20.0,5.0,0.1,1.0,0.0
25%,0.5,1.0,1.0,0.5,1.0,0.5,0.5,1.0,1.0,0.5,...,0.6,50.0,50.0,201.0,45.0,50.0,45.0,9.0,2.0,0.0
50%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,65.0,50.0,401.0,65.0,66.0,65.0,27.3,4.0,0.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,...,1.5,80.0,50.0,601.0,91.0,90.0,85.0,64.8,5.0,0.0
max,4.0,4.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,14.5,255.0,100.0,801.0,194.0,230.0,180.0,999.9,7.0,1.0


#### Checking how many times each value occurs in a categorical variable

In [16]:
# value_counts() tallies how often each distinct value occurs in a column,
# most frequent first; here it is applied to the categorical 'type1' column
df['type1'].value_counts()

water       114
normal      105
grass        78
bug          72
psychic      53
fire         52
rock         45
electric     39
ground       32
poison       32
dark         29
fighting     28
dragon       27
ghost        27
steel        24
ice          23
fairy        18
flying        3
Name: type1, dtype: int64

#### Sorting a DataFrame by a column

In [17]:
# Order the rows by the 'abilities' column (ascending is the default)
# and preview the first five.
df.sort_values('abilities').head()

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
473,"['Adaptability', 'Download', 'Analytic']",1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,0.0,...,,474,135,75,90,normal,,34.0,4,0
424,"['Aftermath', 'Unburden', 'Flare Boost']",0.25,2.0,1.0,2.0,1.0,0.0,1.0,1.0,2.0,...,50.0,425,60,44,70,ghost,flying,1.2,4,0
425,"['Aftermath', 'Unburden', 'Flare Boost']",0.25,2.0,1.0,2.0,1.0,0.0,1.0,1.0,2.0,...,50.0,426,90,54,80,ghost,flying,15.0,4,0
383,['Air Lock'],0.5,1.0,2.0,1.0,2.0,0.5,0.5,1.0,1.0,...,,384,180,100,115,dragon,flying,206.5,3,1
452,"['Anticipation', 'Dry Skin', 'Poison Touch']",0.25,0.5,1.0,1.0,1.0,0.5,1.0,2.0,1.0,...,50.0,453,61,40,50,poison,fighting,23.0,4,0


In [18]:
# axis=1 sorts the column labels alphabetically; the rows keep their order.
# No .head() is applied, so the whole (truncated) frame is displayed.
df.sort_index(axis=1)

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,japanese_name,name,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg
0,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,Fushigidaneフシギダネ,Bulbasaur,88.1,1,65,65,45,grass,poison,6.9
1,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,Fushigisouフシギソウ,Ivysaur,88.1,2,80,80,60,grass,poison,13.0
2,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,Fushigibanaフシギバナ,Venusaur,88.1,3,122,120,80,grass,poison,100.0
3,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,Hitokageヒトカゲ,Charmander,88.1,4,60,50,65,fire,,8.5
4,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,Lizardoリザード,Charmeleon,88.1,5,80,65,80,fire,,19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
796,['Beast Boost'],0.25,1.0,0.5,2.0,0.5,1.0,2.0,0.5,1.0,...,Tekkaguyaテッカグヤ,Celesteela,,797,107,101,61,steel,flying,999.9
797,['Beast Boost'],1.00,1.0,0.5,0.5,0.5,2.0,4.0,1.0,1.0,...,Kamiturugiカミツルギ,Kartana,,798,59,31,109,grass,steel,0.1
798,['Beast Boost'],2.00,0.5,2.0,0.5,4.0,2.0,0.5,1.0,0.5,...,Akuzikingアクジキング,Guzzlord,,799,97,53,43,dark,dragon,888.0
799,['Prism Armor'],2.00,2.0,1.0,1.0,1.0,0.5,1.0,1.0,2.0,...,Necrozmaネクロズマ,Necrozma,,800,127,89,79,psychic,,230.0


#### Writing conditional Statements

In [19]:
# Boolean-mask filtering: keep only the rows whose weight_kg exceeds 100
# (rows with a missing weight compare as False and are left out),
# then preview the first five matches
df[df['weight_kg'] > 100].head()

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
58,"['Intimidate', 'Flash Fire', 'Justified']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,75.4,59,100,80,95,fire,,155.0,1,0
67,"['Guts', 'No Guard', 'Steadfast']",0.5,0.5,1.0,1.0,2.0,1.0,1.0,2.0,1.0,...,75.4,68,65,85,55,fighting,,130.0,1,0
86,"['Thick Fat', 'Hydration', 'Ice Body']",1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,...,50.0,87,70,95,70,water,ice,120.0,1,0
90,"['Shell Armor', 'Skill Link', 'Overcoat']",1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,...,50.0,91,85,45,70,water,ice,132.5,1,0
94,"['Rock Head', 'Sturdy', 'Weak Armor']",1.0,1.0,1.0,0.0,1.0,2.0,0.5,0.5,1.0,...,50.0,95,30,45,70,rock,ground,210.0,1,0


#### loc and iloc

"loc" is label-based, which means that we have to specify the labels (names) of the rows and columns that we want to filter out.

"iloc" is integer-position-based. So here, we have to specify rows and columns by their integer position (0, 1, 2, ...).

In [20]:
# loc selects by label: row labels 0 through 5 and the columns from
# 'abilities' through 'against_dark'.
# Label slices include BOTH endpoints, so this returns six rows.

df.loc[0:5,'abilities':'against_dark']

Unnamed: 0,abilities,against_bug,against_dark
0,"['Overgrow', 'Chlorophyll']",1.0,1.0
1,"['Overgrow', 'Chlorophyll']",1.0,1.0
2,"['Overgrow', 'Chlorophyll']",1.0,1.0
3,"['Blaze', 'Solar Power']",0.5,1.0
4,"['Blaze', 'Solar Power']",0.5,1.0
5,"['Blaze', 'Solar Power']",0.25,1.0


In [21]:
# iloc selects by integer position and, like Python slicing, excludes the
# stop value: rows at positions 0-4 and columns at positions 2-4
df.iloc[0:5, 2:5]

Unnamed: 0,against_dark,against_dragon,against_electric
0,1.0,1.0,0.5
1,1.0,1.0,0.5
2,1.0,1.0,0.5
3,1.0,1.0,1.0
4,1.0,1.0,1.0


#### Writing conditional statements with indexes

In [22]:
# loc also accepts a boolean mask: keep the rows where height_m is above 10

df.loc[df.height_m > 10]

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
320,"['Water Veil', 'Oblivious', 'Pressure']",1.0,1.0,1.0,2.0,1.0,1.0,0.5,1.0,1.0,...,50.0,321,90,45,60,water,,398.0,3,0


#### Multiple conditional statements

In [23]:
# Writing multiple conditions: combine boolean masks with & (and);
# each comparison needs its own parentheses because & binds tighter than >

df.loc[(df.height_m > 5) & (df.speed > 100)]

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
248,"['Pressure', 'Multiscale']",1.0,2.0,1.0,2.0,1.0,0.25,1.0,1.0,2.0,...,,249,90,154,110,psychic,flying,216.0,2,1
383,['Air Lock'],0.5,1.0,2.0,1.0,2.0,0.5,0.5,1.0,1.0,...,,384,180,100,115,dragon,flying,206.5,3,1


#### Slicing

In [24]:
# Selecting a range of rows by label; the end label is included,
# so 0:3 returns rows 0, 1, 2 and 3

df.loc[0:3]

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,1,65,65,45,grass,poison,6.9,1,0
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0
2,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,3,122,120,80,grass,poison,100.0,1,0
3,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,4,60,50,65,fire,,8.5,1,0


#### Retrieving selected columns for the rows that meet a condition

In [25]:
# First argument: boolean mask choosing the rows (speed above 100);
# second argument: list of the columns to return

df.loc[(df.speed > 100), ['name', 'speed']]

Unnamed: 0,name,speed
14,Beedrill,145
17,Pidgeot,121
25,Raichu,110
37,Ninetales,109
50,Dugtrio,110
...,...,...
773,Minior,120
784,Tapu Koko,130
792,Nihilego,103
794,Pheromosa,151


#### Adding column to dataframe based on condition

In [26]:
# Derive a two-level label from 'speed': strictly above 100 is 'SuperFast',
# everything else is 'Fast'. np.where evaluates the condition for the whole
# column at once.

df['SpeedType'] = np.where(df['speed'] > 100, 'SuperFast', 'Fast')
df['SpeedType'].value_counts()

Fast         705
SuperFast     96
Name: SpeedType, dtype: int64

#### Adding column based on multiple conditions

In [27]:
# Overwrite SpeedType with a four-level label chosen by speed band.
# The masks do not overlap, so the order of assignment does not matter.
# NOTE(review): 'Furions' looks like a typo for 'Furious'; it is kept as-is
# because the outputs shown later in the notebook use this exact label.
speed = df['speed']
speed_bands = {
    'Furions': speed >= 150,
    'Fierce': (speed >= 100) & (speed < 150),
    'Scary': (speed >= 50) & (speed < 100),
    'Normal': (speed >= 0) & (speed < 50),
}
for label, in_band in speed_bands.items():
    df.loc[in_band, 'SpeedType'] = label

df['SpeedType'].value_counts()

Scary      436
Normal     243
Fierce     116
Furions      6
Name: SpeedType, dtype: int64

#### The last row

In [28]:
# tail(1) returns the final row as a one-row DataFrame

df.tail(1)

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary,SpeedType
800,['Soul-Heart'],0.25,0.5,0.0,1.0,0.5,1.0,2.0,0.5,1.0,...,801,130,115,65,steel,fairy,80.5,7,1,Scary


#### Applying a function to each value of a column

In [29]:
# Series.apply() calls the given function on each value of the column;
# here it takes the square root of every 'speed' value

df['speed'].apply(np.sqrt)

0       6.708204
1       7.745967
2       8.944272
3       8.062258
4       8.944272
         ...    
796     7.810250
797    10.440307
798     6.557439
799     8.888194
800     8.062258
Name: speed, Length: 801, dtype: float64

#### Apply functions using map() function

In [30]:
# Distribution of the 0/1 'is_legendary' flag before it is relabelled
df['is_legendary'].value_counts()

0    731
1     70
Name: is_legendary, dtype: int64

In [31]:
# Lookup table from the stored flag to a readable label
dic = {0:'No', 1:'YES'}
# map() substitutes each value through the dictionary and returns a new
# Series (values missing from the dictionary would become NaN);
# df itself is not modified because the result is not assigned back
df['is_legendary'].map(dic).value_counts()

No     731
YES     70
Name: is_legendary, dtype: int64

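`map()` with a dictionary works only on a Series. In the pandas version used here a DataFrame has no `map()` method, so calling it on a DataFrame raises an error. (pandas 2.1 and later add an element-wise `DataFrame.map()`, but it takes a function, not a dictionary.)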

#### Replacing the values

In [32]:
# replace() accepts a {old_value: new_value} dictionary; it returns a new
# Series, so df is unchanged here
df['is_legendary'].replace({0:'NOO', 1:'YESSS'}).value_counts()

NOO      731
YESSS     70
Name: is_legendary, dtype: int64

In [33]:
# replace() also accepts two lists of equal length: each value in the first
# list is replaced by the value at the same position in the second list
df['is_legendary'].replace([0,1], ['Zero', 'One']).value_counts()

Zero    731
One      70
Name: is_legendary, dtype: int64

In [34]:
# replace() matches on values, not on positions: every 0 and every 1 found in
# the Series is looked up in `to_replace` and swapped for the paired `value`.
# The call returns a new Series; `s` itself keeps its original contents.
s = pd.Series(data=[0, 1, 1, 1])
s.replace(to_replace=[0, 1, 1, 1], value=[0, 0, 0, 0])

0    0
1    0
2    0
3    0
dtype: int64

#### Groupby

In [35]:
# Preview the frame again; it now ends with the SpeedType column added earlier
df.head()

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary,SpeedType
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,1,65,65,45,grass,poison,6.9,1,0,Normal
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,2,80,80,60,grass,poison,13.0,1,0,Scary
2,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,3,122,120,80,grass,poison,100.0,1,0,Scary
3,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,4,60,50,65,fire,,8.5,1,0,Scary
4,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,5,80,65,80,fire,,19.0,1,0,Scary


In [36]:
# Mean speed and height for each SpeedType group.
# The columns are selected with a list (double brackets): indexing a GroupBy
# with a bare tuple of names, ['speed','height_m'], was deprecated in
# pandas 1.0 and raises an error from pandas 2.0 onward.
df.groupby(['SpeedType'])[['speed', 'height_m']].mean()

Unnamed: 0_level_0,speed,height_m
SpeedType,Unnamed: 1_level_1,Unnamed: 2_level_1
Fierce,111.241379,1.384821
Furions,156.833333,1.466667
Normal,34.292181,0.866525
Scary,71.0,1.266042


#### Summary Tables

In [37]:
# Contingency table: one row per SpeedType label, one column per
# is_legendary value, each cell holding the number of matching rows
pd.crosstab(df['SpeedType'], df['is_legendary'])

is_legendary,0,1
SpeedType,Unnamed: 1_level_1,Unnamed: 2_level_1
Fierce,86,30
Furions,4,2
Normal,240,3
Scary,401,35


#### DataFrame transformations

In [38]:
# Build a derived column from existing ones:
# (sp_attack + sp_defense) / speed, scaled by 100.

df['formula'] = df['sp_attack'].add(df['sp_defense']).div(df['speed']).mul(100)
df['formula'].head()

0    288.888889
1    266.666667
2    302.500000
3    169.230769
4    181.250000
Name: formula, dtype: float64

#### Dropping a column

In [39]:
# Remove the temporary 'formula' column from df itself (in place).
# Re-running this cell on its own raises KeyError because the column is
# already gone; re-run the cell that creates 'formula' first.
df.drop(columns=['formula'], inplace=True)
df.head()

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary,SpeedType
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,1,65,65,45,grass,poison,6.9,1,0,Normal
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,2,80,80,60,grass,poison,13.0,1,0,Scary
2,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,3,122,120,80,grass,poison,100.0,1,0,Scary
3,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,4,60,50,65,fire,,8.5,1,0,Scary
4,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,5,80,65,80,fire,,19.0,1,0,Scary


#### Correlation for Numerical Variables

In [40]:
# Pairwise (Pearson, the default) correlation between the numeric columns.
# The numeric columns are selected explicitly: since pandas 2.0, corr() no
# longer silently drops object (string) columns and would raise on them.
# select_dtypes is used instead of corr(numeric_only=True) because that
# keyword only exists from pandas 1.5.
correlation = df.select_dtypes(include=[np.number]).corr()
correlation

Unnamed: 0,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,against_grass,...,height_m,hp,percentage_male,pokedex_number,sp_attack,sp_defense,speed,weight_kg,generation,is_legendary
against_bug,1.0,0.230107,0.16543,-0.246943,0.239566,0.137902,0.202778,0.183343,0.129174,0.079197,...,-0.060858,0.034897,-0.048373,0.004618,0.055352,-0.002342,-0.043802,-0.031909,-0.001549,0.027864
against_dark,0.230107,1.0,0.14083,-0.01583,-0.301354,-0.357981,0.010527,-0.179697,0.672337,-0.006533,...,0.019219,0.010589,-0.097547,0.009066,0.170849,0.132507,-0.000326,0.038871,-0.016013,0.136315
against_dragon,0.16543,0.14083,1.0,-0.108928,0.439705,0.035237,-0.26157,0.06485,-0.049941,-0.037135,...,0.164464,0.089721,0.061785,0.000872,0.039739,-0.047416,0.078123,0.126003,-0.025201,0.014844
against_electric,-0.246943,-0.01583,-0.108928,1.0,-0.089864,-0.102798,-0.279029,-0.111461,-0.073031,0.056209,...,0.003068,-0.035354,0.051265,-0.068552,0.022305,0.019193,0.111422,-0.102926,-0.06318,-0.023151
against_fairy,0.239566,-0.301354,0.439705,-0.089864,1.0,0.157712,-0.169489,0.199862,-0.120806,0.052899,...,0.11536,0.129284,0.010527,0.176651,-0.010296,0.002754,0.065401,0.098523,0.150801,0.050165
against_fight,0.137902,-0.357981,0.035237,-0.102798,0.157712,1.0,-0.07648,-0.318941,-0.546982,0.269157,...,0.059184,0.109425,0.048101,0.018296,-0.118481,-0.04446,-0.050495,0.161564,0.000681,-0.059132
against_fire,0.202778,0.010527,-0.26157,-0.279029,-0.169489,-0.07648,1.0,0.528127,0.05057,-0.463634,...,-0.085808,-0.099242,-0.120378,0.052012,-0.088578,-0.01952,-0.079129,-0.044124,0.061157,-0.011073
against_flying,0.183343,-0.179697,0.06485,-0.111461,0.199862,-0.318941,0.528127,1.0,0.013204,-0.358467,...,-0.072985,-0.060624,0.022895,0.020324,-0.111243,-0.079022,-0.043381,-0.126744,0.058204,-0.062214
against_ghost,0.129174,0.672337,-0.049941,-0.073031,-0.120806,-0.546982,0.05057,0.013204,1.0,0.029542,...,0.032553,-0.062052,0.059852,0.063189,0.233309,0.195439,-0.029007,0.047869,0.038027,0.170746
against_grass,0.079197,-0.006533,-0.037135,0.056209,0.052899,0.269157,-0.463634,-0.358467,0.029542,1.0,...,0.030583,0.056886,0.070926,-0.092353,-0.093899,-0.056427,-0.153618,0.050129,-0.107987,-0.070826


#### Set membership check with isin()

In [41]:
# isin() returns a boolean Series: True where the value is one of the
# listed items (here only 'Normal'), False everywhere else
df['SpeedType'].isin(['Normal'])

0       True
1      False
2      False
3      False
4      False
       ...  
796    False
797    False
798     True
799    False
800    False
Name: SpeedType, Length: 801, dtype: bool

#### Identifying the unique elements in a column

In [42]:
# unique() returns the distinct values of the column as an array,
# in the order in which they first appear
df['SpeedType'].unique()

array(['Normal', 'Scary', 'Fierce', 'Furions'], dtype=object)

### Identifying NULL values in all columns

In [43]:
# isnull() marks every missing cell as True; summing the booleans per column
# gives the number of missing values in each column
df.isnull().sum()

abilities              0
against_bug            0
against_dark           0
against_dragon         0
against_electric       0
against_fairy          0
against_fight          0
against_fire           0
against_flying         0
against_ghost          0
against_grass          0
against_ground         0
against_ice            0
against_normal         0
against_poison         0
against_psychic        0
against_rock           0
against_steel          0
against_water          0
attack                 0
base_egg_steps         0
base_happiness         0
base_total             0
capture_rate           0
classfication          0
defense                0
experience_growth      0
height_m              20
hp                     0
japanese_name          0
name                   0
percentage_male       98
pokedex_number         0
sp_attack              0
sp_defense             0
speed                  0
type1                  0
type2                384
weight_kg             20
generation             0
