# Pandas
- Good for handling datasets whose columns have different data types

In [1]:
import numpy as np
import pandas as pd

## DataFrame

### Create

#### Dictionary

In [2]:
# Column-oriented data: each key becomes a column label and each list holds
# that column's values, one entry per country (all lists share the same order).
# The variable is named `brics_data` rather than `dict` so that the built-in
# `dict` type is not shadowed for the rest of the kernel session.
brics_data = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasilia", "Moscow", "New Delhi", "Beijing", "Pretoria"],
    "area": [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98]
}
# Without an explicit index, the rows get default integer labels 0..4.
brics = pd.DataFrame(brics_data)
brics.head()

Unnamed: 0,country,capital,area,population
0,Brazil,Brasilia,8.516,200.4
1,Russia,Moscow,17.1,143.5
2,India,New Delhi,3.286,1252.0
3,China,Beijing,9.597,1357.0
4,South Africa,Pretoria,1.221,52.98


In [3]:
# Replace the default integer row labels with two-letter country codes,
# listed in the same order as the rows.
country_codes = ["BR", "RU", "IN", "CH", "SA"]
brics.index = country_codes
brics.head()

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


#### Import

In [4]:
# Rebind `brics` to the table loaded from a CSV file; index_col=0 makes the
# first CSV column the row index (displayed as "country_ab" in the output).
brics = pd.read_csv('./datasets/brics.csv', index_col=0)
brics.head()

Unnamed: 0_level_0,country,capital,area,population
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


### Index and select data
- Square brackets
- `loc` and `iloc` indexers

#### Square brackets []

In [5]:
# Single brackets with one column label return that column as a Series.
brics['country']

country_ab
BR          Brazil
RU          Russia
IN           India
CH           China
SA    South Africa
Name: country, dtype: object

In [6]:
# Confirm that single-bracket column selection yields a pandas Series.
type(brics['country'])

pandas.core.series.Series

In [7]:
# Passing a list of labels keeps the result a DataFrame, even for one column.
brics[['country']]

Unnamed: 0_level_0,country
country_ab,Unnamed: 1_level_1
BR,Brazil
RU,Russia
IN,India
CH,China
SA,South Africa


In [8]:
# Select several columns at once by passing a list of column labels.
brics[['country', 'capital']]

Unnamed: 0_level_0,country,capital
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1
BR,Brazil,Brasilia
RU,Russia,Moscow
IN,India,New Delhi
CH,China,Beijing
SA,South Africa,Pretoria


In [9]:
# A slice inside square brackets selects rows by position; the end (4) is
# exclusive, so this returns the rows at positions 1, 2 and 3.
brics[1:4]

Unnamed: 0_level_0,country,capital,area,population
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0


#### loc

In [10]:
# A single row label returns that row as a Series indexed by the column names.
brics.loc["RU"] # Series

country       Russia
capital       Moscow
area            17.1
population     143.5
Name: RU, dtype: object

In [11]:
# Wrapping the label in a list returns a one-row DataFrame instead of a Series.
brics.loc[['RU']] # DataFrame

Unnamed: 0_level_0,country,capital,area,population
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RU,Russia,Moscow,17.1,143.5


In [12]:
# Select several rows by passing a list of row labels.
brics.loc[['RU', 'IN', 'CH']]

Unnamed: 0_level_0,country,capital,area,population
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0


In [13]:
# Row labels come first, column labels second: the result is the intersection
# of the listed rows and the listed columns.
brics.loc[['RU', 'IN', 'CH'], ["country", "capital"]]

Unnamed: 0_level_0,country,capital
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1
RU,Russia,Moscow
IN,India,New Delhi
CH,China,Beijing


In [14]:
# A bare ':' keeps every row while the list restricts the columns.
brics.loc[:, ["country", "capital"]]

Unnamed: 0_level_0,country,capital
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1
BR,Brazil,Brasilia
RU,Russia,Moscow
IN,India,New Delhi
CH,China,Beijing
SA,South Africa,Pretoria


`loc` is more versatile than square brackets for row & column access.

#### iloc

In [15]:
# iloc selects by integer position: position 1 is the second row (RU).
brics.iloc[1] # Series

country       Russia
capital       Moscow
area            17.1
population     143.5
Name: RU, dtype: object

In [16]:
# A list of positions returns a DataFrame, here with the single row at position 1.
brics.iloc[[1]] # DataFrame

Unnamed: 0_level_0,country,capital,area,population
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RU,Russia,Moscow,17.1,143.5


In [17]:
# Row positions first, column positions second: rows 1-3 of the first two columns.
brics.iloc[[1, 2, 3], [0, 1]]

Unnamed: 0_level_0,country,capital
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1
RU,Russia,Moscow
IN,India,New Delhi
CH,China,Beijing


In [18]:
# A bare ':' keeps every row; the list picks the columns at positions 0 and 1.
brics.iloc[:, [0, 1]]

Unnamed: 0_level_0,country,capital
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1
BR,Brazil,Brasilia
RU,Russia,Moscow
IN,India,New Delhi
CH,China,Beijing
SA,South Africa,Pretoria


## Filtering

In [22]:
# Build a boolean Series (True where area exceeds 8), then use it as a row
# mask: only the rows marked True are kept.
is_huge = brics["area"] > 8
brics[is_huge]

Unnamed: 0_level_0,country,capital,area,population
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
CH,China,Beijing,9.597,1357.0


In [24]:
# Combine two element-wise conditions into a single row mask: keep rows whose
# area is strictly between 8 and 10. np.logical_and is used because Python's
# `and` keyword cannot combine two boolean Series element by element.
is_mid_sized = np.logical_and(brics["area"] > 8, brics["area"] < 10)
brics[is_mid_sized]

Unnamed: 0_level_0,country,capital,area,population
country_ab,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BR,Brazil,Brasilia,8.516,200.4
CH,China,Beijing,9.597,1357.0


## Loops

In [26]:
# Walk the frame row by row; iterrows() yields (index label, row Series) pairs.
for code, record in brics.iterrows():
    capital = record["capital"]
    print(code + ": " + capital)

BR: Brasilia
RU: Moscow
IN: New Delhi
CH: Beijing
SA: Pretoria


In [29]:
# Add a "name_length" column one row at a time. This is the slow approach.
for lab, row in brics.iterrows():
    # iterrows() builds a new Series for every row, which makes this loop costly.
    # Each pass writes a single cell through .loc; the printed result shows the
    # resulting column as float (6.0, 5.0, ...) rather than int.
    brics.loc[lab, "name_length"] = len(row["country"])
print(brics)

                 country    capital    area  population  name_length
country_ab                                                          
BR                Brazil   Brasilia   8.516      200.40          6.0
RU                Russia     Moscow  17.100      143.50          6.0
IN                 India  New Delhi   3.286     1252.00          5.0
CH                 China    Beijing   9.597     1357.00          5.0
SA          South Africa   Pretoria   1.221       52.98         12.0


In [31]:
# more efficient
brics["name_length"] = brics["country"].apply(len)
print(brics)

                 country    capital    area  population  name_length
country_ab                                                          
BR                Brazil   Brasilia   8.516      200.40            6
RU                Russia     Moscow  17.100      143.50            6
IN                 India  New Delhi   3.286     1252.00            5
CH                 China    Beijing   9.597     1357.00            5
SA          South Africa   Pretoria   1.221       52.98           12
