In [166]:
import pandas as pd

# Loading data

In [167]:
# 1D datatype: Series
# 2D datatype: DataFrame (tabular)

In [168]:
# Read the CSV and let pandas number the rows 0..n-1 (index_col=None is the default)
df = pd.read_csv('brics.csv', index_col=None)
df.head()

Unnamed: 0,code,country,population,area,capital
0,BR,Brazil,200,8515767,Brasilia
1,RU,Russia,144,17098242,Moscow
2,IN,India,1252,3287590,New Delhi
3,CH,China,1357,9596961,Beijing
4,SA,South Africa,55,1221037,Pretoria


In [169]:
# Load and set the first column as index
# (index_col=0 picks the first CSV column, here 'code', as the row labels)
df = pd.read_csv('brics.csv', index_col=0)
df.head()

Unnamed: 0_level_0,country,population,area,capital
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BR,Brazil,200,8515767,Brasilia
RU,Russia,144,17098242,Moscow
IN,India,1252,3287590,New Delhi
CH,China,1357,9596961,Beijing
SA,South Africa,55,1221037,Pretoria


In [170]:
# shape is a (number of rows, number of columns) tuple; the index is not counted as a column
df.shape

(5, 4)

In [171]:
# Print a summary: index range, per-column non-null counts and dtypes, memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, BR to SA
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   country     5 non-null      object
 1   population  5 non-null      int64 
 2   area        5 non-null      int64 
 3   capital     5 non-null      object
dtypes: int64(2), object(2)
memory usage: 200.0+ bytes


In [172]:
df

Unnamed: 0_level_0,country,population,area,capital
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BR,Brazil,200,8515767,Brasilia
RU,Russia,144,17098242,Moscow
IN,India,1252,3287590,New Delhi
CH,China,1357,9596961,Beijing
SA,South Africa,55,1221037,Pretoria


# Access Values

**By Columns**

In [173]:
# A single column label returns that column as a Series
df['country']

code
BR          Brazil
RU          Russia
IN           India
CH           China
SA    South Africa
Name: country, dtype: object

In [174]:
# Attribute-style access returns the same Series as df['country']
df.country

code
BR          Brazil
RU          Russia
IN           India
CH           China
SA    South Africa
Name: country, dtype: object

In [175]:
# A list of labels returns a DataFrame containing just those columns
df[ ['country', 'population'] ]

Unnamed: 0_level_0,country,population
code,Unnamed: 1_level_1,Unnamed: 2_level_1
BR,Brazil,200
RU,Russia,144
IN,India,1252
CH,China,1357
SA,South Africa,55


In [176]:
# A one-element list still returns a DataFrame (not a Series)
df[ ['country'] ]

Unnamed: 0_level_0,country
code,Unnamed: 1_level_1
BR,Brazil
RU,Russia
IN,India
CH,China
SA,South Africa


**Access by rows and/or columns**

In [177]:
df

Unnamed: 0_level_0,country,population,area,capital
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BR,Brazil,200,8515767,Brasilia
RU,Russia,144,17098242,Moscow
IN,India,1252,3287590,New Delhi
CH,China,1357,9596961,Beijing
SA,South Africa,55,1221037,Pretoria


In [178]:
# Method 1: using loc --> Template is .loc[rows, columns]
# A single row label returns that row as a Series named after the label
df.loc['RU']

country         Russia
population         144
area          17098242
capital         Moscow
Name: RU, dtype: object

In [179]:
# Row label + column label returns the single value stored in that cell
df.loc['RU', 'area']

17098242

In [180]:
# Also method 1 but with slicing
# Label slices include both endpoints: 'CH' and 'area' are part of the result
df.loc['RU':'CH', 'country':'area']

Unnamed: 0_level_0,country,population,area
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RU,Russia,144,17098242
IN,India,1252,3287590
CH,China,1357,9596961


In [181]:
# Method 2: using iloc --> Template is .iloc[row indices, column indices]
# Positions start at 0, so position 1 is the second row ('RU')
df.iloc[1]

country         Russia
population         144
area          17098242
capital         Moscow
Name: RU, dtype: object

In [182]:
# Row position 1 ('RU'), column position 2 ('area')
df.iloc[1,2]

17098242

In [183]:
# Method 2 but with slicing
# Positional slices exclude the stop position: this selects rows 1-3 and columns 0-2
df.iloc[1:4,0:3]

Unnamed: 0_level_0,country,population,area
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RU,Russia,144,17098242
IN,India,1252,3287590
CH,China,1357,9596961


In [184]:
# ':' selects every row; 3 is the position of the 'capital' column
df.iloc[:,3]

code
BR     Brasilia
RU       Moscow
IN    New Delhi
CH      Beijing
SA     Pretoria
Name: capital, dtype: object

**Accessing Rows using boolean indexing (mostly for filtering)**

In [185]:
df

Unnamed: 0_level_0,country,population,area,capital
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BR,Brazil,200,8515767,Brasilia
RU,Russia,144,17098242,Moscow
IN,India,1252,3287590,New Delhi
CH,China,1357,9596961,Beijing
SA,South Africa,55,1221037,Pretoria


In [186]:
# A boolean list with one entry per row keeps only the rows marked True
df[ [False, True, True, True, False] ]

Unnamed: 0_level_0,country,population,area,capital
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RU,Russia,144,17098242,Moscow
IN,India,1252,3287590,New Delhi
CH,China,1357,9596961,Beijing


In [187]:
# Comparing a column with a value yields a boolean Series (one True/False per row)
df['population'] > 1000

code
BR    False
RU    False
IN     True
CH     True
SA    False
Name: population, dtype: bool

In [188]:
# Use the comparison as a row mask: keep only the rows where population > 1000
df[ df['population'] > 1000 ]

Unnamed: 0_level_0,country,population,area,capital
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
IN,India,1252,3287590,New Delhi
CH,China,1357,9596961,Beijing


# Append Values

In [189]:
df

Unnamed: 0_level_0,country,population,area,capital
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BR,Brazil,200,8515767,Brasilia
RU,Russia,144,17098242,Moscow
IN,India,1252,3287590,New Delhi
CH,China,1357,9596961,Beijing
SA,South Africa,55,1221037,Pretoria


**Add new column**

In [190]:
# Method 1: align with a list or series
# The list supplies one value per row, in row order (BR, RU, IN, CH, SA).
# NOTE(review): the last flag belongs to 'SA' (South Africa); True looks unintended — confirm
new_column = [True, False, False, False, True]
df['on_america'] = new_column
df

Unnamed: 0_level_0,country,population,area,capital,on_america
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BR,Brazil,200,8515767,Brasilia,True
RU,Russia,144,17098242,Moscow,False
IN,India,1252,3287590,New Delhi,False
CH,China,1357,9596961,Beijing,False
SA,South Africa,55,1221037,Pretoria,True


In [191]:
# Assigning a scalar fills every row of the new column with that value
df['new column'] = 100
df

Unnamed: 0_level_0,country,population,area,capital,on_america,new column
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BR,Brazil,200,8515767,Brasilia,True,100
RU,Russia,144,17098242,Moscow,False,100
IN,India,1252,3287590,New Delhi,False,100
CH,China,1357,9596961,Beijing,False,100
SA,South Africa,55,1221037,Pretoria,True,100


In [192]:
# Derive a column from two existing ones: element-wise population / area, scaled by 1,000,000
df['pop_density'] = df['population'].div(df['area']).mul(1_000_000)
df

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,200,8515767,Brasilia,True,100,23.485847
RU,Russia,144,17098242,Moscow,False,100,8.421918
IN,India,1252,3287590,New Delhi,False,100,380.826076
CH,China,1357,9596961,Beijing,False,100,141.398928
SA,South Africa,55,1221037,Pretoria,True,100,45.04368


**Add new row**

In [193]:
# Method 1: assigning to a label that is not yet in the index adds a new row.
# Values are listed in column order.
df.loc['SG'] = [
    'Singapore', 6, 671, 'Singapore', False, 100,
    6 / 671 * 1_000_000,  # pop_density: population / area * 1,000,000
]
df

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,200,8515767,Brasilia,True,100,23.485847
RU,Russia,144,17098242,Moscow,False,100,8.421918
IN,India,1252,3287590,New Delhi,False,100,380.826076
CH,China,1357,9596961,Beijing,False,100,141.398928
SA,South Africa,55,1221037,Pretoria,True,100,45.04368
SG,Singapore,6,671,Singapore,False,100,8941.877794


In [194]:
# Method 2: append the rows of another DataFrame
# (df2 holds the first three rows of df and serves as the rows to be appended)
df2 = df.iloc[:3]
df2

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,200,8515767,Brasilia,True,100,23.485847
RU,Russia,144,17098242,Moscow,False,100,8.421918
IN,India,1252,3287590,New Delhi,False,100,380.826076


In [195]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat stacks the rows of df2 below those of df and returns a new DataFrame;
# df itself is left unchanged.
pd.concat([df, df2])


Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,200,8515767,Brasilia,True,100,23.485847
RU,Russia,144,17098242,Moscow,False,100,8.421918
IN,India,1252,3287590,New Delhi,False,100,380.826076
CH,China,1357,9596961,Beijing,False,100,141.398928
SA,South Africa,55,1221037,Pretoria,True,100,45.04368
SG,Singapore,6,671,Singapore,False,100,8941.877794
BR,Brazil,200,8515767,Brasilia,True,100,23.485847
RU,Russia,144,17098242,Moscow,False,100,8.421918
IN,India,1252,3287590,New Delhi,False,100,380.826076


In [196]:
df

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,200,8515767,Brasilia,True,100,23.485847
RU,Russia,144,17098242,Moscow,False,100,8.421918
IN,India,1252,3287590,New Delhi,False,100,380.826076
CH,China,1357,9596961,Beijing,False,100,141.398928
SA,South Africa,55,1221037,Pretoria,True,100,45.04368
SG,Singapore,6,671,Singapore,False,100,8941.877794


# Change values

In [197]:
df

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,200,8515767,Brasilia,True,100,23.485847
RU,Russia,144,17098242,Moscow,False,100,8.421918
IN,India,1252,3287590,New Delhi,False,100,380.826076
CH,China,1357,9596961,Beijing,False,100,141.398928
SA,South Africa,55,1221037,Pretoria,True,100,45.04368
SG,Singapore,6,671,Singapore,False,100,8941.877794


In [198]:
# Overwrite a single cell, addressed by row label and column label
# NOTE: pop_density is not recomputed, so BR's density still reflects the old population
df.loc['BR','population'] = 211
df

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,211,8515767,Brasilia,True,100,23.485847
RU,Russia,144,17098242,Moscow,False,100,8.421918
IN,India,1252,3287590,New Delhi,False,100,380.826076
CH,China,1357,9596961,Beijing,False,100,141.398928
SA,South Africa,55,1221037,Pretoria,True,100,45.04368
SG,Singapore,6,671,Singapore,False,100,8941.877794


In [199]:
# Assigning to an existing column name overwrites every value in that column
df['new column'] = 200
df

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,211,8515767,Brasilia,True,200,23.485847
RU,Russia,144,17098242,Moscow,False,200,8.421918
IN,India,1252,3287590,New Delhi,False,200,380.826076
CH,China,1357,9596961,Beijing,False,200,141.398928
SA,South Africa,55,1221037,Pretoria,True,200,45.04368
SG,Singapore,6,671,Singapore,False,200,8941.877794


# Delete

In [200]:
# Tip: put the cursor inside a function call and press Shift+Tab to read its documentation

In [201]:
# Delete a row by its index label; drop returns a new DataFrame and leaves df untouched
df.drop(index='SA')

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,211,8515767,Brasilia,True,200,23.485847
RU,Russia,144,17098242,Moscow,False,200,8.421918
IN,India,1252,3287590,New Delhi,False,200,380.826076
CH,China,1357,9596961,Beijing,False,200,141.398928
SG,Singapore,6,671,Singapore,False,200,8941.877794


In [203]:
df

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,211,8515767,Brasilia,True,200,23.485847
RU,Russia,144,17098242,Moscow,False,200,8.421918
IN,India,1252,3287590,New Delhi,False,200,380.826076
CH,China,1357,9596961,Beijing,False,200,141.398928
SA,South Africa,55,1221037,Pretoria,True,200,45.04368
SG,Singapore,6,671,Singapore,False,200,8941.877794


In [239]:
# Delete a column. drop returns a new DataFrame, so the result is kept under a new name
# (alternatively pass inplace=True, or assign the result back to the same variable)
new_df = df.drop(columns='population')

In [233]:
df

Unnamed: 0_level_0,country,population,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BR,Brazil,211,8515767,Brasilia,True,200,23.485847
RU,Russia,144,17098242,Moscow,False,200,8.421918
IN,India,1252,3287590,New Delhi,False,200,380.826076
CH,China,1357,9596961,Beijing,False,200,141.398928
SA,South Africa,55,1221037,Pretoria,True,200,45.04368
SG,Singapore,6,671,Singapore,False,200,8941.877794


In [209]:
# Save file
# Keep the index in the output: it holds the country codes ('code'),
# which index=False would silently drop from the saved CSV.
new_df.to_csv('new_df.csv')

# Rename Columns

In [212]:
new_df

Unnamed: 0_level_0,country,area,capital,on_america,new column,pop_density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BR,Brazil,8515767,Brasilia,True,200,23.485847
RU,Russia,17098242,Moscow,False,200,8.421918
IN,India,3287590,New Delhi,False,200,380.826076
CH,China,9596961,Beijing,False,200,141.398928
SA,South Africa,1221037,Pretoria,True,200,45.04368
SG,Singapore,671,Singapore,False,200,8941.877794


In [237]:
# method 1: rename takes an {old name: new name} mapping and returns a new DataFrame
new_df = new_df.rename(columns={'country': 'COUNTRY'})

In [238]:
new_df

Unnamed: 0_level_0,COUNTRY,area,capital,on_america,new column,POPULATION DENSITY
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BR,Brazil,8515767,Brasilia,True,200,23.485847
RU,Russia,17098242,Moscow,False,200,8.421918
IN,India,3287590,New Delhi,False,200,380.826076
CH,China,9596961,Beijing,False,200,141.398928
SA,South Africa,1221037,Pretoria,True,200,45.04368
SG,Singapore,671,Singapore,False,200,8941.877794


In [235]:
# method 2: replace the full list of column names
# (keep the first five names as they are and supply a new name for the sixth)
new_df.columns = [*new_df.columns[:5], 'POPULATION DENSITY']

In [236]:
new_df

Unnamed: 0_level_0,country,area,capital,on_america,new column,POPULATION DENSITY
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BR,Brazil,8515767,Brasilia,True,200,23.485847
RU,Russia,17098242,Moscow,False,200,8.421918
IN,India,3287590,New Delhi,False,200,380.826076
CH,China,9596961,Beijing,False,200,141.398928
SA,South Africa,1221037,Pretoria,True,200,45.04368
SG,Singapore,671,Singapore,False,200,8941.877794
