# Pandas

In [1]:
import numpy as np
import pandas as pd

## DataFrame

In [2]:
# Column-wise sample data: the i-th entry of each list belongs to the same country.
countries = [
    'United States', 'Japan', 'Australia',
    'India', 'Russia', 'Iran',
]
cars_per_cap = [800, 100, 902, 430, 19, 45]
drives_right = [True, False, True, False, False, True]

In [3]:
# Map each column label to its list of values; the keys become the DataFrame's column names.
data = dict(
    cars_per_cap=cars_per_cap,
    countries=countries,
    drives_right=drives_right,
)

In [4]:
# Build a DataFrame from the dict of equal-length lists: one column per key,
# with a default integer row index (0..5 in the output below).
cars = pd.DataFrame(data)
cars

Unnamed: 0,cars_per_cap,countries,drives_right
0,800,United States,True
1,100,Japan,False
2,902,Australia,True
3,430,India,False
4,19,Russia,False
5,45,Iran,True


## Reading from file

In [5]:
# Load the CSV with default settings; the file's first line is consumed as the
# column header (visible when the frame is previewed in the next cell).
cars = pd.read_csv('./cars.csv')

In [6]:
# Preview the first rows. The column header is really a data record
# ('USCA', 'US', 'United States', ...), which shows the file has no header line.
cars.head()

Unnamed: 0,USCA,US,United States,809,FALSE
0,ASPAC,AUS,Australia,731.0,True
1,ASPAC,JAP,Japan,588.0,True
2,ASPAC,IN,India,18.0,True
3,ASPAC,RU,Russia,200.0,False
4,LATAM,MOR,Morocco,70.0,False


In [7]:
# By default, read_csv treats the first row of the file as the header.
# This file has no header row, so pass header=None to keep that row as data.

cars = pd.read_csv('./cars.csv', header=None) # columns are then labelled 0, 1, 2, ...
cars.head()

Unnamed: 0,0,1,2,3,4
0,USCA,US,United States,809.0,False
1,ASPAC,AUS,Australia,731.0,True
2,ASPAC,JAP,Japan,588.0,True
3,ASPAC,IN,India,18.0,True
4,ASPAC,RU,Russia,200.0,False


### Assign Headers

In [8]:
# Replace the positional labels 0..4 with descriptive column names.
# NOTE(review): judging by the displayed values, the first two labels look swapped:
# the 'country_code' column holds region-like values (USCA, ASPAC, ...) while 'reigon'
# holds country codes (US, AUS, ...). 'reigon' is also a misspelling of 'region'.
# Confirm before renaming; later cells reuse these labels.
cars.columns = ['country_code', 'reigon', 'country', 'cars_per_cap', 'drives_right']

cars

Unnamed: 0,country_code,reigon,country,cars_per_cap,drives_right
0,USCA,US,United States,809.0,False
1,ASPAC,AUS,Australia,731.0,True
2,ASPAC,JAP,Japan,588.0,True
3,ASPAC,IN,India,18.0,True
4,ASPAC,RU,Russia,200.0,False
5,LATAM,MOR,Morocco,70.0,False
6,AFR,EG,Egypt,45.0,False
7,EUR,ENG,England,,True


In [9]:
# Read a pipe-delimited file straight from a URL: sep='|' sets the field delimiter,
# and header=None keeps the first line as data, giving integer column labels.
df = pd.read_csv('https://media-doselect.s3.amazonaws.com/generic/A08MajL8qN4rq72EpVJbAP1Rw/marks_1.csv', sep = '|', header = None) # '|' is the field separator
df

Unnamed: 0,0,1,2,3,4,5
0,1,Akshay,Mathematics,50,40,80
1,2,Mahima,English,40,33,83
2,3,Vikas,Mathematics,50,42,84
3,4,Abhinav,English,40,31,78
4,5,Mahima,Science,50,40,80
5,6,Akshay,Science,50,49,98
6,7,Abhinav,Mathematics,50,47,94
7,8,Vikas,Science,50,40,80
8,9,Abhinav,Science,50,47,94
9,10,Vikas,English,40,39,98


## Indexing a dataframe

In [10]:
# Inspect the row labels; no index column was specified, so this is the default RangeIndex.
cars.index

RangeIndex(start=0, stop=8, step=1)

In [11]:
# index_col=0 uses the first column of the file as the row index instead of a data column.
cars = pd.read_csv('./cars.csv', header=None, index_col = 0) # set the first column as index
cars

Unnamed: 0_level_0,1,2,3,4
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
USCA,US,United States,809.0,False
ASPAC,AUS,Australia,731.0,True
ASPAC,JAP,Japan,588.0,True
ASPAC,IN,India,18.0,True
ASPAC,RU,Russia,200.0,False
LATAM,MOR,Morocco,70.0,False
AFR,EG,Egypt,45.0,False
EUR,ENG,England,,True


In [12]:
# The index now holds the first column's values; labels may repeat (e.g. 'ASPAC').
cars.index

Index(['USCA', 'ASPAC', 'ASPAC', 'ASPAC', 'ASPAC', 'LATAM', 'AFR', 'EUR'], dtype='object', name=0)

In [15]:
# Name the four remaining data columns (the first file column is now the index).
# NOTE(review): 'reigon' is a misspelling of 'region', and the values shown under it
# (US, AUS, ...) look like country codes. Confirm the intended label.
cars.columns = ['reigon', 'country', 'cars_per_cap', 'drives_right']
cars

Unnamed: 0_level_0,reigon,country,cars_per_cap,drives_right
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
USCA,US,United States,809.0,False
ASPAC,AUS,Australia,731.0,True
ASPAC,JAP,Japan,588.0,True
ASPAC,IN,India,18.0,True
ASPAC,RU,Russia,200.0,False
LATAM,MOR,Morocco,70.0,False
AFR,EG,Egypt,45.0,False
EUR,ENG,England,,True


In [16]:
# Give the index a name so it is labelled when the frame is displayed.
cars.index.name = 'country_code'

In [17]:
# Display the frame; the index header now reads 'country_code'.
cars

Unnamed: 0_level_0,reigon,country,cars_per_cap,drives_right
country_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
USCA,US,United States,809.0,False
ASPAC,AUS,Australia,731.0,True
ASPAC,JAP,Japan,588.0,True
ASPAC,IN,India,18.0,True
ASPAC,RU,Russia,200.0,False
LATAM,MOR,Morocco,70.0,False
AFR,EG,Egypt,45.0,False
EUR,ENG,England,,True


In [18]:
# Setting the name to None removes the index label again.
cars.index.name = None
cars

Unnamed: 0,reigon,country,cars_per_cap,drives_right
USCA,US,United States,809.0,False
ASPAC,AUS,Australia,731.0,True
ASPAC,JAP,Japan,588.0,True
ASPAC,IN,India,18.0,True
ASPAC,RU,Russia,200.0,False
LATAM,MOR,Morocco,70.0,False
AFR,EG,Egypt,45.0,False
EUR,ENG,England,,True


## Hierarchical Index

In [21]:
# Build a two-level (hierarchical) row index from two of the columns.
# The frame is re-read from disk, so this cell does not depend on the state
# left behind by earlier cells.
cars = pd.read_csv('./cars.csv', header=None)
cars.columns = ['country_code', 'reigon', 'country', 'cars_per_cap', 'drives_right']
# set_index returns a new frame; assign it back instead of mutating with
# inplace=True. The first listed column becomes the outer index level.
cars = cars.set_index(['reigon', 'country_code'])

cars

Unnamed: 0_level_0,Unnamed: 1_level_0,country,cars_per_cap,drives_right
reigon,country_code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
US,USCA,United States,809.0,False
AUS,ASPAC,Australia,731.0,True
JAP,ASPAC,Japan,588.0,True
IN,ASPAC,India,18.0,True
RU,ASPAC,Russia,200.0,False
MOR,LATAM,Morocco,70.0,False
EG,AFR,Egypt,45.0,False
ENG,EUR,England,,True


## Write to CSV

In [22]:
# Persist the frame to a CSV file at a path relative to the working directory.
# to_csv writes the index by default, so both index levels are saved as leading columns.
cars.to_csv('cars_df.csv')

## Exercise

In [27]:
# Load the pipe-delimited marks file with its first column as the row index,
# then label the index and the five remaining columns.
df = pd.read_csv(
    'https://media-doselect.s3.amazonaws.com/generic/A08MajL8qN4rq72EpVJbAP1Rw/marks_1.csv',
    sep='|',
    header=None,
    index_col=0,
)
df.index.name = 'S.No.'
df.columns = ['Name', 'Subject', 'Maximum Marks', 'Marks Obtained', 'Percentage']
df

Unnamed: 0_level_0,Name,Subject,Maximum Marks,Marks Obtained,Percentage
S.No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Akshay,Mathematics,50,40,80
2,Mahima,English,40,33,83
3,Vikas,Mathematics,50,42,84
4,Abhinav,English,40,31,78
5,Mahima,Science,50,40,80
6,Akshay,Science,50,49,98
7,Abhinav,Mathematics,50,47,94
8,Vikas,Science,50,40,80
9,Abhinav,Science,50,47,94
10,Vikas,English,40,39,98
