# How to Read Excel or CSV With Multiple Line Headers Using Pandas

In [1]:
# Print the raw CSV so its two header rows are visible before parsing.
!cat '../data/csv/multine_header.csv'

Date,Company A,Company A,Company B,Company B
,Rank,Points,Rank,Points
2021-09-06,1,7.9,2,6
2021-09-07,1,8.5,2,7
2021-09-08,2,8,1,8.1


## Step 2: Read CSV file with multiple headers

In [2]:
import pandas as pd

# header=[0, 1] turns the first two rows of the file into a two-level
# column header (a MultiIndex) instead of a single row of column names.
df = pd.read_csv(
    '../data/csv/multine_header.csv',
    header=[0, 1],
)
df

Unnamed: 0_level_0,Date,Company A,Company A,Company B,Company B
Unnamed: 0_level_1,Unnamed: 0_level_1,Rank,Points,Rank,Points
0,2021-09-06,1,7.9,2,6.0
1,2021-09-07,1,8.5,2,7.0
2,2021-09-08,2,8.0,1,8.1


In [3]:
# Asking for three header rows on a file that only has two: the first data
# row (2021-09-06) is consumed as a third header level, so only two data
# rows remain in the resulting frame.
df2 = pd.read_csv(
    '../data/csv/multine_header.csv',
    header=[0, 1, 2],
)
df2

Unnamed: 0_level_0,Date,Company A,Company A,Company B,Company B
Unnamed: 0_level_1,Unnamed: 0_level_1,Rank,Points,Rank,Points
Unnamed: 0_level_2,2021-09-06,1,7.9,2,6
0,2021-09-07,1,8.5,2,7.0
1,2021-09-08,2,8.0,1,8.1


## Step 3: Access data from multi-line header DataFrame

In [4]:
# The columns form a MultiIndex: each entry is a (top header, sub header) tuple.
df.columns

MultiIndex([(     'Date', 'Unnamed: 0_level_1'),
            ('Company A',               'Rank'),
            ('Company A',             'Points'),
            ('Company B',               'Rank'),
            ('Company B',             'Points')],
           )

In [5]:
# Select one column by its full (level 0, level 1) tuple; the result is a Series.
df[('Company A', 'Rank')]

0    1
1    1
2    2
Name: (Company A, Rank), dtype: int64

In [6]:
# A list of tuples selects several columns; the result keeps both header levels.
df[[('Company A', 'Rank'), ('Company B', 'Rank')]]

Unnamed: 0_level_0,Company A,Company B
Unnamed: 0_level_1,Rank,Rank
0,1,2
1,1,2
2,2,1


## Step 4: Read Excel file with multiple headers

In [7]:
# Same two-row header, this time read from the "multine_header" sheet of an
# Excel workbook; header=[0, 1] works the same way as in read_csv.
pd.read_excel(
    '../data/excel/multine_header.xlsx',
    sheet_name="multine_header",
    header=[0, 1],
)

Unnamed: 0_level_0,Date,Company A,Company A,Company B,Company B
Unnamed: 0_level_1,Unnamed: 0_level_1,Rank,Points,Rank,Points
0,2021-09-06,1,7.9,2,6.0
1,2021-09-07,1,8.5,2,7.0
2,2021-09-08,2,8.0,1,8.1
