In [18]:
import pandas as pd

# Labels are (state, year) pairs, generated state-major so they line up
# one-to-one with the population values listed below.
index = [(state, year)
         for state in ('california', 'ny', 'texas')
         for year in (2000, 2010)]
population = [
    10000, 110000,     # california: 2000, 2010
    120000, 123000,    # ny:         2000, 2010
    7800, 79800,       # texas:      2000, 2010
]

# Naive representation: a flat Series whose labels are plain Python tuples.
pop = pd.Series(population, index=index)
print(pop, end="\n\n")

# Promote the list of tuples to a hierarchical, two-level MultiIndex.
index = pd.MultiIndex.from_tuples(index)
print(index, end="\n\n")

# Re-label the Series with the MultiIndex; the tuple labels match its entries,
# so every value carries over.
pop = pop.reindex(index)
print(pop, end="\n\n")

# Partial indexing: all entries of the first level, second level equal to 2010.
print(pop[:, 2010], end="\n\n")

# Pivot the inner level (year) into columns to get a DataFrame.
pop_df = pop.unstack()
print(pop_df)

# Name the index levels. This runs after unstack(), so pop_df above was built
# from the unnamed index. `b` holds the list of names that was assigned.
b = ['states', 'year']
pop.index.names = b
print(b)

(california, 2000)     10000
(california, 2010)    110000
(ny, 2000)            120000
(ny, 2010)            123000
(texas, 2000)           7800
(texas, 2010)          79800
dtype: int64

MultiIndex([('california', 2000),
            ('california', 2010),
            (        'ny', 2000),
            (        'ny', 2010),
            (     'texas', 2000),
            (     'texas', 2010)],
           )

california  2000     10000
            2010    110000
ny          2000    120000
            2010    123000
texas       2000      7800
            2010     79800
dtype: int64

california    110000
ny            123000
texas          79800
dtype: int64

              2000    2010
california   10000  110000
ny          120000  123000
texas         7800   79800
['states', 'year']


## Methods of MultiIndex creation

In [2]:
import pandas as pd
import numpy as np

# 4x2 array of random values (unseeded, so the numbers differ on every run).
a = np.random.rand(4, 2)
print(a, end="\n\n")

# Passing two parallel label lists as `index` produces a two-level row index:
# outer labels a/a/b/b paired with inner labels 1/2/1/2.
multi = pd.DataFrame(
    data=a,
    index=[list('aabb'), [1, 2] * 2],
    columns=['data1', 'data2'],
)
multi

[[0.63877379 0.46269736]
 [0.51030634 0.55738894]
 [0.12133286 0.94731238]
 [0.5042636  0.47925159]]



Unnamed: 0,Unnamed: 1,data1,data2
a,1,0.638774,0.462697
a,2,0.510306,0.557389
b,1,0.121333,0.947312
b,2,0.504264,0.479252


#### If you pass a dictionary whose keys are tuples, pandas will build a MultiIndex from them by default

In [1]:
import pandas as pd

# Values keyed by (state, year) tuples, built from explicit key/value pairs
# in the order they should appear.
data = dict([
    (('cali', 2000), 3345),
    (('cali', 2010), 33456),
    (('ny', 2000), 1234),
    (('ny', 2010), 13454),
])

# Last expression of the cell: shows the dictionary.
data

{('cali', 2000): 3345,
 ('cali', 2010): 33456,
 ('ny', 2000): 1234,
 ('ny', 2010): 13454}

## MultiIndex for columns

In [6]:
import pandas as pd
import numpy as np

# Row labels: every (location, visit) combination.
index = pd.MultiIndex.from_product(
    [['california', 'texas'], [1, 2]],
    names=['location', 'visit'],
)
# Column labels: every (subject, type) combination.
columns = pd.MultiIndex.from_product(
    [['bob', 'guido', 'sue'], ['hr', 'temp']],
    names=['subject', 'type'],
)

# One random value per row/column pair: 4 rows x 6 columns (unseeded).
data = np.random.rand(len(index), len(columns))

file_data = pd.DataFrame(data, index=index, columns=columns)
print(file_data, end="\n\n")

# Selecting a top-level column label returns that subject's sub-frame.
file_data['guido']

subject                bob               guido                 sue          
type                    hr      temp        hr      temp        hr      temp
location   visit                                                            
california 1      0.956238  0.977854  0.203085  0.282376  0.739276  0.648384
           2      0.935595  0.436538  0.848459  0.778857  0.803747  0.910928
texas      1      0.371565  0.320059  0.432114  0.928734  0.266501  0.542895
           2      0.087087  0.278067  0.379200  0.152400  0.094022  0.913540



Unnamed: 0_level_0,type,hr,temp
location,visit,Unnamed: 2_level_1,Unnamed: 3_level_1
california,1,0.203085,0.282376
california,2,0.848459,0.778857
texas,1,0.432114,0.928734
texas,2,0.3792,0.1524


## Merge two DataFrames

In [10]:
# Two frames describing the same four employees; 'employee' is the only
# column they have in common.
a = pd.DataFrame({'employee': ['abc', 'abb', 'auc', 'abr'], 'group': ['hr', 'eng', 'sde', 'se']})
b = pd.DataFrame({'employee': ['abc', 'abb', 'auc', 'abr'], 'type': ['perm', 'temp', 'perm', 'temp']})

# Join on the shared key explicitly instead of relying on pandas to infer it
# from the common column names. The default inner join keeps one row per
# employee, carrying both 'group' and 'type'.
pd.merge(a, b, on='employee')

Unnamed: 0,employee,group,type
0,abc,hr,perm
1,abb,eng,temp
2,auc,sde,perm
3,abr,se,temp
