## Structural indexing

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Naive representation: key every population figure by a (state, year) tuple.
index = [
    (state, year)
    for state in ('California', 'New York', 'Texas')
    for year in (2000, 2010)
]
# NOTE(review): the last figure (Texas, 2010) has one more digit than the
# other five entries -- possibly a typo; confirm against the data source.
populations = [
    39299382, 39238294,
    29382938, 29382948,
    29003843, 290302034,
]
pop = pd.Series(data=populations, index=index)
pop

(California, 2000)     39299382
(California, 2010)     39238294
(New York, 2000)       29382938
(New York, 2010)       29382948
(Texas, 2000)          29003843
(Texas, 2010)         290302034
dtype: int64

In [5]:
# Promote the list of (state, year) tuples to a two-level MultiIndex.
# The name `index` is rebound here: list of tuples -> pd.MultiIndex.
index = pd.MultiIndex.from_tuples(tuples=index)
index

MultiIndex(levels=[['California', 'New York', 'Texas'], [2000, 2010]],
           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

In [7]:
# Re-align the tuple-keyed Series onto the MultiIndex so that the state and
# the year become separate index levels (same six values, see output).
pop = pop.reindex(index=index)
pop

California  2000     39299382
            2010     39238294
New York    2000     29382938
            2010     29382948
Texas       2000     29003843
            2010    290302034
dtype: int64

In [8]:
# Partial indexing on the MultiIndex: keep every first-level label and only
# the 2010 entries of the second level. The result is indexed by the first
# level alone.
pop[:,2010]

California     39238294
New York       29382948
Texas         290302034
dtype: int64

In [9]:
# Pivot the innermost index level (the years) into columns, turning the
# two-level Series into a conventionally indexed DataFrame.
pop_df = pop.unstack(level=-1)
pop_df

Unnamed: 0,2000,2010
California,39299382,39238294
New York,29382938,29382948
Texas,29003843,290302034


In [10]:
# Inverse of the unstack() in the previous cell: the column labels move back
# into an inner index level, giving the two-level Series again.
pop_df.stack()

California  2000     39299382
            2010     39238294
New York    2000     29382938
            2010     29382948
Texas       2000     29003843
            2010    290302034
dtype: int64

In [11]:
# Build a two-column frame on the same MultiIndex. The Series `pop` supplies
# the index; the plain list is matched to its rows by position.
# `pop_df` is rebound here (it previously held the unstacked table).
# NOTE(review): both Texas under18 figures are larger than the Texas totals,
# which is why the derived fractions exceed 1 -- looks like placeholder
# data; confirm.
pop_df = pd.DataFrame({
    'total': pop,
    'under18': [
        198222, 928394,
        5043049, 3209390,
        39203940, 293849934,
    ],
})
pop_df

Unnamed: 0,Unnamed: 1,total,under18
California,2000,39299382,198222
California,2010,39238294,928394
New York,2000,29382938,5043049
New York,2010,29382948,3209390
Texas,2000,29003843,39203940
Texas,2010,290302034,293849934


In [14]:
# Element-wise ratio under18 / total; both columns come from the same frame,
# so the rows line up on the shared MultiIndex.
f_u18 = pop_df['under18'].div(pop_df['total'])
# Long form first (two-level Series), then the same numbers as a table.
print(f_u18)
print(f_u18.unstack())

California  2000    0.005044
            2010    0.023660
New York    2000    0.171632
            2010    0.109226
Texas       2000    1.351681
            2010    1.012221
dtype: float64
                2000      2010
California  0.005044  0.023660
New York    0.171632  0.109226
Texas       1.351681  1.012221


In [16]:
# Passing a list of label arrays as `index` makes the DataFrame constructor
# build the MultiIndex implicitly.
# The values come from a dedicated, seeded RandomState so the table is the
# same on every Restart & Run All and NumPy's global random state is left
# untouched. (Stored output from an earlier unseeded run will differ until
# the cell is re-executed.)
df = pd.DataFrame(
    np.random.RandomState(0).rand(4, 2),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=['data1', 'data2'],
)
df

Unnamed: 0,Unnamed: 1,data1,data2
a,1,0.954761,0.446813
a,2,0.820871,0.97213
b,1,0.743217,0.898545
b,2,0.603238,0.852631


In [18]:
# Explicit constructor, variant 1: one array of labels per level.
pd.MultiIndex.from_arrays([
    ['a', 'a', 'b', 'b'],
    [1, 2, 1, 2],
])

MultiIndex(levels=[['a', 'b'], [1, 2]],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]])

In [19]:
# Explicit constructor, variant 2: one tuple of labels per row.
pd.MultiIndex.from_tuples([
    ('a', 1),
    ('a', 2),
    ('b', 1),
    ('b', 2),
])

MultiIndex(levels=[['a', 'b'], [1, 2]],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]])

In [20]:
# Explicit constructor, variant 3: Cartesian product of the per-level labels.
pd.MultiIndex.from_product([
    ['a', 'b'],
    [1, 2],
])

MultiIndex(levels=[['a', 'b'], [1, 2]],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]])

In [22]:
# Label the two index levels so outputs and slices are self-describing.
# This assigns to the existing index object; `pop` itself is not rebound.
pop.index.names = ['state', 'year']
pop

state       year
California  2000     39299382
            2010     39238294
New York    2000     29382938
            2010     29382948
Texas       2000     29003843
            2010    290302034
dtype: int64

In [23]:
# Giving a label for every index level (state, year) selects one element
# and returns it as a scalar.
pop['California', 2000]

39299382

In [24]:
# Giving only the outer (state) label returns the sub-Series for that state,
# indexed by the remaining `year` level.
pop['California']

year
2000    39299382
2010    39238294
dtype: int64