In [20]:
import pandas as pd
import numpy as np

In [2]:
# Mapping of state name -> population figure for five states.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}

In [3]:
# Turn the mapping into a Series whose index is the state names.
population = pd.Series(data=population_dict)

In [4]:
# Show the Series: the dict keys (state names) have become the index labels.
population

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [6]:
# Mapping of state name -> area value for the same five states.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
# Wrap the mapping in a Series indexed by state name.
area = pd.Series(data=area_dict)

In [7]:
# Show the Series: one area value per state-name label.
area

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
dtype: int64

In [8]:
# Combine the two Series into one table; each dict key becomes a column name
# and the rows are aligned on the shared state-name index.
states = pd.DataFrame(data={'population': population, 'area': area})

In [9]:
# Show the combined frame: one row per state, one column per input Series.
states

Unnamed: 0,area,population
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [10]:
# Row labels of the frame (the state names).
states.index

Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object')

In [11]:
# Column labels of the frame, also exposed as an Index object.
states.columns

Index(['area', 'population'], dtype='object')

In [12]:
# Selecting a single column by name returns it as a Series named after that column.
states['area']

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
Name: area, dtype: int64

In [13]:
# Same dictionary-style column access for the other column.
states['population']

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
Name: population, dtype: int64

In [18]:
# Membership test against the row labels.
'California' in states.index

True

In [19]:
# Transpose: the states become the columns and area/population become the rows.
states.T

Unnamed: 0,California,Florida,Illinois,New York,Texas
area,423967,170312,149995,141297,695662
population,38332521,19552860,12882135,19651127,26448193


In [21]:
# hierarchical indices and columns
# hierarchical indices and columns
# Rows are every (year, visit) pair; columns are every (subject, type) pair.
index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],
                                   names=['year', 'visit'])
columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']],
                                     names=['subject', 'type'])
# mock some data
# A locally seeded generator makes the values identical on every run without
# touching NumPy's global random state; the shape is derived from the two
# indices so it cannot drift out of sync with them.
rng = np.random.RandomState(0)
data = np.round(rng.randn(len(index), len(columns)), 1)
data[:, ::2] *= 10  # even-numbered columns are the 'HR' ones: give them a 10x wider spread
data += 37          # shift every value so the readings are centred on 37

In [22]:
# Assemble the frame: (year, visit) pairs label the rows and
# (subject, type) pairs label the columns.
health_data = pd.DataFrame(data=data, index=index, columns=columns)

In [24]:
# Show the full frame with its two-level row index and two-level columns.
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,52.0,36.7,54.0,36.1,23.0,37.9
2013,2,35.0,36.4,18.0,38.8,53.0,36.8
2014,1,39.0,37.0,53.0,37.9,48.0,36.2
2014,2,34.0,35.3,53.0,35.7,37.0,35.8


In [25]:
# Selecting a top-level column label returns the sub-frame of that subject's
# columns (HR and Temp), keeping the (year, visit) row index.
health_data['Guido']

Unnamed: 0_level_0,type,HR,Temp
year,visit,Unnamed: 2_level_1,Unnamed: 3_level_1
2013,1,54.0,36.1
2013,2,18.0,38.8
2014,1,53.0,37.9
2014,2,53.0,35.7


In [26]:
# A (subject, type) pair picks out one column, returned as a Series.
health_data['Guido','HR']

year  visit
2013  1        54.0
      2        18.0
2014  1        53.0
      2        53.0
Name: (Guido, HR), dtype: float64

In [27]:
# Inspect the row index: a two-level MultiIndex with levels named 'year' and 'visit'.
health_data.index

MultiIndex(levels=[[2013, 2014], [1, 2]],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
           names=['year', 'visit'])

In [28]:
# unstack() with no arguments moves the innermost row level ('visit') into the
# columns, leaving 'year' as the only row level. The result is displayed, not stored.
health_data.unstack()

subject,Bob,Bob,Bob,Bob,Guido,Guido,Guido,Guido,Sue,Sue,Sue,Sue
type,HR,HR,Temp,Temp,HR,HR,Temp,Temp,HR,HR,Temp,Temp
visit,1,2,1,2,1,2,1,2,1,2,1,2
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
2013,52.0,35.0,36.7,36.4,54.0,18.0,36.1,38.8,23.0,53.0,37.9,36.8
2014,39.0,34.0,37.0,35.3,53.0,53.0,37.9,35.7,48.0,37.0,36.2,35.8


In [29]:
# Display the frame again: it still has its original (year, visit) row layout.
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,52.0,36.7,54.0,36.1,23.0,37.9
2013,2,35.0,36.4,18.0,38.8,53.0,36.8
2014,1,39.0,37.0,53.0,37.9,48.0,36.2
2014,2,34.0,35.3,53.0,35.7,37.0,35.8


In [30]:
# Average the visits within each year. The `level=` argument of
# DataFrame.mean() was deprecated in pandas 1.3 and removed in 2.0;
# grouping on the index level is the supported equivalent.
data_mean = health_data.groupby(level='year').mean()

In [31]:
# Show the per-year means: one row per year, with the 'visit' level averaged away.
data_mean

subject,Bob,Bob,Guido,Guido,Sue,Sue
type,HR,Temp,HR,Temp,HR,Temp
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2013,43.5,36.55,36.0,37.45,38.0,37.35
2014,36.5,36.15,53.0,36.8,42.5,36.0
