In [30]:
import numpy as np
import pandas as pd
import random 

In [4]:
area_dict = {'California': 423967, 'Texas': 695662, 'Florida': 170312, 'New York': 141297, 'Pennsylvania': 119280}
population_dict = {'California' : 39538223, 'Texas' : 29145505 , 'Florida' : 21538187, 'New York' : 20201249, 'Pennsylvania' : 13002700}
density_dict = {'California' : 90.00, 'Texas' : 41.896 , 'Florida' : 126.5, 'New York' : 142.97, 'Pennsylvania' : 109.30}
area = pd.Series(area_dict)


In [5]:
#Run a dictionary list
area_dict = {area : amount for area,amount in area_dict.items()}
#Create a series so can convert to a dataframe
series_area = pd.Series(area_dict)
series_pop = pd.Series(population_dict)
series_density = pd.Series(density_dict)
#Check that it worked
print("Area")
print(series_area)
print("Population")
print(series_pop)

Area
California      423967
Texas           695662
Florida         170312
New York        141297
Pennsylvania    119280
dtype: int64
Population
California      39538223
Texas           29145505
Florida         21538187
New York        20201249
Pennsylvania    13002700
dtype: int64


In [6]:
df_area = series_area.to_frame(name="area").reset_index().rename(columns={"index": "state"})
df_pop = series_pop.to_frame(name="area").reset_index().rename(columns={"index": "state"})
df_density = series_density.to_frame(name="area").reset_index().rename(columns={"index": "state"})
print(df_area)
print(df_density)

          state    area
0    California  423967
1         Texas  695662
2       Florida  170312
3      New York  141297
4  Pennsylvania  119280
          state     area
0    California   90.000
1         Texas   41.896
2       Florida  126.500
3      New York  142.970
4  Pennsylvania  109.300


In [7]:
df_density.rename(columns={"area": "density"}, inplace=True)
df_pop.rename(columns={"area": "population"}, inplace=True)

In [8]:
data_1 = pd.merge(df_area, df_pop, on="state")


In [16]:
data = pd.merge(data_1, df_density, on="state")

In [18]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   state       5 non-null      object 
 1   area        5 non-null      int64  
 2   population  5 non-null      int64  
 3   density     5 non-null      float64
dtypes: float64(1), int64(2), object(1)
memory usage: 292.0+ bytes


In [20]:
data.loc[0:2]

Unnamed: 0,state,area,population,density
0,California,423967,39538223,90.0
1,Texas,695662,29145505,41.896
2,Florida,170312,21538187,126.5


In [22]:
data.iloc[0:2]

Unnamed: 0,state,area,population,density
0,California,423967,39538223,90.0
1,Texas,695662,29145505,41.896


In [24]:
#The : before 'Florida' means select all rows from the beginning up to and including the row with the index 'Florida'.
data.loc[:'Florida']

Unnamed: 0,state,area,population,density
0,California,423967,39538223,90.0
1,Texas,695662,29145505,41.896
2,Florida,170312,21538187,126.5
3,New York,141297,20201249,142.97
4,Pennsylvania,119280,13002700,109.3


In [28]:
index = pd.MultiIndex.from_product([[2013,2014], [1,2]], names=['year','visit'])
columns = pd.MultiIndex.from_product([['Bob','Lob','Sob'],['HR','Temp']], names=['subject','type'])


In [34]:
data = np.round(np.random.randn(4,6),1)
data[:,::2] *=10
data +=37

This is fundamentally four-dimensional data, where the dimensions are the subject,
the measurement type, the year, and the visit number.

In [36]:
#Create dataframe
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Lob,Lob,Sob,Sob
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,19.0,37.2,31.0,37.5,39.0,37.3
2013,2,36.0,37.7,21.0,38.2,44.0,38.2
2014,1,39.0,37.1,29.0,38.5,43.0,37.8
2014,2,30.0,37.0,32.0,37.6,22.0,36.8


 With this in place we can, for
example, index the top-level column by the person’s name and get a full DataFrame
containing just that person’s information

In [40]:
health_data['Lob']

Unnamed: 0_level_0,type,HR,Temp
year,visit,Unnamed: 2_level_1,Unnamed: 3_level_1
2013,1,31.0,37.5
2013,2,21.0,38.2
2014,1,29.0,38.5
2014,2,32.0,37.6


In [42]:
health_data['Lob', 'HR']

year  visit
2013  1        31.0
      2        21.0
2014  1        29.0
      2        32.0
Name: (Lob, HR), dtype: float64