In [1]:
import pandas as pd

In [13]:
bigmac = pd.read_csv('../datasets/bigmac.csv', parse_dates=['Date'])
bigmac.head()

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35
3,2016-01-01,Britain,4.22
4,2016-01-01,Canada,4.14


In [6]:
bigmac.dtypes

Date                   datetime64[ns]
Country                        object
Price in US Dollars           float64
dtype: object

In [7]:
bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date                 652 non-null    datetime64[ns]
 1   Country              652 non-null    object        
 2   Price in US Dollars  652 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 15.4+ KB


### Create Multi Index with set_index method

In [14]:
# Best practice is to put the col with least num of unique
# values as outer layer

# Indexed by Date and then by Country
bigmac.set_index(keys=['Date', 'Country'], inplace=True)
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35


In [17]:
# Sort will first sort the outer layer, then layers below
bigmac.sort_index(inplace=True)
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [21]:
# This will return a tuple for multi-index dfs
bigmac.index

MultiIndex([('2010-01-01',      'Argentina'),
            ('2010-01-01',      'Australia'),
            ('2010-01-01',         'Brazil'),
            ('2010-01-01',        'Britain'),
            ('2010-01-01',         'Canada'),
            ('2010-01-01',          'Chile'),
            ('2010-01-01',          'China'),
            ('2010-01-01',       'Colombia'),
            ('2010-01-01',     'Costa Rica'),
            ('2010-01-01', 'Czech Republic'),
            ...
            ('2016-01-01',    'Switzerland'),
            ('2016-01-01',         'Taiwan'),
            ('2016-01-01',       'Thailand'),
            ('2016-01-01',         'Turkey'),
            ('2016-01-01',            'UAE'),
            ('2016-01-01',        'Ukraine'),
            ('2016-01-01',  'United States'),
            ('2016-01-01',        'Uruguay'),
            ('2016-01-01',      'Venezuela'),
            ('2016-01-01',        'Vietnam')],
           names=['Date', 'Country'], length=652)

In [23]:
# This will return index cols names
bigmac.index.names

FrozenList(['Date', 'Country'])

### Extrac index level values with get_level_values method

In [26]:
# Create multi index during import
bigmac = pd.read_csv('../datasets/bigmac.csv', 
                     parse_dates=['Date'], 
                     index_col=['Date', 'Country'])
bigmac.sort_index(inplace=True)
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [29]:
# Call this method on INDEX!!!
# these calls are equivalent
bigmac.index.get_level_values('Date')
bigmac.index.get_level_values[0]

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01'],
              dtype='datetime64[ns]', name='Date', length=652, freq=None)

In [31]:
bigmac.index.get_level_values('Country')
bigmac.index.get_level_values[1]

Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Colombia', 'Costa Rica', 'Czech Republic',
       ...
       'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'UAE', 'Ukraine',
       'United States', 'Uruguay', 'Venezuela', 'Vietnam'],
      dtype='object', name='Country', length=652)

### Change index level name with set_names method

In [34]:
# set_names is invoked on index!!!
bigmac.index.set_names(names=['Day', 'Locations'], inplace=True)
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Day,Locations,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [40]:
bigmac.index.set_names(names='Date', 
                       level=0, 
                       inplace=True)
# Same result
bigmac.index.set_names(names='Date', 
                       level='Day', 
                       inplace=True)
bigmac.head()

KeyError: 'Level Day not found'

### The sort_index method on a multi index df

In [42]:
bigmac = pd.read_csv('../datasets/bigmac.csv', 
                     parse_dates=['Date'], 
                     index_col=['Date', 'Country'])
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14


In [46]:
# Sort 1st index in ascending, 2nd in descending
# returns a brand new df
bigmac.sort_index(ascending=[True, False], inplace=True)
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Uruguay,3.32
2010-01-01,United States,3.58
2010-01-01,Ukraine,1.83


In [48]:
# Sort only a particular index, use level
bigmac.sort_index(level=0)
bigmac.sort_index(level='Date')

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97
...,...,...
2016-01-01,Ukraine,1.54
2016-01-01,United States,4.93
2016-01-01,Uruguay,3.74
2016-01-01,Venezuela,0.66


In [49]:
bigmac.sort_index(level=1)
bigmac.sort_index(level='Country')

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-07-01,Argentina,3.56
2011-07-01,Argentina,4.84
2012-01-01,Argentina,4.64
2012-07-01,Argentina,4.16
...,...,...
2014-01-01,Vietnam,2.84
2014-07-01,Vietnam,2.83
2015-01-01,Vietnam,2.81
2015-07-01,Vietnam,2.75


### Extrac rows from a multi index df using .loc (for labels) and .iloc (for indices)

In [70]:
bigmac = pd.read_csv('../datasets/bigmac.csv', 
                     parse_dates=['Date'], 
                     index_col=['Date', 'Country'])
bigmac.sort_index(inplace=True)
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [57]:
# Have 1st arg encapsulate row info, and 2nd col info
bigmac.loc[('2010-01-01', 'Argentina'), 'Price in US Dollars']

1.84

In [62]:
# Multi index does not affect .iloc
bigmac.iloc[0]

bigmac.iloc[[10, 20, 30]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Denmark,5.99
2010-01-01,Malaysia,2.08
2010-01-01,Singapore,3.19


### The .transpose method on a multi index df

In [79]:
# Swap rows and columns
# Does not mutate df
# If you want to make it permanent - assign to a new df
bigmac = bigmac.transpose()
bigmac.head()

Date,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,...,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01
Country,Argentina,Australia,Brazil,Britain,Canada,Chile,China,Colombia,Costa Rica,Czech Republic,...,Switzerland,Taiwan,Thailand,Turkey,UAE,Ukraine,United States,Uruguay,Venezuela,Vietnam
Price in US Dollars,1.84,3.98,4.76,3.67,3.97,3.18,1.83,3.91,3.52,3.71,...,6.44,2.08,3.09,3.41,3.54,1.54,4.93,3.74,0.66,2.67


In [80]:
# Extract prices for a specified date from Sri Lanka to Ukraine
bigmac.loc[('Price in US Dollars',), ('2010-01-01', 'Sri Lanka'):('2010-01-01','Ukraine')]

Date,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01
Country,Sri Lanka,Sweden,Switzerland,Taiwan,Thailand,Turkey,UAE,Ukraine
Price in US Dollars,1.83,5.51,6.3,2.36,2.11,3.83,2.99,1.83


### The .swaplevel() method

In [81]:
bigmac = pd.read_csv('../datasets/bigmac.csv', 
                     parse_dates=['Date'], 
                     index_col=['Date', 'Country'])
bigmac.sort_index(inplace=True)
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [84]:
# Swap index positions
bigmac.swaplevel('Date', 'Country')
bigmac.swaplevel(0, 1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Country,Date,Unnamed: 2_level_1
Argentina,2010-01-01,1.84
Australia,2010-01-01,3.98
Brazil,2010-01-01,4.76
Britain,2010-01-01,3.67
Canada,2010-01-01,3.97
...,...,...
Ukraine,2016-01-01,1.54
United States,2016-01-01,4.93
Uruguay,2016-01-01,3.74
Venezuela,2016-01-01,0.66


### The .stack() method

In [86]:
# Takes the column labels and moves them to the left
# Increases num of rows and reduces num of cols
world = pd.read_csv('../datasets/worldstats.csv', index_col=['country', 'year'])
world.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0
Arab World,2012,368802611.0,2773270000000.0
Arab World,2011,361031820.0,2497945000000.0


In [87]:
world.stack()

country     year            
Arab World  2015  Population    3.920223e+08
                  GDP           2.530102e+12
            2014  Population    3.842226e+08
                  GDP           2.873600e+12
            2013  Population    3.765043e+08
                                    ...     
Zimbabwe    1962  GDP           1.117602e+09
            1961  Population    3.876638e+06
                  GDP           1.096647e+09
            1960  Population    3.752390e+06
                  GDP           1.052990e+09
Length: 22422, dtype: float64

In [88]:
world.stack().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,Population,3.920223e+08
Arab World,2015,GDP,2.530102e+12
Arab World,2014,Population,3.842226e+08
Arab World,2014,GDP,2.873600e+12
Arab World,2013,Population,3.765043e+08
...,...,...,...
Zimbabwe,1962,GDP,1.117602e+09
Zimbabwe,1961,Population,3.876638e+06
Zimbabwe,1961,GDP,1.096647e+09
Zimbabwe,1960,Population,3.752390e+06


### The .unstack() method

In [89]:
# Takes the most inner index layer and makes it a column
world = pd.read_csv('../datasets/worldstats.csv', index_col=['country', 'year'])
world.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0
Arab World,2012,368802611.0,2773270000000.0
Arab World,2011,361031820.0,2497945000000.0


In [91]:
s = world.stack()
s.head()

country     year            
Arab World  2015  Population    3.920223e+08
                  GDP           2.530102e+12
            2014  Population    3.842226e+08
                  GDP           2.873600e+12
            2013  Population    3.765043e+08
dtype: float64

In [96]:

s.unstack().unstack()

Unnamed: 0_level_0,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,...,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP
year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Afghanistan,8.994793e+06,9.164945e+06,9.343772e+06,9.531555e+06,9.728645e+06,9.935358e+06,1.014884e+07,1.036860e+07,1.059979e+07,1.084951e+07,...,7.057598e+09,9.843842e+09,1.019053e+10,1.248694e+10,1.593680e+10,1.793024e+10,2.053654e+10,2.004633e+10,2.005019e+10,1.919944e+10
Albania,,,,,,,,,,,...,8.992642e+09,1.070101e+10,1.288135e+10,1.204421e+10,1.192695e+10,1.289087e+10,1.231978e+10,1.278103e+10,1.327796e+10,1.145560e+10
Algeria,1.112489e+07,1.140486e+07,1.169015e+07,1.198513e+07,1.229597e+07,1.262695e+07,1.298027e+07,1.335420e+07,1.374438e+07,1.414444e+07,...,1.170273e+11,1.349771e+11,1.710007e+11,1.372110e+11,1.612073e+11,2.000131e+11,2.090474e+11,2.097035e+11,2.135185e+11,1.668386e+11
Andorra,,,,,,,,,,,...,3.536452e+09,4.010785e+09,4.001349e+09,3.649863e+09,3.346317e+09,3.427236e+09,3.146178e+09,3.249101e+09,,
Angola,,,,,,,,,,,...,4.178948e+10,6.044892e+10,8.417803e+10,7.549238e+10,8.247091e+10,1.041159e+11,1.153984e+11,1.249121e+11,1.267751e+11,1.026431e+11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
West Bank and Gaza,,,,,,,,,,,...,4.910100e+09,5.505800e+09,6.673500e+09,7.268200e+09,8.913100e+09,1.045985e+10,1.127940e+10,1.247600e+10,1.271560e+10,1.267740e+10
World,3.035056e+09,3.076121e+09,3.129064e+09,3.193947e+09,3.259355e+09,3.326054e+09,3.395866e+09,3.465297e+09,3.535512e+09,3.609910e+09,...,5.107451e+13,5.758343e+13,6.312856e+13,5.983553e+13,6.564782e+13,7.284314e+13,7.442836e+13,7.643132e+13,7.810634e+13,7.343364e+13
"Yemen, Rep.",,,,,,,,,,,...,1.908173e+10,2.563367e+10,3.039720e+10,2.845950e+10,3.090675e+10,3.107886e+10,3.207477e+10,3.595450e+10,,
Zambia,3.049586e+06,3.142848e+06,3.240664e+06,3.342894e+06,3.449266e+06,3.559687e+06,3.674088e+06,3.792864e+06,3.916928e+06,4.047479e+06,...,1.275686e+10,1.405696e+10,1.791086e+10,1.532834e+10,2.026555e+10,2.345952e+10,2.550306e+10,2.804552e+10,2.713464e+10,2.120156e+10


In [99]:
s.unstack().unstack().unstack()

            year  country           
Population  1960  Afghanistan           8.994793e+06
                  Albania                        NaN
                  Algeria               1.112489e+07
                  Andorra                        NaN
                  Angola                         NaN
                                            ...     
GDP         2015  West Bank and Gaza    1.267740e+10
                  World                 7.343364e+13
                  Yemen, Rep.                    NaN
                  Zambia                2.120156e+10
                  Zimbabwe              1.389294e+10
Length: 28224, dtype: float64

In [104]:
s.unstack(-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,1960,8994793.0,5.377778e+08
Afghanistan,1961,9164945.0,5.488889e+08
Afghanistan,1962,9343772.0,5.466667e+08
Afghanistan,1963,9531555.0,7.511112e+08
Afghanistan,1964,9728645.0,8.000000e+08
...,...,...,...
Zimbabwe,2011,14255592.0,1.095623e+10
Zimbabwe,2012,14565482.0,1.239272e+10
Zimbabwe,2013,14898092.0,1.349023e+10
Zimbabwe,2014,15245855.0,1.419691e+10


In [109]:
# Make year (1st) and country (2nd) cols
s.unstack(level=[1, 0])
# s.unstack(level=['Year', 'Country'])

year,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,...,1969,1968,1967,1966,1965,1964,1963,1962,1961,1960
country,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,...,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe
Population,392022300.0,384222600.0,376504300.0,368802600.0,361031800.0,353112200.0,345054200.0,336886500.0,328766600.0,320906700.0,...,5036321.0,4874113.0,4718612.0,4568320.0,4422132.0,4279561.0,4140804.0,4006262.0,3876638.0,3752390.0
GDP,2530102000000.0,2873600000000.0,2846994000000.0,2773270000000.0,2497945000000.0,2103825000000.0,1798878000000.0,2081343000000.0,1641666000000.0,1404190000000.0,...,1747999000.0,1479600000.0,1397002000.0,1281750000.0,1311436000.0,1217138000.0,1159512000.0,1117602000.0,1096647000.0,1052990000.0


In [111]:
# Fill value replaces NaN
s.unstack('year', fill_value=0)

Unnamed: 0_level_0,year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,Population,8.994793e+06,9.164945e+06,9.343772e+06,9.531555e+06,9.728645e+06,9.935358e+06,1.014884e+07,1.036860e+07,1.059979e+07,1.084951e+07,...,2.518362e+07,2.587754e+07,2.652874e+07,2.720729e+07,2.796221e+07,2.880917e+07,2.972680e+07,3.068250e+07,3.162751e+07,3.252656e+07
Afghanistan,GDP,5.377778e+08,5.488889e+08,5.466667e+08,7.511112e+08,8.000000e+08,1.006667e+09,1.400000e+09,1.673333e+09,1.373333e+09,1.408889e+09,...,7.057598e+09,9.843842e+09,1.019053e+10,1.248694e+10,1.593680e+10,1.793024e+10,2.053654e+10,2.004633e+10,2.005019e+10,1.919944e+10
Albania,Population,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,2.992547e+06,2.970017e+06,2.947314e+06,2.927519e+06,2.913021e+06,2.904780e+06,2.900247e+06,2.896652e+06,2.893654e+06,2.889167e+06
Albania,GDP,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,8.992642e+09,1.070101e+10,1.288135e+10,1.204421e+10,1.192695e+10,1.289087e+10,1.231978e+10,1.278103e+10,1.327796e+10,1.145560e+10
Algeria,Population,1.112489e+07,1.140486e+07,1.169015e+07,1.198513e+07,1.229597e+07,1.262695e+07,1.298027e+07,1.335420e+07,1.374438e+07,1.414444e+07,...,3.374933e+07,3.426197e+07,3.481106e+07,3.540179e+07,3.603616e+07,3.671713e+07,3.743943e+07,3.818614e+07,3.893433e+07,3.966652e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Yemen, Rep.",GDP,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,1.908173e+10,2.563367e+10,3.039720e+10,2.845950e+10,3.090675e+10,3.107886e+10,3.207477e+10,3.595450e+10,0.000000e+00,0.000000e+00
Zambia,Population,3.049586e+06,3.142848e+06,3.240664e+06,3.342894e+06,3.449266e+06,3.559687e+06,3.674088e+06,3.792864e+06,3.916928e+06,4.047479e+06,...,1.238151e+07,1.273868e+07,1.311458e+07,1.350785e+07,1.391744e+07,1.434353e+07,1.478658e+07,1.524609e+07,1.572134e+07,1.621177e+07
Zambia,GDP,6.987397e+08,6.823597e+08,6.792797e+08,7.043397e+08,8.226397e+08,1.061200e+09,1.239000e+09,1.340639e+09,1.573739e+09,1.926399e+09,...,1.275686e+10,1.405696e+10,1.791086e+10,1.532834e+10,2.026555e+10,2.345952e+10,2.550306e+10,2.804552e+10,2.713464e+10,2.120156e+10
Zimbabwe,Population,3.752390e+06,3.876638e+06,4.006262e+06,4.140804e+06,4.279561e+06,4.422132e+06,4.568320e+06,4.718612e+06,4.874113e+06,5.036321e+06,...,1.312794e+07,1.329780e+07,1.349546e+07,1.372100e+07,1.397390e+07,1.425559e+07,1.456548e+07,1.489809e+07,1.524586e+07,1.560275e+07


### The .pivot() method

In [115]:
sales = pd.read_csv('../datasets/salesmen.csv', parse_dates=['Date'])
sales['Salesman'] = sales['Salesman'].astype('category')
sales.head()

Unnamed: 0,Date,Salesman,Revenue
0,2016-01-01,Bob,7172
1,2016-01-02,Bob,6362
2,2016-01-03,Bob,5982
3,2016-01-04,Bob,7917
4,2016-01-05,Bob,7837


In [116]:
# Condense df, 5 rows become 1
# Salesman values become column header
sales.pivot(index='Date', columns='Salesman', values='Revenue')

Salesman,Bob,Dave,Jeb,Oscar,Ronald
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-01,7172,1864,4430,5250,2639
2016-01-02,6362,8278,8026,8661,4951
2016-01-03,5982,4226,5188,7075,2703
2016-01-04,7917,3868,3144,2524,4258
2016-01-05,7837,2287,938,2793,7771
...,...,...,...,...,...
2016-12-27,2045,2843,6666,835,2981
2016-12-28,100,8888,1243,3073,6129
2016-12-29,4115,9490,3498,6424,7662
2016-12-30,2577,3594,8858,7088,2570


### The .pivot_table() method

In [118]:
foods = pd.read_csv('../datasets/foods.csv')
foods.head()

Unnamed: 0,First Name,Gender,City,Frequency,Item,Spend
0,Wanda,Female,Stamford,Weekly,Burger,15.66
1,Eric,Male,Stamford,Daily,Chalupa,10.56
2,Charles,Male,New York,Never,Sushi,42.14
3,Anna,Female,Philadelphia,Once,Ice Cream,11.01
4,Deborah,Female,Philadelphia,Daily,Chalupa,23.49


In [123]:
# Get sum spent based on gender and item per city
foods.pivot_table(values='Spend', index=['Gender', 'Item'], columns=['Frequency', 'City'], aggfunc='sum')

Unnamed: 0_level_0,Frequency,Daily,Daily,Daily,Monthly,Monthly,Monthly,Never,Never,Never,Often,...,Once,Seldom,Seldom,Seldom,Weekly,Weekly,Weekly,Yearly,Yearly,Yearly
Unnamed: 0_level_1,City,New York,Philadelphia,Stamford,New York,Philadelphia,Stamford,New York,Philadelphia,Stamford,New York,...,Stamford,New York,Philadelphia,Stamford,New York,Philadelphia,Stamford,New York,Philadelphia,Stamford
Gender,Item,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Female,Burger,262.67,231.68,144.66,171.86,215.05,238.49,97.89,218.97,90.97,94.96,...,95.05,63.16,409.05,97.53,184.35,32.0,155.02,259.3,123.17,307.03
Female,Burrito,224.45,321.57,195.63,122.74,34.28,67.94,189.73,382.3,366.34,103.6,...,168.01,167.54,198.11,234.49,39.69,62.82,230.91,71.26,116.75,172.98
Female,Chalupa,43.19,23.49,95.7,158.37,289.96,161.98,35.15,121.97,156.36,39.73,...,40.59,160.0,274.51,175.25,171.52,84.41,204.69,157.82,504.44,348.16
Female,Donut,478.1,247.4,124.35,284.53,50.25,229.3,56.07,433.58,157.33,130.63,...,158.24,90.81,183.25,104.66,285.56,208.8,440.06,62.95,175.23,224.48
Female,Ice Cream,262.19,177.69,92.88,92.53,74.51,125.85,206.15,156.03,77.66,232.26,...,279.33,242.35,203.1,117.73,227.62,285.28,125.03,151.67,159.86,15.24
Female,Sushi,81.07,348.53,393.27,93.16,78.71,108.39,69.33,190.58,19.56,343.94,...,194.75,87.7,314.24,27.82,410.89,334.0,315.36,232.41,145.85,362.74
Male,Burger,319.46,112.7,197.72,187.29,213.14,27.16,90.32,17.31,,55.47,...,326.64,225.68,188.06,159.75,69.69,99.89,310.21,49.61,197.36,225.07
Male,Burrito,236.21,165.76,276.23,147.54,89.58,119.6,86.78,378.32,351.84,189.92,...,30.15,202.4,27.71,9.84,256.74,289.25,161.85,220.7,118.51,262.64
Male,Chalupa,54.09,274.81,192.64,333.76,90.7,343.76,199.09,242.98,138.7,251.52,...,,11.69,130.75,174.02,163.2,135.68,44.37,167.74,68.81,116.19
Male,Donut,230.0,190.71,129.42,183.73,259.29,119.53,219.63,53.65,219.64,139.8,...,139.89,16.25,99.01,163.31,74.44,115.8,311.27,143.1,44.61,16.52


### The pd.melt() method to create narrow dataset (opposite of pivot)

In [126]:
sales = pd.read_csv('../datasets/quarters.csv')
sales.head()

Unnamed: 0,Salesman,Q1,Q2,Q3,Q4
0,Boris,602908,233879,354479,32704
1,Bob,43790,514863,297151,544493
2,Tommy,392668,113579,430882,247231
3,Travis,834663,266785,749238,570524
4,Donald,580935,411379,110390,651572


In [130]:
# Move column headers into rows
# Condense several columns into a single col/row
pd.melt(sales, id_vars='Salesman', var_name='Quarter', value_name='Revenue')

Unnamed: 0,Salesman,Quarter,Revenue
0,Boris,Q1,602908
1,Bob,Q1,43790
2,Tommy,Q1,392668
3,Travis,Q1,834663
4,Donald,Q1,580935
5,Ted,Q1,656644
6,Jeb,Q1,486141
7,Stacy,Q1,479662
8,Morgan,Q1,992673
9,Boris,Q2,233879
