In [1]:
import pandas as pd

# Intro / Looking at the Data

In [2]:
# Read the Big Mac price data; parse_dates turns the "Date" column into
# datetime64 values instead of leaving it as plain strings.
bigmac = pd.read_csv(filepath_or_buffer="bigmac.csv", parse_dates=["Date"])
# Preview the first three rows.
bigmac.head(n=3)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35


In [3]:
# Check the dtype of every column; "Date" should be datetime64 because it was
# listed in parse_dates when the CSV was read.
bigmac.dtypes

Date                   datetime64[ns]
Country                        object
Price in US Dollars           float64
dtype: object

In [4]:
# Print a summary of the frame: row range, non-null count and dtype per
# column, and memory usage.
bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
Date                   652 non-null datetime64[ns]
Country                652 non-null object
Price in US Dollars    652 non-null float64
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 15.4+ KB


# Create a MultiIndex with the .set_index() Method

In [5]:
# Reload the data with a default RangeIndex so this section starts from a
# clean frame before any MultiIndex is built.
bigmac = pd.read_csv(filepath_or_buffer="bigmac.csv", parse_dates=["Date"])
bigmac.head(n=3)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35


In [6]:
# Giving .set_index() a list of column labels (its `keys` argument, passed
# positionally here) builds a MultiIndex with one level per label.
# This returns a new DataFrame; bigmac itself is not modified.
bigmac.set_index(["Date", "Country"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14
2016-01-01,Chile,2.94
2016-01-01,China,2.68
2016-01-01,Colombia,2.43
2016-01-01,Costa Rica,4.02
2016-01-01,Czech Republic,2.98


In [7]:
# The order of the labels in the list decides the order of the index levels:
# here Country becomes the outer level and Date the inner one.
bigmac.set_index(["Country", "Date"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Country,Date,Unnamed: 2_level_1
Argentina,2016-01-01,2.39
Australia,2016-01-01,3.74
Brazil,2016-01-01,3.35
Britain,2016-01-01,4.22
Canada,2016-01-01,4.14
Chile,2016-01-01,2.94
China,2016-01-01,2.68
Colombia,2016-01-01,2.43
Costa Rica,2016-01-01,4.02
Czech Republic,2016-01-01,2.98


In [8]:
# Make the (Date, Country) MultiIndex permanent by rebinding the name to the
# re-indexed frame.
bigmac = bigmac.set_index(["Date", "Country"])
bigmac.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35


In [9]:
# .sort_index() on a MultiIndex DataFrame orders rows level by level: first
# by the outer level (Date), then by the inner level (Country) within it.
bigmac = bigmac.sort_index()
bigmac.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [10]:
# .index on a MultiIndex DataFrame returns a MultiIndex object. In the repr
# shown below it lists the unique values of each level (`levels`) followed by
# integer position codes (`labels`) that point into those level lists.
bigmac.index

MultiIndex(levels=[[2010-01-01 00:00:00, 2010-07-01 00:00:00, 2011-07-01 00:00:00, 2012-01-01 00:00:00, 2012-07-01 00:00:00, 2013-01-01 00:00:00, 2013-07-01 00:00:00, 2014-01-01 00:00:00, 2014-07-01 00:00:00, 2015-01-01 00:00:00, 2015-07-01 00:00:00, 2016-01-01 00:00:00], ['Argentina', 'Australia', 'Austria', 'Belgium', 'Brazil', 'Britain', 'Canada', 'Chile', 'China', 'Colombia', 'Costa Rica', 'Czech Republic', 'Denmark', 'Egypt', 'Estonia', 'Euro area', 'Finland', 'France', 'Germany', 'Greece', 'Hong Kong', 'Hungary', 'India', 'Indonesia', 'Ireland', 'Israel', 'Italy', 'Japan', 'Latvia', 'Lithuania', 'Malaysia', 'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Pakistan', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Russia', 'Saudi Arabia', 'Singapore', 'South Africa', 'South Korea', 'Spain', 'Sri Lanka', 'Sweden', 'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'UAE', 'Ukraine', 'United States', 'Uruguay', 'Venezuela', 'Vietnam']],
           labels=[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [11]:
# .index.names returns the names of the index levels, outermost first
# (here "Date" and "Country").
bigmac.index.names

FrozenList(['Date', 'Country'])

In [12]:
# Indexing the MultiIndex by position returns that row's key as a tuple with
# one value per level, e.g. (Timestamp, country name) for the first row.
bigmac.index[0]

(Timestamp('2010-01-01 00:00:00'), 'Argentina')

# The .get_level_values() Method

In [13]:
# Build the (Date, Country) MultiIndex directly while reading the CSV via
# index_col, then sort the rows by that index.
bigmac = pd.read_csv(
    "bigmac.csv",
    index_col=["Date", "Country"],
    parse_dates=["Date"],
).sort_index()
bigmac.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [14]:
# Load the player stats indexed by (Year, Name), discard the leftover
# "Unnamed: 0" column from the CSV, and sort the rows by the MultiIndex.
players = (
    pd.read_csv(
        "npo_players.csv",
        index_col=["Year", "Name"],
        parse_dates=["Year"],
    )
    .drop(columns="Unnamed: 0")
    .sort_index()
)
players.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1999-01-01,Dirk Nowitzki,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
1999-01-01,Vince Carter,22,TOR,SF,50,49,35.2,6.9,15.3,0.45,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
2000-01-01,Dirk Nowitzki,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.83,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5


In [15]:
# .get_level_values() returns the values of one index level for every row.
# The level can be given by integer position (0 = outermost, i.e. Year) or
# by its name.
players.index.get_level_values(level=0)

DatetimeIndex(['1999-01-01', '1999-01-01', '2000-01-01', '2000-01-01',
               '2001-01-01', '2001-01-01', '2001-01-01', '2002-01-01',
               '2002-01-01', '2002-01-01',
               ...
               '2019-01-01', '2019-01-01', '2019-01-01', '2019-01-01',
               '2019-01-01', '2019-01-01', '2019-01-01', '2019-01-01',
               '2019-01-01', '2019-01-01'],
              dtype='datetime64[ns]', name='Year', length=2343, freq=None)

In [16]:
# Same lookup, this time selecting the level by its name instead of position.
players.index.get_level_values(level="Name")

Index(['Dirk Nowitzki', 'Vince Carter', 'Dirk Nowitzki', 'Vince Carter',
       'Dirk Nowitzki', 'Jamal Crawford', 'Vince Carter', 'Dirk Nowitzki',
       'Jamal Crawford', 'Pau Gasol',
       ...
       'Will Barton', 'Willie Cauley-Stein', 'Wilson Chandler', 'Yogi Ferrell',
       'Yuta Watanabe', 'Zach Collins', 'Zach LaVine', 'Zach Lofton',
       'Zach Randolph', 'Zaza Pachulia'],
      dtype='object', name='Name', length=2343)

# The .set_names() Method

In [17]:
# Fresh copy of the player stats for this section: index by (Year, Name),
# drop the stray "Unnamed: 0" column, sort by the index.
players = (
    pd.read_csv(
        "npo_players.csv",
        index_col=["Year", "Name"],
        parse_dates=["Year"],
    )
    .drop(columns="Unnamed: 0")
    .sort_index()
)
players.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1999-01-01,Dirk Nowitzki,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
1999-01-01,Vince Carter,22,TOR,SF,50,49,35.2,6.9,15.3,0.45,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
2000-01-01,Dirk Nowitzki,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.83,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5


In [18]:
# .set_names() renames the index level at the given position (level=1 is the
# inner "Name" level). It returns a new index rather than modifying the
# existing one, so assign the result back to players.index. The old
# `inplace=True` argument is not used because it was deprecated and later
# removed from pandas.
players.index = players.index.set_names("Player", level=1)

In [19]:
# Preview the first three rows to check the index level names.
players.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Player,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1999-01-01,Dirk Nowitzki,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
1999-01-01,Vince Carter,22,TOR,SF,50,49,35.2,6.9,15.3,0.45,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
2000-01-01,Dirk Nowitzki,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.83,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5


In [20]:
# Passing a list to .set_names() renames every index level at once, in level
# order (outermost first). The renamed index is assigned back to
# players.index because .set_names() returns a new index; the `inplace=True`
# argument was deprecated and later removed from pandas.
players.index = players.index.set_names(["Season", "Player Name"])
players.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Season,Player Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1999-01-01,Dirk Nowitzki,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
1999-01-01,Vince Carter,22,TOR,SF,50,49,35.2,6.9,15.3,0.45,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
2000-01-01,Dirk Nowitzki,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.83,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5


# The .sort_index() Method on a MultiIndex DataFrame

In [21]:
# Reload the player stats for the sorting examples: (Year, Name) index,
# "Unnamed: 0" column removed, rows sorted ascending on both levels.
players = (
    pd.read_csv(
        "npo_players.csv",
        index_col=["Year", "Name"],
        parse_dates=["Year"],
    )
    .drop(columns="Unnamed: 0")
    .sort_index()
)
players.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1999-01-01,Dirk Nowitzki,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
1999-01-01,Vince Carter,22,TOR,SF,50,49,35.2,6.9,15.3,0.45,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
2000-01-01,Dirk Nowitzki,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.83,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5


In [22]:
# A single boolean for `ascending` applies to ALL index levels: with False,
# both Year and Name are sorted in descending order.
# Returns a new DataFrame; players itself is left unchanged.
players.sort_index(ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2019-01-01,Zaza Pachulia,34,DET,C,32,0,13.1,1.3,3.2,0.412,0.0,...,2.3,0.770,1.8,2.7,1.5,0.7,0.3,0.8,2.2,4.4
2019-01-01,Zach Randolph,36,SAC,PF,59,57,25.6,6.1,12.9,0.473,0.9,...,1.8,0.785,1.6,5.1,2.2,0.7,0.2,2.0,2.0,14.5
2019-01-01,Zach Lofton,26,DET,SG,1,0,4.0,0.0,1.0,0.000,0.0,...,0.0,,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
2019-01-01,Zach LaVine,23,CHI,SG,35,34,34.5,8.3,18.2,0.457,1.8,...,5.9,0.868,0.4,3.9,4.3,0.9,0.5,3.6,2.2,23.5
2019-01-01,Zach Collins,21,POR,C,42,0,18.9,2.7,5.5,0.491,0.5,...,1.6,0.797,1.3,3.0,0.9,0.4,0.8,1.1,2.5,7.3
2019-01-01,Yuta Watanabe,24,MEM,SG,4,0,4.8,0.5,1.3,0.400,0.0,...,0.5,1.000,0.0,1.3,0.0,0.0,0.3,0.3,0.0,1.5
2019-01-01,Yogi Ferrell,25,SAC,SG,31,2,13.6,2.1,4.8,0.436,0.7,...,1.0,0.875,0.1,1.3,1.7,0.4,0.1,0.4,0.7,5.8
2019-01-01,Wilson Chandler,31,PHI,PF,27,23,25.6,2.4,5.6,0.437,1.1,...,0.4,0.667,1.3,3.4,2.0,0.4,0.4,1.0,2.7,6.3
2019-01-01,Willie Cauley-Stein,25,SAC,C,41,41,28.6,5.9,11.2,0.526,0.0,...,3.4,0.525,2.3,6.5,2.6,1.2,0.6,1.2,3.0,13.6
2019-01-01,Will Barton,28,DEN,SG,2,2,27.0,6.0,10.5,0.571,2.5,...,2.0,1.000,0.0,5.0,3.0,0.5,1.0,1.5,2.5,16.5


In [23]:
# A list of booleans sets the sort direction per index level, in level order:
# here Year is sorted descending and Name ascending within each year.
players.sort_index(ascending=[False, True])

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2019-01-01,Aaron Gordon,23,ORL,PF,39,39,33.4,5.8,13.1,0.442,1.5,...,3.1,0.681,1.6,5.9,3.3,0.8,0.7,1.7,2.3,15.2
2019-01-01,Aaron Holiday,22,IND,PG,23,0,11.7,2.0,5.0,0.412,0.6,...,1.1,0.885,0.1,1.6,1.5,0.5,0.3,0.7,1.3,5.7
2019-01-01,Abdel Nader,25,OKC,SF,22,1,7.5,1.2,2.6,0.466,0.4,...,0.2,1.000,0.1,1.2,0.2,0.2,0.0,0.4,1.0,3.0
2019-01-01,Al Horford,32,BOS,C,31,31,28.2,5.0,9.8,0.508,1.2,...,1.1,0.706,1.5,4.7,3.9,0.8,1.4,1.5,1.8,12.0
2019-01-01,Al-Farouq Aminu,28,POR,PF,42,42,30.1,3.2,7.5,0.425,1.4,...,2.0,0.837,1.5,6.6,1.0,0.9,0.4,0.8,2.1,9.4
2019-01-01,Alec Burks,27,TOT,SG,39,13,23.3,3.5,8.7,0.402,1.2,...,2.6,0.822,0.4,3.0,2.0,0.6,0.4,1.1,1.5,10.4
2019-01-01,Alex Abrines,25,OKC,SG,29,2,19.8,1.9,5.3,0.359,1.4,...,0.4,0.923,0.2,1.4,0.7,0.6,0.2,0.5,1.8,5.6
2019-01-01,Alex Caruso,24,LAL,SG,2,0,2.5,0.0,0.0,,0.0,...,0.0,,0.5,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2019-01-01,Alex Len,25,ATL,C,37,20,20.2,3.9,8.2,0.474,0.5,...,2.8,0.657,2.1,3.4,0.9,0.3,1.0,1.5,2.8,10.1
2019-01-01,Alex Poythress,25,ATL,PF,18,1,13.5,1.9,3.6,0.538,0.4,...,1.2,0.524,1.3,1.9,0.7,0.2,0.4,0.6,2.2,4.9


# Extract Rows from a MultiIndex DataFrame

In [24]:
# Reload the player stats for the row-extraction examples: (Year, Name)
# index, "Unnamed: 0" column removed, rows sorted by the index.
players = (
    pd.read_csv(
        "npo_players.csv",
        index_col=["Year", "Name"],
        parse_dates=["Year"],
    )
    .drop(columns="Unnamed: 0")
    .sort_index()
)
players.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1999-01-01,Dirk Nowitzki,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
1999-01-01,Vince Carter,22,TOR,SF,50,49,35.2,6.9,15.3,0.45,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
2000-01-01,Dirk Nowitzki,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.83,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5


In [25]:
# Passing a single label to .loc[] selects every row whose OUTERMOST index
# level (Year) matches it; a bare label is only matched against the top level.
# Note: ("2010-01-01") is just a parenthesised string, not a tuple, so the
# parentheses are omitted here. A real tuple is needed only when inner levels
# are part of the key, e.g. players.loc[("2010-01-01", "Al Horford")].
players.loc["2010-01-01"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2010-01-01,Al Horford,23,ATL,C,81,81,35.1,5.8,10.5,0.551,0.0,...,3.3,0.789,2.9,7.0,2.3,0.7,1.1,1.5,2.8,14.2
2010-01-01,Amir Johnson,22,TOR,PF,82,5,17.7,2.5,4.1,0.623,0.0,...,1.8,0.638,1.9,2.9,0.6,0.5,0.8,0.8,3.1,6.2
2010-01-01,Andre Iguodala,26,PHI,SF,82,82,38.9,6.0,13.7,0.443,1.1,...,5.2,0.733,1.0,5.5,5.8,1.7,0.7,2.7,1.8,17.1
2010-01-01,Anthony Tolliver,24,TOT,PF,46,29,31.0,4.2,9.8,0.430,1.1,...,2.9,0.769,1.8,5.1,1.9,0.7,0.7,1.0,2.8,11.7
2010-01-01,Brook Lopez,21,NJN,C,82,82,36.9,6.9,13.8,0.499,0.0,...,6.2,0.817,3.3,5.4,2.3,0.7,1.7,2.5,3.1,18.8
2010-01-01,C.J. Miles,22,UTA,SF,63,28,23.8,3.8,8.9,0.429,1.2,...,1.5,0.695,0.7,2.0,1.7,0.9,0.3,1.3,3.0,9.9
2010-01-01,Carmelo Anthony,25,DEN,SF,69,69,38.2,10.0,21.8,0.458,0.9,...,8.9,0.830,2.2,4.4,3.2,1.3,0.4,3.0,3.3,28.2
2010-01-01,Channing Frye,25,POR,PF,63,1,11.8,1.8,4.2,0.423,0.2,...,0.6,0.722,0.7,1.6,0.4,0.3,0.3,0.5,1.7,4.2
2010-01-01,Chris Paul,24,NOH,PG,45,45,38.0,7.0,14.2,0.493,1.2,...,4.2,0.847,0.4,3.8,10.7,2.1,0.2,2.5,2.6,18.7
2010-01-01,Courtney Lee,24,NJN,SG,71,66,33.5,4.8,10.9,0.436,1.1,...,2.2,0.869,0.8,2.7,1.7,1.3,0.3,1.1,1.8,12.5


In [26]:
# To pull out one specific row, supply a label for every index level.
# Writing .loc[a, b] is the same as .loc[(a, b)]; the tuple is spelled out
# here to make it clear that both labels together form the row key.
players.loc[("2000-01-01", "Vince Carter")]

Age                                        23
Team                                      TOR
Position                                   SF
Games                                      82
Games Started                              82
Minutes Per Game                         38.1
Field Goals Per Game                      9.6
Field Goal Attempts Per Game             20.7
Field Goal Percent                      0.465
3-Point Field Goals Per Game              1.2
3-Point Field Goal Attempts Per Game      2.9
3-Point Field Goal Percent              0.403
2-Point Field Goals Per Game              8.5
2-Point Field Goal Attempts Per Game     17.8
2-Point Field Goal Percent              0.475
Effective Field Goal Percent            0.493
Free Throws Per Game                      5.3
Free Throw Attempts Per Game              6.7
Free Throw Percent                      0.791
Offensive Rebounds Per Game               1.8
Defensive Rebounds Per Game                 4
Assists Per Game                  

In [27]:
# To select one column for one row, wrap the row's index labels in a tuple so
# .loc[] treats them as a single MultiIndex row key; the second argument then
# names the column. In the output recorded below this comes back as a
# one-element Series (still indexed by Year and Name) rather than a bare scalar.
players.loc[("2000-01-01", "Vince Carter"), "Age"]

Year        Name        
2000-01-01  Vince Carter    23
Name: Age, dtype: int64

# The .transpose() Method

In [28]:
# Reload the player stats for the transpose examples: (Year, Name) index,
# "Unnamed: 0" column removed, rows sorted by the index.
players = (
    pd.read_csv(
        "npo_players.csv",
        index_col=["Year", "Name"],
        parse_dates=["Year"],
    )
    .drop(columns="Unnamed: 0")
    .sort_index()
)
players.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1999-01-01,Dirk Nowitzki,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
1999-01-01,Vince Carter,22,TOR,SF,50,49,35.2,6.9,15.3,0.45,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
2000-01-01,Dirk Nowitzki,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.83,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5


In [29]:
# .transpose() swaps the axes of the DataFrame: the (Year, Name) row index
# becomes a two-level column index and the stat names become the row labels.
# The result is a new DataFrame; players is not changed by this call.
players.transpose()

Year,1999-01-01,1999-01-01,2000-01-01,2000-01-01,2001-01-01,2001-01-01,2001-01-01,2002-01-01,2002-01-01,2002-01-01,...,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01
Name,Dirk Nowitzki,Vince Carter,Dirk Nowitzki,Vince Carter,Dirk Nowitzki,Jamal Crawford,Vince Carter,Dirk Nowitzki,Jamal Crawford,Pau Gasol,...,Will Barton,Willie Cauley-Stein,Wilson Chandler,Yogi Ferrell,Yuta Watanabe,Zach Collins,Zach LaVine,Zach Lofton,Zach Randolph,Zaza Pachulia
Age,20,22,21,23,22,20,24,23,21,21,...,28,25,31,25,24,21,23,26,36,34
Team,DAL,TOR,DAL,TOR,DAL,CHI,TOR,DAL,CHI,MEM,...,DEN,SAC,PHI,SAC,MEM,POR,CHI,DET,SAC,DET
Position,PF,SF,PF,SF,PF,PG,SF,C,PG,C,...,SG,C,PF,SG,SG,C,SG,SG,PF,C
Games,47,50,82,82,82,61,75,76,23,82,...,2,41,27,31,4,42,35,1,59,32
Games Started,24,49,81,82,82,8,75,76,6,79,...,2,41,23,2,0,0,34,0,57,0
Minutes Per Game,20.4,35.2,35.8,38.1,38.1,17.2,39.7,38,20.9,36.7,...,27,28.6,25.6,13.6,4.8,18.9,34.5,4,25.6,13.1
Field Goals Per Game,2.9,6.9,6.3,9.6,7.2,1.8,10.2,7.9,3.9,6.7,...,6,5.9,2.4,2.1,0.5,2.7,8.3,0,6.1,1.3
Field Goal Attempts Per Game,7.1,15.3,13.6,20.7,15.2,5,22.1,16.6,8.1,13,...,10.5,11.2,5.6,4.8,1.3,5.5,18.2,1,12.9,3.2
Field Goal Percent,0.405,0.45,0.461,0.465,0.474,0.352,0.46,0.477,0.476,0.518,...,0.571,0.526,0.437,0.436,0.4,0.491,0.457,0,0.473,0.412
3-Point Field Goals Per Game,0.3,0.4,1.4,1.2,1.8,0.7,2.2,1.8,1.1,0,...,2.5,0,1.1,0.7,0,0.5,1.8,0,0.9,0


In [30]:
# .transpose() has no inplace parameter, so reassign the result to keep the
# transposed layout.
# NOTE: re-running this cell transposes again and flips the frame back.
players = players.transpose()

In [31]:
# Preview the first three rows of the transposed frame (Age, Team, Position).
players.head(n=3)

Year,1999-01-01,1999-01-01,2000-01-01,2000-01-01,2001-01-01,2001-01-01,2001-01-01,2002-01-01,2002-01-01,2002-01-01,...,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01,2019-01-01
Name,Dirk Nowitzki,Vince Carter,Dirk Nowitzki,Vince Carter,Dirk Nowitzki,Jamal Crawford,Vince Carter,Dirk Nowitzki,Jamal Crawford,Pau Gasol,...,Will Barton,Willie Cauley-Stein,Wilson Chandler,Yogi Ferrell,Yuta Watanabe,Zach Collins,Zach LaVine,Zach Lofton,Zach Randolph,Zaza Pachulia
Age,20,22,21,23,22,20,24,23,21,21,...,28,25,31,25,24,21,23,26,36,34
Team,DAL,TOR,DAL,TOR,DAL,CHI,TOR,DAL,CHI,MEM,...,DEN,SAC,PHI,SAC,MEM,POR,CHI,DET,SAC,DET
Position,PF,SF,PF,SF,PF,PG,SF,C,PG,C,...,SG,C,PF,SG,SG,C,SG,SG,PF,C


In [32]:
# In the transposed frame the stat names are the row labels, so .loc["Age"]
# returns the Age row as a Series indexed by (Year, Name).
players.loc["Age"]

Year        Name               
1999-01-01  Dirk Nowitzki          20
            Vince Carter           22
2000-01-01  Dirk Nowitzki          21
            Vince Carter           23
2001-01-01  Dirk Nowitzki          22
            Jamal Crawford         20
            Vince Carter           24
2002-01-01  Dirk Nowitzki          23
            Jamal Crawford         21
            Pau Gasol              21
            Tyson Chandler         19
            Vince Carter           25
2003-01-01  Dirk Nowitzki          24
            Jamal Crawford         22
            Nene                   20
            Pau Gasol              22
            Tyson Chandler         20
            Vince Carter           26
            Zach Randolph          20
2004-01-01  Carmelo Anthony        19
            Dirk Nowitzki          25
            Dwyane Wade            22
            Jamal Crawford         23
            Kyle Korver            22
            LeBron James           19
            Nene  

In [33]:
# The first key selects the row ("Age"); the second selects columns by the
# outer level of the column MultiIndex (Year), returning the Age of every
# player listed for that year.
players.loc["Age", "2000-01-01"]

Year        Name         
2000-01-01  Dirk Nowitzki    21
            Vince Carter     23
Name: Age, dtype: object

In [34]:
# To narrow the selection to one (Year, Name) column, pass the column labels
# as a tuple so they are treated as a single MultiIndex key. In the output
# recorded below this comes back as a one-element Series, not a bare scalar.
players.loc["Age", ("2000-01-01", "Vince Carter")]

Year        Name        
2000-01-01  Vince Carter    23
Name: Age, dtype: object

# The .swaplevel() Method

In [35]:
# Reload the player stats for the swaplevel examples: (Year, Name) index,
# "Unnamed: 0" column removed, rows sorted by the index.
players = (
    pd.read_csv(
        "npo_players.csv",
        index_col=["Year", "Name"],
        parse_dates=["Year"],
    )
    .drop(columns="Unnamed: 0")
    .sort_index()
)
players.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Year,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1999-01-01,Dirk Nowitzki,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
1999-01-01,Vince Carter,22,TOR,SF,50,49,35.2,6.9,15.3,0.45,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
2000-01-01,Dirk Nowitzki,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.83,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5


In [36]:
# When the DataFrame has exactly two index levels, .swaplevel() can be called
# with no arguments to exchange them: (Year, Name) becomes (Name, Year).
# The rows are not re-sorted; only the order of the levels changes.
players.swaplevel()

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Dirk Nowitzki,1999-01-01,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
Vince Carter,1999-01-01,22,TOR,SF,50,49,35.2,6.9,15.3,0.450,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
Dirk Nowitzki,2000-01-01,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.830,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5
Vince Carter,2000-01-01,23,TOR,SF,82,82,38.1,9.6,20.7,0.465,1.2,...,6.7,0.791,1.8,4.0,3.9,1.3,1.1,2.2,3.2,25.7
Dirk Nowitzki,2001-01-01,22,DAL,PF,82,82,38.1,7.2,15.2,0.474,1.8,...,6.6,0.838,1.5,7.7,2.1,1.0,1.2,1.9,3.0,21.8
Jamal Crawford,2001-01-01,20,CHI,PG,61,8,17.2,1.8,5.0,0.352,0.7,...,0.6,0.794,0.1,1.3,2.3,0.7,0.2,1.4,1.1,4.6
Vince Carter,2001-01-01,24,TOR,SF,75,75,39.7,10.2,22.1,0.460,2.2,...,6.7,0.765,2.3,3.2,3.9,1.5,1.1,2.2,2.7,27.6
Dirk Nowitzki,2002-01-01,23,DAL,C,76,76,38.0,7.9,16.6,0.477,1.8,...,6.8,0.853,1.6,8.4,2.4,1.1,1.0,1.9,2.9,23.4
Jamal Crawford,2002-01-01,21,CHI,PG,23,6,20.9,3.9,8.1,0.476,1.1,...,0.6,0.769,0.2,1.3,2.4,0.8,0.2,1.4,0.8,9.3
Pau Gasol,2002-01-01,21,MEM,C,82,79,36.7,6.7,13.0,0.518,0.0,...,5.8,0.709,2.9,6.0,2.7,0.5,2.1,2.7,2.4,17.6


In [37]:
# .swaplevel() has no inplace parameter, so reassign the result to keep the
# swapped level order.
# NOTE: re-running this cell swaps the levels back again.
players = players.swaplevel()

In [38]:
# Preview the first three rows; Name is now the outer index level.
players.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Dirk Nowitzki,1999-01-01,20,DAL,PF,47,24,20.4,2.9,7.1,0.405,0.3,...,2.7,0.773,0.9,2.6,1.0,0.6,0.6,1.6,2.2,8.2
Vince Carter,1999-01-01,22,TOR,SF,50,49,35.2,6.9,15.3,0.45,0.4,...,5.4,0.761,1.9,3.8,3.0,1.1,1.5,2.2,2.8,18.3
Dirk Nowitzki,2000-01-01,21,DAL,PF,82,81,35.8,6.3,13.6,0.461,1.4,...,4.2,0.83,1.2,5.2,2.5,0.8,0.8,1.7,3.1,17.5


# The .stack() Method

In [39]:
# Load the player stats indexed by (Team, Year) this time, drop the stray
# "Unnamed: 0" column, and sort the rows by the index.
players = (
    pd.read_csv(
        "npo_players.csv",
        index_col=["Team", "Year"],
        parse_dates=["Year"],
    )
    .drop(columns="Unnamed: 0")
    .sort_index()
)
players.head(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Age,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Team,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ATL,2006-01-01,Zaza Pachulia,21,C,78,78,31.4,3.9,8.7,0.451,0.0,...,5.2,0.735,3.4,4.5,1.7,1.1,0.5,2.3,3.7,11.7
ATL,2007-01-01,Zaza Pachulia,22,C,72,47,28.1,4.0,8.5,0.474,0.0,...,5.2,0.786,2.8,4.2,1.5,1.1,0.5,2.3,3.7,12.2
ATL,2008-01-01,Al Horford,21,C,81,77,31.4,4.1,8.2,0.499,0.0,...,2.6,0.731,3.1,6.6,1.5,0.7,0.9,1.7,3.3,10.1


In [40]:
# .stack() turns each column value into a row value, with the column label as its index. returns a Series.
# players goes from 2343 rows and 27 columns to a Series with 63,012 rows
# the new Series has three index levels for each value: Team, Year, and the original column label
s = players.stack()

In [41]:
# confirm that .stack() returned a Series rather than a DataFrame
type(s)

pandas.core.series.Series

In [42]:
# display the stacked Series: Team and Year are the outer index levels, the former column label is the innermost
s

Team  Year                                            
ATL   2006-01-01  Name                                     Zaza Pachulia
                  Age                                                 21
                  Position                                             C
                  Games                                               78
                  Games Started                                       78
                  Minutes Per Game                                  31.4
                  Field Goals Per Game                               3.9
                  Field Goal Attempts Per Game                       8.7
                  Field Goal Percent                               0.451
                  3-Point Field Goals Per Game                         0
                  3-Point Field Goal Attempts Per Game                 0
                  3-Point Field Goal Percent                           0
                  2-Point Field Goals Per Game                       

In [43]:
# can use .to_frame() to convert the Series into a one-column dataframe
# the Series has no name, so the single column is labelled 0
s.to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
Team,Year,Unnamed: 2_level_1,Unnamed: 3_level_1
ATL,2006-01-01,Name,Zaza Pachulia
ATL,2006-01-01,Age,21
ATL,2006-01-01,Position,C
ATL,2006-01-01,Games,78
ATL,2006-01-01,Games Started,78
ATL,2006-01-01,Minutes Per Game,31.4
ATL,2006-01-01,Field Goals Per Game,3.9
ATL,2006-01-01,Field Goal Attempts Per Game,8.7
ATL,2006-01-01,Field Goal Percent,0.451
ATL,2006-01-01,3-Point Field Goals Per Game,0


# The .unstack() Method, Part 1

In [44]:
# Load the player stats with a (Name, Year) MultiIndex, drop the "Unnamed: 0"
# column, and sort the index. Done as one method chain so `players` is built
# in a single assignment instead of being mutated with `del` / `inplace=True`.
players = (
    pd.read_csv("npo_players.csv", parse_dates=["Year"], index_col=["Name", "Year"])
    .drop(columns="Unnamed: 0")
    .sort_index()
)
players.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Aaron Gordon,2015-01-01,19,ORL,PF,47,8,17.0,2.0,4.4,0.447,0.3,...,1.3,0.721,1.0,2.6,0.7,0.4,0.5,0.8,1.8,5.2
Aaron Gordon,2016-01-01,20,ORL,PF,78,37,23.9,3.5,7.4,0.473,0.5,...,2.5,0.668,2.0,4.5,1.6,0.8,0.7,0.8,2.0,9.2
Aaron Gordon,2017-01-01,21,ORL,SF,80,72,28.7,4.9,10.8,0.454,1.0,...,2.7,0.719,1.5,3.6,1.9,0.8,0.5,1.1,2.2,12.7


In [45]:
# stack the columns into the row index: the result is a Series indexed by Name, Year, and column label
players_stack = players.stack()

In [46]:
# preview the first three entries; the dtype is object because the stacked values mix numbers and strings
players_stack.head(3)

Name          Year                
Aaron Gordon  2015-01-01  Age          19
                          Team        ORL
                          Position     PF
dtype: object

In [47]:
# call .unstack() with no arguments to move the innermost index level (the column labels)
# back into the columns, which recovers the layout of the unstacked dataframe
players_stack.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Aaron Gordon,2015-01-01,19,ORL,PF,47,8,17,2,4.4,0.447,0.3,...,1.3,0.721,1,2.6,0.7,0.4,0.5,0.8,1.8,5.2
Aaron Gordon,2016-01-01,20,ORL,PF,78,37,23.9,3.5,7.4,0.473,0.5,...,2.5,0.668,2,4.5,1.6,0.8,0.7,0.8,2,9.2
Aaron Gordon,2017-01-01,21,ORL,SF,80,72,28.7,4.9,10.8,0.454,1,...,2.7,0.719,1.5,3.6,1.9,0.8,0.5,1.1,2.2,12.7
Aaron Gordon,2018-01-01,22,ORL,PF,58,57,32.9,6.5,14.9,0.434,2,...,3.9,0.698,1.5,6.4,2.3,1,0.8,1.8,1.9,17.6
Aaron Gordon,2019-01-01,23,ORL,PF,39,39,33.4,5.8,13.1,0.442,1.5,...,3.1,0.681,1.6,5.9,3.3,0.8,0.7,1.7,2.3,15.2
Aaron Holiday,2019-01-01,22,IND,PG,23,0,11.7,2,5,0.412,0.6,...,1.1,0.885,0.1,1.6,1.5,0.5,0.3,0.7,1.3,5.7
Abdel Nader,2018-01-01,24,BOS,SF,48,1,10.9,1,3.1,0.336,0.5,...,0.8,0.59,0.3,1.2,0.5,0.3,0.2,0.7,0.9,3
Abdel Nader,2019-01-01,25,OKC,SF,22,1,7.5,1.2,2.6,0.466,0.4,...,0.2,1,0.1,1.2,0.2,0.2,0,0.4,1,3
Al Horford,2008-01-01,21,ATL,C,81,77,31.4,4.1,8.2,0.499,0,...,2.6,0.731,3.1,6.6,1.5,0.7,0.9,1.7,3.3,10.1
Al Horford,2009-01-01,22,ATL,C,67,67,33.5,4.7,8.9,0.525,0,...,3.1,0.727,2.2,7.1,2.4,0.8,1.4,1.5,2.8,11.5


In [48]:
# can keep chaining .unstack() to move index levels from rows to columns. it works from the innermost level outward
# eventually you get a Series that is "reversed" from the .stack() one: its outermost index level holds the original column names
players_stack.unstack().unstack().unstack()

                 Year        Name               
Age              1999-01-01  Aaron Gordon            NaN
                             Aaron Holiday           NaN
                             Abdel Nader             NaN
                             Al Horford              NaN
                             Al-Farouq Aminu         NaN
                             Alec Burks              NaN
                             Alex Abrines            NaN
                             Alex Caruso             NaN
                             Alex Len                NaN
                             Alex Poythress          NaN
                             Alfonzo McKinnie        NaN
                             Alize Johnson           NaN
                             Allonzo Trier           NaN
                             Amile Jefferson         NaN
                             Amir Johnson            NaN
                             Andre Drummond          NaN
                             Andre Iguo

# The .unstack() Method, Part 2

In [49]:
# Load the player stats with a (Name, Year) MultiIndex, drop the "Unnamed: 0"
# column, and sort the index. Done as one method chain so `players` is built
# in a single assignment instead of being mutated with `del` / `inplace=True`.
players = (
    pd.read_csv("npo_players.csv", parse_dates=["Year"], index_col=["Name", "Year"])
    .drop(columns="Unnamed: 0")
    .sort_index()
)
players.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Aaron Gordon,2015-01-01,19,ORL,PF,47,8,17.0,2.0,4.4,0.447,0.3,...,1.3,0.721,1.0,2.6,0.7,0.4,0.5,0.8,1.8,5.2
Aaron Gordon,2016-01-01,20,ORL,PF,78,37,23.9,3.5,7.4,0.473,0.5,...,2.5,0.668,2.0,4.5,1.6,0.8,0.7,0.8,2.0,9.2
Aaron Gordon,2017-01-01,21,ORL,SF,80,72,28.7,4.9,10.8,0.454,1.0,...,2.7,0.719,1.5,3.6,1.9,0.8,0.5,1.1,2.2,12.7


In [50]:
# stack the columns into the row index, then preview the first three entries of the resulting Series
players_stack = players.stack()
players_stack.head(3)

Name          Year                
Aaron Gordon  2015-01-01  Age          19
                          Team        ORL
                          Position     PF
dtype: object

In [51]:
# normal unstack: with no argument, the innermost index level (the column labels) moves back to the columns
players_stack.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Aaron Gordon,2015-01-01,19,ORL,PF,47,8,17,2,4.4,0.447,0.3,...,1.3,0.721,1,2.6,0.7,0.4,0.5,0.8,1.8,5.2
Aaron Gordon,2016-01-01,20,ORL,PF,78,37,23.9,3.5,7.4,0.473,0.5,...,2.5,0.668,2,4.5,1.6,0.8,0.7,0.8,2,9.2
Aaron Gordon,2017-01-01,21,ORL,SF,80,72,28.7,4.9,10.8,0.454,1,...,2.7,0.719,1.5,3.6,1.9,0.8,0.5,1.1,2.2,12.7
Aaron Gordon,2018-01-01,22,ORL,PF,58,57,32.9,6.5,14.9,0.434,2,...,3.9,0.698,1.5,6.4,2.3,1,0.8,1.8,1.9,17.6
Aaron Gordon,2019-01-01,23,ORL,PF,39,39,33.4,5.8,13.1,0.442,1.5,...,3.1,0.681,1.6,5.9,3.3,0.8,0.7,1.7,2.3,15.2
Aaron Holiday,2019-01-01,22,IND,PG,23,0,11.7,2,5,0.412,0.6,...,1.1,0.885,0.1,1.6,1.5,0.5,0.3,0.7,1.3,5.7
Abdel Nader,2018-01-01,24,BOS,SF,48,1,10.9,1,3.1,0.336,0.5,...,0.8,0.59,0.3,1.2,0.5,0.3,0.2,0.7,0.9,3
Abdel Nader,2019-01-01,25,OKC,SF,22,1,7.5,1.2,2.6,0.466,0.4,...,0.2,1,0.1,1.2,0.2,0.2,0,0.4,1,3
Al Horford,2008-01-01,21,ATL,C,81,77,31.4,4.1,8.2,0.499,0,...,2.6,0.731,3.1,6.6,1.5,0.7,0.9,1.7,3.3,10.1
Al Horford,2009-01-01,22,ATL,C,67,67,33.5,4.7,8.9,0.525,0,...,3.1,0.727,2.2,7.1,2.4,0.8,1.4,1.5,2.8,11.5


In [52]:
# .unstack() accepts the position of the index level to move out of the MultiIndex.
# The outermost level (Name) sits at position 0, so level=0 turns the player names
# into the column labels instead of Age, Team, Position...
players_stack.unstack(level=0)

Unnamed: 0_level_0,Name,Aaron Gordon,Aaron Holiday,Abdel Nader,Al Horford,Al-Farouq Aminu,Alec Burks,Alex Abrines,Alex Caruso,Alex Len,Alex Poythress,...,Will Barton,Willie Cauley-Stein,Wilson Chandler,Yogi Ferrell,Yuta Watanabe,Zach Collins,Zach LaVine,Zach Lofton,Zach Randolph,Zaza Pachulia
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1999-01-01,Age,,,,,,,,,,,...,,,,,,,,,,
1999-01-01,Team,,,,,,,,,,,...,,,,,,,,,,
1999-01-01,Position,,,,,,,,,,,...,,,,,,,,,,
1999-01-01,Games,,,,,,,,,,,...,,,,,,,,,,
1999-01-01,Games Started,,,,,,,,,,,...,,,,,,,,,,
1999-01-01,Minutes Per Game,,,,,,,,,,,...,,,,,,,,,,
1999-01-01,Field Goals Per Game,,,,,,,,,,,...,,,,,,,,,,
1999-01-01,Field Goal Attempts Per Game,,,,,,,,,,,...,,,,,,,,,,
1999-01-01,Field Goal Percent,,,,,,,,,,,...,,,,,,,,,,
1999-01-01,3-Point Field Goals Per Game,,,,,,,,,,,...,,,,,,,,,,


In [53]:
# Negative positions count from the inside: -1 is the innermost level (the column
# labels), so the result matches a plain .unstack()
players_stack.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Team,Position,Games,Games Started,Minutes Per Game,Field Goals Per Game,Field Goal Attempts Per Game,Field Goal Percent,3-Point Field Goals Per Game,...,Free Throw Attempts Per Game,Free Throw Percent,Offensive Rebounds Per Game,Defensive Rebounds Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Personal Fouls Per Game,Points Per Game
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Aaron Gordon,2015-01-01,19,ORL,PF,47,8,17,2,4.4,0.447,0.3,...,1.3,0.721,1,2.6,0.7,0.4,0.5,0.8,1.8,5.2
Aaron Gordon,2016-01-01,20,ORL,PF,78,37,23.9,3.5,7.4,0.473,0.5,...,2.5,0.668,2,4.5,1.6,0.8,0.7,0.8,2,9.2
Aaron Gordon,2017-01-01,21,ORL,SF,80,72,28.7,4.9,10.8,0.454,1,...,2.7,0.719,1.5,3.6,1.9,0.8,0.5,1.1,2.2,12.7
Aaron Gordon,2018-01-01,22,ORL,PF,58,57,32.9,6.5,14.9,0.434,2,...,3.9,0.698,1.5,6.4,2.3,1,0.8,1.8,1.9,17.6
Aaron Gordon,2019-01-01,23,ORL,PF,39,39,33.4,5.8,13.1,0.442,1.5,...,3.1,0.681,1.6,5.9,3.3,0.8,0.7,1.7,2.3,15.2
Aaron Holiday,2019-01-01,22,IND,PG,23,0,11.7,2,5,0.412,0.6,...,1.1,0.885,0.1,1.6,1.5,0.5,0.3,0.7,1.3,5.7
Abdel Nader,2018-01-01,24,BOS,SF,48,1,10.9,1,3.1,0.336,0.5,...,0.8,0.59,0.3,1.2,0.5,0.3,0.2,0.7,0.9,3
Abdel Nader,2019-01-01,25,OKC,SF,22,1,7.5,1.2,2.6,0.466,0.4,...,0.2,1,0.1,1.2,0.2,0.2,0,0.4,1,3
Al Horford,2008-01-01,21,ATL,C,81,77,31.4,4.1,8.2,0.499,0,...,2.6,0.731,3.1,6.6,1.5,0.7,0.9,1.7,3.3,10.1
Al Horford,2009-01-01,22,ATL,C,67,67,33.5,4.7,8.9,0.525,0,...,3.1,0.727,2.2,7.1,2.4,0.8,1.4,1.5,2.8,11.5


In [54]:
# A level can also be selected by name. The level holding Age, Team, Position...
# has no name, so only Name and Year can be picked this way.
players_stack.unstack(level="Year")

Unnamed: 0_level_0,Year,1999-01-01 00:00:00,2000-01-01 00:00:00,2001-01-01 00:00:00,2002-01-01 00:00:00,2003-01-01 00:00:00,2004-01-01 00:00:00,2005-01-01 00:00:00,2006-01-01 00:00:00,2007-01-01 00:00:00,2008-01-01 00:00:00,...,2010-01-01 00:00:00,2011-01-01 00:00:00,2012-01-01 00:00:00,2013-01-01 00:00:00,2014-01-01 00:00:00,2015-01-01 00:00:00,2016-01-01 00:00:00,2017-01-01 00:00:00,2018-01-01 00:00:00,2019-01-01 00:00:00
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Aaron Gordon,Age,,,,,,,,,,,...,,,,,,19,20,21,22,23
Aaron Gordon,Team,,,,,,,,,,,...,,,,,,ORL,ORL,ORL,ORL,ORL
Aaron Gordon,Position,,,,,,,,,,,...,,,,,,PF,PF,SF,PF,PF
Aaron Gordon,Games,,,,,,,,,,,...,,,,,,47,78,80,58,39
Aaron Gordon,Games Started,,,,,,,,,,,...,,,,,,8,37,72,57,39
Aaron Gordon,Minutes Per Game,,,,,,,,,,,...,,,,,,17,23.9,28.7,32.9,33.4
Aaron Gordon,Field Goals Per Game,,,,,,,,,,,...,,,,,,2,3.5,4.9,6.5,5.8
Aaron Gordon,Field Goal Attempts Per Game,,,,,,,,,,,...,,,,,,4.4,7.4,10.8,14.9,13.1
Aaron Gordon,Field Goal Percent,,,,,,,,,,,...,,,,,,0.447,0.473,0.454,0.434,0.442
Aaron Gordon,3-Point Field Goals Per Game,,,,,,,,,,,...,,,,,,0.3,0.5,1,2,1.5


# The .unstack() Method, Part 3

In [56]:
# Load the player stats with a (Name, Year) MultiIndex, drop the "Unnamed: 0"
# column, and sort the index. Done as one method chain so `players` is built
# in a single assignment instead of being mutated with `del` / `inplace=True`.
players = (
    pd.read_csv("npo_players.csv", parse_dates=["Year"], index_col=["Name", "Year"])
    .drop(columns="Unnamed: 0")
    .sort_index()
)
# stack the columns into a third, unnamed index level and preview the result
players_stack = players.stack()
players_stack.head(3)

Name          Year                
Aaron Gordon  2015-01-01  Age          19
                          Team        ORL
                          Position     PF
dtype: object

In [57]:
# can unstack multiple levels at a time, in a chosen order
# Year (position 1) is listed first, so it becomes the outer column level; Name (position 0) becomes the second level
players_stack.unstack(level=[1,0])

Year,2015-01-01,2016-01-01,2017-01-01,2018-01-01,2019-01-01,2019-01-01,2018-01-01,2019-01-01,2008-01-01,2009-01-01,...,2010-01-01,2011-01-01,2012-01-01,2013-01-01,2014-01-01,2015-01-01,2016-01-01,2017-01-01,2018-01-01,2019-01-01
Name,Aaron Gordon,Aaron Gordon,Aaron Gordon,Aaron Gordon,Aaron Gordon,Aaron Holiday,Abdel Nader,Abdel Nader,Al Horford,Al Horford,...,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia
Age,19,20,21,22,23,22,24,25,21,22,...,25,26,27,28,29,30,31,32,33,34
Team,ORL,ORL,ORL,ORL,ORL,IND,BOS,OKC,ATL,ATL,...,ATL,ATL,ATL,ATL,MIL,MIL,DAL,GSW,GSW,DET
Position,PF,PF,SF,PF,PF,PG,SF,SF,C,C,...,C,C,C,C,C,C,C,C,C,C
Games,47,78,80,58,39,23,48,22,81,67,...,78,79,58,52,53,73,76,70,69,32
Games Started,8,37,72,57,39,0,1,1,77,67,...,1,7,44,15,43,45,69,70,57,0
Minutes Per Game,17,23.9,28.7,32.9,33.4,11.7,10.9,7.5,31.4,33.5,...,14,15.7,28.3,21.8,25,23.7,26.4,18.1,14.1,13.1
Field Goals Per Game,2,3.5,4.9,6.5,5.8,2,1,1.2,4.1,4.7,...,1.5,1.4,2.9,2.2,2.8,3.3,2.9,2.3,2.2,1.3
Field Goal Attempts Per Game,4.4,7.4,10.8,14.9,13.1,5,3.1,2.6,8.2,8.9,...,3.2,2.9,5.8,4.6,6.6,7.2,6.2,4.4,3.8,3.2
Field Goal Percent,0.447,0.473,0.454,0.434,0.442,0.412,0.336,0.466,0.499,0.525,...,0.488,0.461,0.499,0.473,0.427,0.454,0.466,0.534,0.564,0.412
3-Point Field Goals Per Game,0.3,0.5,1,2,1.5,0.6,0.5,0.4,0,0,...,0,0,0,0,0,0,0,0,0,0


In [59]:
# can also use level names instead of index positions
# Name is listed first here, so it is the outer column level and Year the inner one
players_stack.unstack(level=["Name", "Year"])

Name,Aaron Gordon,Aaron Gordon,Aaron Gordon,Aaron Gordon,Aaron Gordon,Aaron Holiday,Abdel Nader,Abdel Nader,Al Horford,Al Horford,...,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia,Zaza Pachulia
Year,2015-01-01,2016-01-01,2017-01-01,2018-01-01,2019-01-01,2019-01-01,2018-01-01,2019-01-01,2008-01-01,2009-01-01,...,2010-01-01,2011-01-01,2012-01-01,2013-01-01,2014-01-01,2015-01-01,2016-01-01,2017-01-01,2018-01-01,2019-01-01
Age,19,20,21,22,23,22,24,25,21,22,...,25,26,27,28,29,30,31,32,33,34
Team,ORL,ORL,ORL,ORL,ORL,IND,BOS,OKC,ATL,ATL,...,ATL,ATL,ATL,ATL,MIL,MIL,DAL,GSW,GSW,DET
Position,PF,PF,SF,PF,PF,PG,SF,SF,C,C,...,C,C,C,C,C,C,C,C,C,C
Games,47,78,80,58,39,23,48,22,81,67,...,78,79,58,52,53,73,76,70,69,32
Games Started,8,37,72,57,39,0,1,1,77,67,...,1,7,44,15,43,45,69,70,57,0
Minutes Per Game,17,23.9,28.7,32.9,33.4,11.7,10.9,7.5,31.4,33.5,...,14,15.7,28.3,21.8,25,23.7,26.4,18.1,14.1,13.1
Field Goals Per Game,2,3.5,4.9,6.5,5.8,2,1,1.2,4.1,4.7,...,1.5,1.4,2.9,2.2,2.8,3.3,2.9,2.3,2.2,1.3
Field Goal Attempts Per Game,4.4,7.4,10.8,14.9,13.1,5,3.1,2.6,8.2,8.9,...,3.2,2.9,5.8,4.6,6.6,7.2,6.2,4.4,3.8,3.2
Field Goal Percent,0.447,0.473,0.454,0.434,0.442,0.412,0.336,0.466,0.499,0.525,...,0.488,0.461,0.499,0.473,0.427,0.454,0.466,0.534,0.564,0.412
3-Point Field Goals Per Game,0.3,0.5,1,2,1.5,0.6,0.5,0.4,0,0,...,0,0,0,0,0,0,0,0,0,0


In [61]:
# Not every player has a row for every year, so pandas fills the cells that have
# no value with NaN
players_stack.unstack(level="Year")

Unnamed: 0_level_0,Year,1999-01-01 00:00:00,2000-01-01 00:00:00,2001-01-01 00:00:00,2002-01-01 00:00:00,2003-01-01 00:00:00,2004-01-01 00:00:00,2005-01-01 00:00:00,2006-01-01 00:00:00,2007-01-01 00:00:00,2008-01-01 00:00:00,...,2010-01-01 00:00:00,2011-01-01 00:00:00,2012-01-01 00:00:00,2013-01-01 00:00:00,2014-01-01 00:00:00,2015-01-01 00:00:00,2016-01-01 00:00:00,2017-01-01 00:00:00,2018-01-01 00:00:00,2019-01-01 00:00:00
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Aaron Gordon,Age,,,,,,,,,,,...,,,,,,19,20,21,22,23
Aaron Gordon,Team,,,,,,,,,,,...,,,,,,ORL,ORL,ORL,ORL,ORL
Aaron Gordon,Position,,,,,,,,,,,...,,,,,,PF,PF,SF,PF,PF
Aaron Gordon,Games,,,,,,,,,,,...,,,,,,47,78,80,58,39
Aaron Gordon,Games Started,,,,,,,,,,,...,,,,,,8,37,72,57,39
Aaron Gordon,Minutes Per Game,,,,,,,,,,,...,,,,,,17,23.9,28.7,32.9,33.4
Aaron Gordon,Field Goals Per Game,,,,,,,,,,,...,,,,,,2,3.5,4.9,6.5,5.8
Aaron Gordon,Field Goal Attempts Per Game,,,,,,,,,,,...,,,,,,4.4,7.4,10.8,14.9,13.1
Aaron Gordon,Field Goal Percent,,,,,,,,,,,...,,,,,,0.447,0.473,0.454,0.434,0.442
Aaron Gordon,3-Point Field Goals Per Game,,,,,,,,,,,...,,,,,,0.3,0.5,1,2,1.5


In [62]:
# fill_value replaces every NaN that unstacking would otherwise create.
# The same value is used for all rows, so text rows such as Team and Position get 0 too.
players_stack.unstack(level="Year", fill_value=0)

Unnamed: 0_level_0,Year,1999-01-01 00:00:00,2000-01-01 00:00:00,2001-01-01 00:00:00,2002-01-01 00:00:00,2003-01-01 00:00:00,2004-01-01 00:00:00,2005-01-01 00:00:00,2006-01-01 00:00:00,2007-01-01 00:00:00,2008-01-01 00:00:00,...,2010-01-01 00:00:00,2011-01-01 00:00:00,2012-01-01 00:00:00,2013-01-01 00:00:00,2014-01-01 00:00:00,2015-01-01 00:00:00,2016-01-01 00:00:00,2017-01-01 00:00:00,2018-01-01 00:00:00,2019-01-01 00:00:00
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Aaron Gordon,Age,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,19,20,21,22,23
Aaron Gordon,Team,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,ORL,ORL,ORL,ORL,ORL
Aaron Gordon,Position,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,PF,PF,SF,PF,PF
Aaron Gordon,Games,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,47,78,80,58,39
Aaron Gordon,Games Started,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,8,37,72,57,39
Aaron Gordon,Minutes Per Game,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,17,23.9,28.7,32.9,33.4
Aaron Gordon,Field Goals Per Game,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,3.5,4.9,6.5,5.8
Aaron Gordon,Field Goal Attempts Per Game,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,4.4,7.4,10.8,14.9,13.1
Aaron Gordon,Field Goal Percent,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0.447,0.473,0.454,0.434,0.442
Aaron Gordon,3-Point Field Goals Per Game,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0.3,0.5,1,2,1.5


# The .pivot() Method

In [67]:
# Read the sales log with Date parsed as datetimes and Salesman stored as a
# categorical column, then preview the first rows.
sales = pd.read_csv("salesmen.csv", parse_dates=["Date"]).astype({"Salesman": "category"})
sales.head()

Unnamed: 0,Date,Salesman,Revenue
0,2016-01-01,Bob,7172
1,2016-01-02,Bob,6362
2,2016-01-03,Bob,5982
3,2016-01-04,Bob,7917
4,2016-01-05,Bob,7837


In [70]:
# .pivot() reshapes a dataframe from three of its columns:
#   index   -> the column whose values become the row labels
#   columns -> the column whose unique values become the column headers
#   values  -> the column whose values fill the cells
sales.pivot(
    index="Date",
    columns="Salesman",
    values="Revenue",
)

Salesman,Bob,Dave,Jeb,Oscar,Ronald
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-01,7172,1864,4430,5250,2639
2016-01-02,6362,8278,8026,8661,4951
2016-01-03,5982,4226,5188,7075,2703
2016-01-04,7917,3868,3144,2524,4258
2016-01-05,7837,2287,938,2793,7771
2016-01-06,1744,7859,8702,7794,5930
2016-01-07,918,8597,4250,9728,1933
2016-01-08,9863,3092,9719,5263,5709
2016-01-09,8337,1794,5614,7144,4707
2016-01-10,7543,7105,301,7663,8267


# The .pivot_table() Method

In [73]:
# load foods.csv and preview the first five rows (.head() defaults to 5)
foods = pd.read_csv("foods.csv")
foods.head()

Unnamed: 0,First Name,Gender,City,Frequency,Item,Spend
0,Wanda,Female,Stamford,Weekly,Burger,15.66
1,Eric,Male,Stamford,Daily,Chalupa,10.56
2,Charles,Male,New York,Never,Sushi,42.14
3,Anna,Female,Philadelphia,Once,Ice Cream,11.01
4,Deborah,Female,Philadelphia,Daily,Chalupa,23.49


In [74]:
# .pivot_table() runs an aggregate function over values from a dataframe.
# Here the Spend values are grouped by Gender and the mean of each group is returned.
foods.pivot_table(
    index="Gender",
    values="Spend",
    aggfunc="mean",
)

Unnamed: 0_level_0,Spend
Gender,Unnamed: 1_level_1
Female,50.709629
Male,49.397623


In [75]:
# Passing a list to index= groups by several columns and produces a MultiIndex
# (Gender outer, Item inner).
foods.pivot_table(
    index=["Gender", "Item"],
    values="Spend",
    aggfunc="mean",
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Spend
Gender,Item,Unnamed: 2_level_1
Female,Burger,49.930488
Female,Burrito,50.092
Female,Chalupa,54.635
Female,Donut,49.926316
Female,Ice Cream,49.788519
Female,Sushi,50.355699
Male,Burger,49.613919
Male,Burrito,48.344819
Male,Chalupa,49.186761
Male,Donut,43.649565


In [76]:
# The order of the index list sets the order of the MultiIndex levels:
# Item is now the outer level and Gender the inner one.
foods.pivot_table(
    index=["Item", "Gender"],
    values="Spend",
    aggfunc="mean",
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Spend
Item,Gender,Unnamed: 2_level_1
Burger,Female,49.930488
Burger,Male,49.613919
Burrito,Female,50.092
Burrito,Male,48.344819
Chalupa,Female,54.635
Chalupa,Male,49.186761
Donut,Female,49.926316
Donut,Male,43.649565
Ice Cream,Female,49.788519
Ice Cream,Male,51.096


In [79]:
# columns= spreads the groups across column headers as well: one column per City.
foods.pivot_table(
    index=["Item", "Gender"],
    columns="City",
    values="Spend",
    aggfunc="mean",
)

Unnamed: 0_level_0,City,New York,Philadelphia,Stamford
Item,Gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Burger,Female,51.626667,52.87871,45.037778
Burger,Male,58.822273,44.675238,46.424516
Burrito,Female,42.563043,52.098571,53.532647
Burrito,Male,55.976,43.764333,46.438929
Chalupa,Female,46.135789,52.291563,64.094
Chalupa,Male,49.1108,48.444783,50.011304
Donut,Female,46.670323,54.642,48.734118
Donut,Male,44.842333,37.859394,49.004483
Ice Cream,Female,56.356296,46.225625,46.910455
Ice Cream,Male,55.297586,53.44561,42.3688


In [81]:
# Passing a list to columns= builds a column MultiIndex (Frequency outer, City inner).
# Combinations with no matching rows show up as NaN.
foods.pivot_table(
    index=["Item", "Gender"],
    columns=["Frequency", "City"],
    values="Spend",
    aggfunc="mean",
)

Unnamed: 0_level_0,Frequency,Daily,Daily,Daily,Monthly,Monthly,Monthly,Never,Never,Never,Often,...,Once,Seldom,Seldom,Seldom,Weekly,Weekly,Weekly,Yearly,Yearly,Yearly
Unnamed: 0_level_1,City,New York,Philadelphia,Stamford,New York,Philadelphia,Stamford,New York,Philadelphia,Stamford,New York,...,Stamford,New York,Philadelphia,Stamford,New York,Philadelphia,Stamford,New York,Philadelphia,Stamford
Item,Gender,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Burger,Female,43.778333,77.226667,48.22,57.286667,53.7625,59.6225,97.89,54.7425,45.485,23.74,...,31.683333,31.58,58.435714,48.765,92.175,16.0,31.004,64.825,61.585,51.171667
Burger,Male,63.892,37.566667,49.43,62.43,71.046667,13.58,90.32,8.655,,27.735,...,36.293333,75.226667,47.015,53.25,69.69,33.296667,77.5525,24.805,49.34,45.014
Burrito,Female,44.89,53.595,39.126,40.913333,17.14,67.94,47.4325,63.716667,52.334286,34.533333,...,56.003333,83.77,49.5275,78.163333,13.23,31.41,46.182,35.63,38.916667,43.245
Burrito,Male,78.736667,41.44,69.0575,49.18,29.86,39.866667,28.926667,47.29,70.368,47.48,...,15.075,67.466667,27.71,9.84,64.185,48.208333,40.4625,55.175,59.255,32.83
Chalupa,Female,43.19,23.49,95.7,79.185,72.49,80.99,35.15,30.4925,52.12,39.73,...,40.59,40.0,54.902,58.416667,42.88,28.136667,68.23,52.606667,56.048889,69.632
Chalupa,Male,27.045,68.7025,48.16,66.752,45.35,57.293333,39.818,48.596,46.233333,62.88,...,,11.69,65.375,34.804,54.4,33.92,44.37,55.913333,34.405,58.095
Donut,Female,39.841667,61.85,41.45,71.1325,50.25,45.86,56.07,72.263333,52.443333,32.6575,...,79.12,30.27,45.8125,34.886667,71.39,69.6,55.0075,62.95,58.41,56.12
Donut,Male,46.0,47.6775,64.71,45.9325,51.858,29.8825,43.926,26.825,54.91,46.6,...,27.978,16.25,33.003333,40.8275,37.22,38.6,62.254,35.775,22.305,16.52
Ice Cream,Female,65.5475,59.23,46.44,46.265,37.255,41.95,68.716667,39.0075,77.66,58.065,...,55.866,80.783333,50.775,58.865,56.905,47.546667,25.006,37.9175,39.965,15.24
Ice Cream,Male,59.406667,29.738,15.17,50.733333,64.425,74.36,28.77,80.485,39.923333,69.51,...,36.542,67.605,69.22,20.415,44.772,55.31,47.884,53.233333,47.988,32.106667


In [84]:
# aggfunc accepts other aggregations too, e.g. "max" for the largest Spend in each group.
foods.pivot_table(
    index=["Item", "Gender"],
    columns="City",
    values="Spend",
    aggfunc="max",
).head(3)

Unnamed: 0_level_0,City,New York,Philadelphia,Stamford
Item,Gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Burger,Female,98.96,97.79,85.06
Burger,Male,90.32,99.68,97.2
Burrito,Female,92.25,96.79,99.21


In [86]:
# pivot_table is also available as a top-level pandas function; pass the dataframe
# through the data parameter.
pd.pivot_table(
    data=foods,
    index=["Item", "Gender"],
    columns="City",
    values="Spend",
    aggfunc="max",
).head(3)

Unnamed: 0_level_0,City,New York,Philadelphia,Stamford
Item,Gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Burger,Female,98.96,97.79,85.06
Burger,Male,90.32,99.68,97.2
Burrito,Female,92.25,96.79,99.21


# The pd.melt() Method

In [88]:
# load the quarterly revenue table: one row per salesman, one column per quarter (Q1-Q4)
# NOTE: this rebinds `sales`, replacing the frame loaded from salesmen.csv in the .pivot() section
sales = pd.read_csv("quarters.csv")
sales

Unnamed: 0,Salesman,Q1,Q2,Q3,Q4
0,Boris,602908,233879,354479,32704
1,Bob,43790,514863,297151,544493
2,Tommy,392668,113579,430882,247231
3,Travis,834663,266785,749238,570524
4,Donald,580935,411379,110390,651572
5,Ted,656644,70803,375948,321388
6,Jeb,486141,600753,742716,404995
7,Stacy,479662,742806,770712,2501
8,Morgan,992673,879183,37945,293710


In [91]:
# pd.melt() reshapes a wide table into a long one.
# Salesman is kept as the identifier; every other column (Q1..Q4) is folded into a
# "variable" column holding the old column name and a "value" column holding its value.
pd.melt(
    frame=sales,
    id_vars="Salesman",
)

Unnamed: 0,Salesman,variable,value
0,Boris,Q1,602908
1,Bob,Q1,43790
2,Tommy,Q1,392668
3,Travis,Q1,834663
4,Donald,Q1,580935
5,Ted,Q1,656644
6,Jeb,Q1,486141
7,Stacy,Q1,479662
8,Morgan,Q1,992673
9,Boris,Q2,233879


In [93]:
# var_name and value_name replace the default "variable" / "value" column names.
pd.melt(
    frame=sales,
    id_vars="Salesman",
    var_name="Quarter",
    value_name="Revenue",
)

Unnamed: 0,Salesman,Quarter,Revenue
0,Boris,Q1,602908
1,Bob,Q1,43790
2,Tommy,Q1,392668
3,Travis,Q1,834663
4,Donald,Q1,580935
5,Ted,Q1,656644
6,Jeb,Q1,486141
7,Stacy,Q1,479662
8,Morgan,Q1,992673
9,Boris,Q2,233879
