In [2]:
# pandas multiindex
import pandas as pd
# Raw race records, one list per column. 'Badwater' occurs twice (2020 and
# 2025), so a race name on its own does not identify a row.
data_races = {
    'Race': [
        'Badwater',
        'Barkley Marathons',
        'Vero Beach Ultra',
        'Forgotten Florida',
        'Badwater',
    ],
    'Year': [2020, 2021, 2020, 2021, 2025],
    'Difficulty': [9.7, 9.8, 8.1, 6.1, 9.9],
}
# Flat frame with the default integer index; every field is still a column.
df = pd.DataFrame(data=data_races)
df

Unnamed: 0,Race,Year,Difficulty
0,Badwater,2020,9.7
1,Barkley Marathons,2021,9.8
2,Vero Beach Ultra,2020,8.1
3,Forgotten Florida,2021,6.1
4,Badwater,2025,9.9


In [3]:
# Build the (Race, Year)-indexed frame straight from the raw dict rather than
# from the current `df`. The self-referential form `df = df.set_index(...)`
# raises KeyError when the cell is run a second time, because 'Race' and
# 'Year' are no longer columns by then. This form gives the same result on
# every run.
df = pd.DataFrame(data_races).set_index(['Race', 'Year'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Race,Year,Unnamed: 2_level_1
Badwater,2020,9.7
Barkley Marathons,2021,9.8
Vero Beach Ultra,2020,8.1
Forgotten Florida,2021,6.1
Badwater,2025,9.9


In [4]:
# Take two independent snapshots of the (Race, Year)-indexed frame before
# later cells rename, modify and re-order `df`. `df2` is used for the
# unstack examples and `df3` for the reset_index example.
df2, df3 = df.copy(), df.copy()

In [5]:
# Inspect the MultiIndex that set_index built from the 'Race' and 'Year'
# columns: one (Race, Year) tuple per row.
df.index

MultiIndex([(         'Badwater', 2020),
            ('Barkley Marathons', 2021),
            ( 'Vero Beach Ultra', 2020),
            ('Forgotten Florida', 2021),
            (         'Badwater', 2025)],
           names=['Race', 'Year'])

In [6]:
# Labels of the outer level (position 0, named 'Race'), one entry per row,
# so repeated labels such as 'Badwater' appear more than once.
df.index.get_level_values(0)

Index(['Badwater', 'Barkley Marathons', 'Vero Beach Ultra',
       'Forgotten Florida', 'Badwater'],
      dtype='object', name='Race')

In [7]:
# Labels of the inner level (position 1, named 'Year'), one entry per row.
df.index.get_level_values(1)

Index([2020, 2021, 2020, 2021, 2025], dtype='int64', name='Year')

In [8]:
# Partial indexing: a key for the first level only selects every matching
# row. The matched 'Race' level is dropped and the result is indexed by 'Year'.
df.loc['Badwater']         # returns all years for Badwater

Unnamed: 0_level_0,Difficulty
Year,Unnamed: 1_level_1
2020,9.7
2025,9.9


In [9]:
# A complete (Race, Year) tuple addresses exactly one row, which comes back
# as a Series named after that tuple.
df.loc[('Badwater', 2020)] # returns a specific entry

Difficulty    9.7
Name: (Badwater, 2020), dtype: float64

In [10]:
# Positional access: iloc counts rows and ignores the index labels.
df.iloc[0]  # First row (Badwater, 2020)

Difficulty    9.7
Name: (Badwater, 2020), dtype: float64

In [11]:
# Rename both index levels by assigning to `.names` (one name per level, in
# level order). This changes df's existing index; no new frame is created.
df.index.names = ['Event', 'YearHeld']

In [12]:
# Display the frame: the index header now reads Event / YearHeld.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Event,YearHeld,Unnamed: 2_level_1
Badwater,2020,9.7
Barkley Marathons,2021,9.8
Vero Beach Ultra,2020,8.1
Forgotten Florida,2021,6.1
Badwater,2025,9.9


In [13]:
# Rename the index levels back via a {old_name: new_name} mapping.
# The result is reassigned rather than using inplace=True, which returns
# None, cannot be chained, and hides the mutation from the reader.
df = df.rename_axis(index={'Event': 'Race', 'YearHeld': 'Year'})

In [14]:
# Display the frame: the index levels are named Race / Year again.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Race,Year,Unnamed: 2_level_1
Badwater,2020,9.7
Barkley Marathons,2021,9.8
Vero Beach Ultra,2020,8.1
Forgotten Florida,2021,6.1
Badwater,2025,9.9


In [15]:
# Exchange the two index levels: (Race, Year) -> (Year, Race). Only the level
# order changes; rows stay in their existing order and `df` is not modified.
df_swapped = df.swaplevel()

In [17]:
# Display the swapped frame: Year is now the outer level.
df_swapped 

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Year,Race,Unnamed: 2_level_1
2020,Badwater,9.7
2021,Barkley Marathons,9.8
2020,Vero Beach Ultra,8.1
2021,Forgotten Florida,6.1
2025,Badwater,9.9


In [18]:
# Group on an index level instead of a column: mean Difficulty per race,
# averaged over years (Badwater: (9.7 + 9.9) / 2 = 9.8). The result is
# ordered by group key.
df.groupby(level='Race').mean()

Unnamed: 0_level_0,Difficulty
Race,Unnamed: 1_level_1
Badwater,9.8
Barkley Marathons,9.8
Forgotten Florida,6.1
Vero Beach Ultra,8.1


In [19]:
# Mean Difficulty per year, averaged over all races held that year.
df.groupby(level='Year').mean()

Unnamed: 0_level_0,Difficulty
Year,Unnamed: 1_level_1
2020,8.9
2021,7.95
2025,9.9


In [20]:
# Sort rows by the 'Race' level; in the output the two Badwater rows are then
# ordered by Year. Returns a new frame, so `df` keeps its original row order.
df.sort_index(level='Race')

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Race,Year,Unnamed: 2_level_1
Badwater,2020,9.7
Badwater,2025,9.9
Barkley Marathons,2021,9.8
Forgotten Florida,2021,6.1
Vero Beach Ultra,2020,8.1


In [21]:
# Sort by the 'Year' level, newest first. In the output, rows that share a
# year (the 2021 rows) also appear in descending Race order.
df.sort_index(level='Year', ascending = False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Race,Year,Unnamed: 2_level_1
Badwater,2025,9.9
Forgotten Florida,2021,6.1
Barkley Marathons,2021,9.8
Vero Beach Ultra,2020,8.1
Badwater,2020,9.7


In [22]:
# Sort by a column's values (lowest Difficulty first); each row keeps its
# (Race, Year) index labels.
df.sort_values(by='Difficulty')

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Race,Year,Unnamed: 2_level_1
Forgotten Florida,2021,6.1
Vero Beach Ultra,2020,8.1
Badwater,2020,9.7
Barkley Marathons,2021,9.8
Badwater,2025,9.9


In [23]:
# pd.IndexSlice allows slice syntax (':') inside a MultiIndex key.
idx = pd.IndexSlice
# idx[:, 2020] means "any Race, Year == 2020"; the trailing ':' keeps all columns.
df.loc[idx[:, 2020], :]  # All races from 2020

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Race,Year,Unnamed: 2_level_1
Badwater,2020,9.7
Vero Beach Ultra,2020,8.1


In [24]:
# Boolean-mask filtering on a column works as it does with a flat index;
# the surviving rows keep their (Race, Year) labels.
df[df['Difficulty'] > 8]

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Race,Year,Unnamed: 2_level_1
Badwater,2020,9.7
Barkley Marathons,2021,9.8
Vero Beach Ultra,2020,8.1
Badwater,2025,9.9


In [26]:
# Overwrite a single cell, addressed by (row key tuple, column label).
# This mutates `df` in place, so outputs displayed by earlier cells still show
# 9.7. The df2 / df3 copies taken earlier are unaffected.
df.at[('Badwater', 2020), 'Difficulty'] = 10.0
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Race,Year,Unnamed: 2_level_1
Badwater,2020,10.0
Barkley Marathons,2021,9.8
Vero Beach Ultra,2020,8.1
Forgotten Florida,2021,6.1
Badwater,2025,9.9


In [27]:
# Assigning through .loc to a key that is not yet in the index appends a new
# row. The list supplies one value per column; here that is only 'Difficulty'.
df.loc[('Everglades Ultra', 2020), :] = [7.1]

In [28]:
# Display the frame: the new (Everglades Ultra, 2020) row is at the bottom.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Race,Year,Unnamed: 2_level_1
Badwater,2020,10.0
Barkley Marathons,2021,9.8
Vero Beach Ultra,2020,8.1
Forgotten Florida,2021,6.1
Badwater,2025,9.9
Everglades Ultra,2020,7.1


In [29]:
# Get all races in 2020
# xs selects on the named level and drops it from the result, so the output
# is indexed by Race only.
df.xs(2020, level='Year')

Unnamed: 0_level_0,Difficulty
Race,Unnamed: 1_level_1
Badwater,10.0
Vero Beach Ultra,8.1
Everglades Ultra,7.1


In [30]:
# Make Year the outer level, then sort so rows are grouped by year and, within
# a year, by race. This rebinds `df`: cells below see the (Year, Race) layout.
df = df.reorder_levels(['Year', 'Race']).sort_index()

In [31]:
# Display the frame: Year is the outer level and the rows are sorted.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Difficulty
Year,Race,Unnamed: 2_level_1
2020,Badwater,10.0
2020,Everglades Ultra,7.1
2020,Vero Beach Ultra,8.1
2021,Barkley Marathons,9.8
2021,Forgotten Florida,6.1
2025,Badwater,9.9


In [32]:
# df2 is the copy taken before `df` was modified, so it still has the original
# five rows and values. Pivot the 'Race' level into columns; (Year, Race)
# combinations with no row are left empty (missing) in the result.
df2.unstack(level='Race')

Unnamed: 0_level_0,Difficulty,Difficulty,Difficulty,Difficulty
Race,Badwater,Barkley Marathons,Forgotten Florida,Vero Beach Ultra
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2020,9.7,,,8.1
2021,,9.8,6.1,
2025,9.9,,,


In [33]:
# Levels can also be given by position. Positions are counted from 0, so
# level=1 is the second index level ('Year'), which becomes the columns here.
df2.unstack(level=1)

Unnamed: 0_level_0,Difficulty,Difficulty,Difficulty
Year,2020,2021,2025
Race,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Badwater,9.7,,9.9
Barkley Marathons,,9.8,
Forgotten Florida,,6.1,
Vero Beach Ultra,8.1,,


In [34]:
# Per-runner values for two race distances.
# NOTE(review): presumably finishing times in hours — confirm.
data_runners = {
    'Runner': ['Alice', 'Bob', 'Clara', 'Dave', 'Eva'],
    '50 miler': [7.5, 10.2, 8.9, 15.8, 6.3],
    '100 miler': [22.3, 35.6, 27.8, 39.1, 18.5],
}
# Wide layout: one row per runner (the index), one column per distance.
df4 = pd.DataFrame(data=data_runners)
df4 = df4.set_index(keys='Runner')

In [35]:
# Display the wide frame: runners as the index, one column per distance.
df4

Unnamed: 0_level_0,50 miler,100 miler
Runner,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,7.5,22.3
Bob,10.2,35.6
Clara,8.9,27.8
Dave,15.8,39.1
Eva,6.3,18.5


In [36]:
# Move the column labels into an inner index level. The wide frame becomes a
# Series indexed by (Runner, column label), one value per pair.
df_stacked = df4.stack()

In [37]:
# Display the stacked Series: two entries per runner.
df_stacked

Runner           
Alice   50 miler      7.5
        100 miler    22.3
Bob     50 miler     10.2
        100 miler    35.6
Clara   50 miler      8.9
        100 miler    27.8
Dave    50 miler     15.8
        100 miler    39.1
Eva     50 miler      6.3
        100 miler    18.5
dtype: float64

In [38]:
# Stack, then turn both index levels back into ordinary columns (long format).
# Neither the stacked level nor the values have a name, so the output labels
# those two columns 'level_1' and 0.
df_stacked_reset = df4.stack().reset_index()
df_stacked_reset

Unnamed: 0,Runner,level_1,0
0,Alice,50 miler,7.5
1,Alice,100 miler,22.3
2,Bob,50 miler,10.2
3,Bob,100 miler,35.6
4,Clara,50 miler,8.9
5,Clara,100 miler,27.8
6,Dave,50 miler,15.8
7,Dave,100 miler,39.1
8,Eva,50 miler,6.3
9,Eva,100 miler,18.5


In [39]:
# df3 is the unmodified copy of the (Race, Year)-indexed frame. reset_index
# turns both levels back into regular columns with a default integer index.
df_reset_dataframe = df3.reset_index()
df_reset_dataframe

Unnamed: 0,Race,Year,Difficulty
0,Badwater,2020,9.7
1,Barkley Marathons,2021,9.8
2,Vero Beach Ultra,2020,8.1
3,Forgotten Florida,2021,6.1
4,Badwater,2025,9.9


In [40]:
# Two rows of raw values, six per row. This presumably lines up with the six
# (Band, Attribute) columns built below.
# NOTE(review): the first row reads as album sales / starting year per band;
# what the second row represents is not evident from here — confirm.
data_music = [
    [200_000_000, 1960, 120_000_000, 1962, 250_000_000, 1965],
    [8, 10, 17, 7, 26, 12],
]

# Multi-level column index: the Cartesian product bands x attributes gives
# six (Band, Attribute) columns, grouped band by band.
bands = ['The Beatles', 'The Rolling Stones', 'Pink Floyd']
attributes = ['Album Sales', 'Starting Year']
multi_columns = pd.MultiIndex.from_product(
    iterables=[bands, attributes],
    names=['Band', 'Attribute'],
)

In [41]:
# Display the raw nested list (a plain Python list, not yet a DataFrame).
data_music

[[200000000, 1960, 120000000, 1962, 250000000, 1965], [8, 10, 17, 7, 26, 12]]