In [None]:
import pandas as pd

# Pandas MultiIndex examples

When creating a DataFrame with different columns, the column names are accessible using `df.columns`, which will return an `Index` object.

In [None]:
# Build a small two-column frame from a mapping of column label -> values.
d = dict(col1=[1, 2], col2=[3, 4])
df = pd.DataFrame(d)
# The column labels of the frame are exposed through `df.columns`.
print(df.columns)
df

Index(['col1', 'col2'], dtype='object')


Unnamed: 0,col1,col2
0,1,3
1,2,4


After adding a new column, it appears in the `Index` returned by `df.columns`.

In [None]:
# Item assignment with a label that does not exist yet appends a new column to `df`.
df['col3'] = [5, 6]
# Print the column labels again to check that 'col3' is now listed.
print(df.columns)
df

Index(['col1', 'col2', 'col3'], dtype='object')


Unnamed: 0,col1,col2,col3
0,1,3,5
1,2,4,6


After dropping a column, using `axis=1`, it is no longer in the `Index` object.

In [None]:
# Drop 'col1' along the column axis (axis=1); the result is bound to a new
# name, `x`, rather than assigned back to `df`.
x = df.drop(labels=['col1'], axis=1)
display(x.columns)
x

Index(['col2', 'col3'], dtype='object')

Unnamed: 0,col2,col3
0,3,5
1,4,6


But how does it work for a hierarchical MultiIndex?

In [None]:
# Two-level column index: every bodypart is paired with every coordinate.
columns = pd.MultiIndex.from_product(
    [['head', 'body'], ['x', 'y']],
    names=['bodypart', 'coordinates'],
)

# Two identical rows, one value per (bodypart, coordinate) column.
df = pd.DataFrame(data=[[1, 2, 3, 4]] * 2, columns=columns)
df

bodypart,head,head,body,body
coordinates,x,y,x,y
0,1,2,3,4
1,1,2,3,4


In [None]:
# Show, in order: the full MultiIndex, its per-level label lists, and the
# labels stored in level 0 as a plain Python list.
display(
    df.columns,
    df.columns.levels,
    list(df.columns.levels[0]),
)

MultiIndex([('head', 'x'),
            ('head', 'y'),
            ('body', 'x'),
            ('body', 'y')],
           names=['bodypart', 'coordinates'])

FrozenList([['body', 'head'], ['x', 'y']])

['body', 'head']

What happens if we add another column? Will it appear in the column index?

In [None]:
# Assigning to a (bodypart, coordinate) key that does not exist yet appends a
# new column to `df`; the scalar value fills every row.
df[('tail', 'x')] = 5
df[('tail', 'y')] = 9

display(df)

# Inspect the column index and its levels after the two additions.
display(
    df.columns,
    df.columns.levels,
    list(df.columns.levels[0]),
)

bodypart,head,head,body,body,tail,tail
coordinates,x,y,x,y,x,y
0,1,2,3,4,5,9
1,1,2,3,4,5,9


MultiIndex([('head', 'x'),
            ('head', 'y'),
            ('body', 'x'),
            ('body', 'y'),
            ('tail', 'x'),
            ('tail', 'y')],
           names=['bodypart', 'coordinates'])

FrozenList([['body', 'head', 'tail'], ['x', 'y']])

['body', 'head', 'tail']

It seems to be in columns level 0!
And what happens if we drop one of the original columns? Is the columns `Index` updated accordingly?

In [None]:
# Drop every column under the level-0 label 'body'; the result is kept in a
# separate frame so `df` stays available for comparison.
df_drop = df.drop(columns=['body'])

display(df_drop)

# Compare the remaining column tuples with what `.levels` still reports.
display(
    df_drop.columns,
    df_drop.columns.levels,
    list(df_drop.columns.levels[0]),
)

bodypart,head,head,tail,tail
coordinates,x,y,x,y
0,1,2,5,9
1,1,2,5,9


MultiIndex([('head', 'x'),
            ('head', 'y'),
            ('tail', 'x'),
            ('tail', 'y')],
           names=['bodypart', 'coordinates'])

FrozenList([['body', 'head', 'tail'], ['x', 'y']])

['body', 'head', 'tail']

So we can see that dropping a column does not remove its label from the `levels` of the column `MultiIndex` (which are stored in a `FrozenList`)! While some might consider this a bug, the [pandas developers think](https://github.com/pandas-dev/pandas/issues/3686) this is a philosophical question and that it actually works as intended 🙃.

However, there is a good workaround:

In [None]:
# Take the level-0 label of every remaining column and de-duplicate them,
# instead of reading the labels from `.levels`.
used_bodyparts = df_drop.columns.get_level_values(0).unique()
display(used_bodyparts)

Index(['head', 'tail'], dtype='object', name='bodypart')

There is another way, which means setting a new column index. While this seems a reasonable approach for some use cases, there might be unforeseen (performance) implications, which is why this is not the default behaviour.

In [None]:
# Build a column index whose levels contain only labels that are still in use,
# then install it on the frame in place of the old one.
pruned_columns = df_drop.columns.remove_unused_levels()
df_drop.columns = pruned_columns
display(df_drop.columns.levels)

FrozenList([['head', 'tail'], ['x', 'y']])