# Target
Given an example DataFrame with multi-level columns, filter its rows by specified conditions and then sort them in ascending or descending order.

**Example**

In [15]:
import pandas as pd

# Long-format sample records: one row per (data_provider, indicator, year).
records = pd.DataFrame(
    data={
        "data_provider": ["prov_1", "prov_1", "prov_2", "prov_2", "prov_3", "prov_3"],
        "indicator": ["ind_a", "ind_a", "ind_a", "ind_b", "ind_b", "ind_b"],
        "unit": ["EUR", "EUR", "EUR", "EUR", "EUR", "EUR"],
        "year": ["2017", "2018", "2019", "2017", "2018", "2019"],
        "country1": [1, 2, 3, 2, 4, 6],
        "country2": [4, 5, 6, 40, 50, 60],
    }
)

# Pivot into a wide frame with multi-level columns.
# `values` is spelled out so that only the numeric "country2" column is
# aggregated; without it pivot_table also tries to average the string column
# "unit" (a warning in pandas 1.x, a TypeError in pandas 2.x).
# It is passed as a list so "country2" stays as the top column level.
df = records.pivot_table(
    index=["data_provider", "indicator"],
    columns=["year", "country1"],
    values=["country2"],
    fill_value=30,  # combinations with no record are filled with 30
)

df

  df = df.pivot_table(


Unnamed: 0_level_0,Unnamed: 1_level_0,country2,country2,country2,country2,country2,country2
Unnamed: 0_level_1,year,2017,2017,2018,2018,2019,2019
Unnamed: 0_level_2,country1,1,2,2,4,3,6
data_provider,indicator,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
prov_1,ind_a,4,30,5,30,30,30
prov_2,ind_a,30,30,30,30,6,30
prov_2,ind_b,30,40,30,30,30,30
prov_3,ind_b,30,30,30,50,30,60


1.1 columns and any other related info

⬇ column labels

In [16]:
# Column labels of the pivoted frame: one tuple per column, one entry per level.
df.columns

MultiIndex([('country2', '2017', 1),
            ('country2', '2017', 2),
            ('country2', '2018', 2),
            ('country2', '2018', 4),
            ('country2', '2019', 3),
            ('country2', '2019', 6)],
           names=[None, 'year', 'country1'])

⬇ number of column levels

In [17]:
# Number of levels in the column MultiIndex.
df.columns.nlevels

3

⬇ column level names

In [18]:
# Name of each column level; the top (values) level has no name.
df.columns.names

FrozenList([None, 'year', 'country1'])

1.2 indexes and any other related info

⬇ index

In [19]:
# Row labels of the pivoted frame: (data_provider, indicator) pairs.
df.index

MultiIndex([('prov_1', 'ind_a'),
            ('prov_2', 'ind_a'),
            ('prov_2', 'ind_b'),
            ('prov_3', 'ind_b')],
           names=['data_provider', 'indicator'])

⬇ number of index levels

In [20]:
# Number of levels in the row MultiIndex.
df.index.nlevels

2

⬇ index level names

In [21]:
# Name of each row-index level.
df.index.names

FrozenList(['data_provider', 'indicator'])

# Filter

2.1 get the position (level number) of the column level named `country1`

In [22]:
# Position of the column level named 'country1', looked up through the public
# `names` list instead of the private `MultiIndex._get_level_number` helper.
collev = df.columns.names.index('country1')
collev

2

2.2 get the column labels where `country1` is `2`

In [23]:
# Boolean mask over the columns: True where the `collev` level equals 2.
country1_is_2 = df.columns.get_level_values(collev) == 2
# Keep only the column labels picked out by the mask.
collabels = df.columns[country1_is_2]
collabels

MultiIndex([('country2', '2017', 2),
            ('country2', '2018', 2)],
           names=[None, 'year', 'country1'])

2.3 set conditions for filtering the data

In [24]:
# A single column label is a plain tuple, usable directly as a key into `df`.
collabels[0]

('country2', '2017', 2)

In [25]:
# Keep a row only if *every* column selected in `collabels` is <= 30.
# Testing the whole selection at once (rather than indexing collabels[0] and
# collabels[1] by hand) keeps this cell valid for any number of matched columns.
condition = df.loc[:, collabels].le(30).all(axis=1)
condition

data_provider  indicator
prov_1         ind_a         True
prov_2         ind_a         True
               ind_b        False
prov_3         ind_b         True
dtype: bool

In [26]:
# Row selection with the boolean mask: only rows where `condition` is True remain.
newdf = df.loc[condition]
newdf

Unnamed: 0_level_0,Unnamed: 1_level_0,country2,country2,country2,country2,country2,country2
Unnamed: 0_level_1,year,2017,2017,2018,2018,2019,2019
Unnamed: 0_level_2,country1,1,2,2,4,3,6
data_provider,indicator,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
prov_1,ind_a,4,30,5,30,30,30
prov_2,ind_a,30,30,30,30,6,30
prov_3,ind_b,30,30,30,50,30,60


In [27]:
# The selected labels as a plain list of tuples.
collabels.to_list()

[('country2', '2017', 2), ('country2', '2018', 2)]

# Sort

sort the filtered rows by the selected column labels, in descending order (`ascending=False`)

In [28]:
# Sort keys: the selected column labels as a list of tuples, in priority order.
sort_keys = collabels.to_list()
# Order the filtered rows by those columns, largest values first.
sortednewdf = newdf.sort_values(by=sort_keys, ascending=False)
sortednewdf

Unnamed: 0_level_0,Unnamed: 1_level_0,country2,country2,country2,country2,country2,country2
Unnamed: 0_level_1,year,2017,2017,2018,2018,2019,2019
Unnamed: 0_level_2,country1,1,2,2,4,3,6
data_provider,indicator,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
prov_2,ind_a,30,30,30,30,6,30
prov_3,ind_b,30,30,30,50,30,60
prov_1,ind_a,4,30,5,30,30,30
