# Target
Given an example dataframe with multi-level columns, filter the data with the specified conditions and then sort it in ascending / descending order.

**Example**

In [1]:
import pandas as pd

In [2]:
# Long-format sample data as a dict of columns (Year, Sex, Rank, Name, Count),
# each mapping row label 0-18 to a value.
# Note: rows 9 and 12 share the same (Year=1963, Sex='Female', Rank=1) key, so any
# reshape keyed on those three fields has to aggregate the two counts.
d = {'Year': {0: 1960, 1: 1960, 2: 1960, 3: 1961, 4: 1961, 5: 1961, 6: 1962, 7: 1962, 8: 1962, 9: 1963, 10: 1963, 11: 1963, 12: 1963, 13: 1964, 14: 1964, 15: 1964, 16: 1965, 17: 1965, 18: 1965},
     'Sex': {0: 'Female', 1: 'Female', 2: 'Male', 3: 'Male', 4: 'Female', 5: 'Female', 6: 'Female', 7: 'Male', 8: 'Male', 9: 'Female', 10: 'Female', 11: 'Female', 12: 'Female', 13: 'Male', 14: 'Male', 15: 'Male', 16: 'Female', 17: 'Female', 18: 'Female'},
     'Rank': {0: 1, 1: 2, 2: 3, 3: 1, 4: 2, 5: 3, 6: 1, 7: 2, 8: 3, 9: 1, 10: 2, 11: 3, 12: 1, 13: 1, 14: 2, 15: 3, 16: 1, 17: 2, 18: 3},
     'Name': {0: 'SUSAN', 1: 'MARY', 2: 'KAREN', 3: 'LISA', 4: 'SUSAN', 5: 'KAREN', 6: 'LISA', 7: 'SUSAN', 8: 'KAREN', 9: 'LISA', 10: 'SUSAN', 11: 'MARY', 12: 'KAREN', 13: 'ANTHONY', 14: 'CHARLES', 15: 'ERIC', 16: 'LISA', 17: 'KIMBERLY*', 18: 'KAREN'},
     'Count': {0: 3299, 1: 3248, 2: 3156, 3: 3497, 4: 3221, 5: 2985, 6: 3698, 7: 3088, 8: 2935, 9: 4661, 10: 2920, 11: 2788, 12: 2723, 13: 1970, 14: 1880, 15: 1878, 16: 4629, 17: 2822, 18: 2633}}
# Build the frame from the column dict; the bare `df` on the last line displays it.
df = pd.DataFrame(d)
df

Unnamed: 0,Year,Sex,Rank,Name,Count
0,1960,Female,1,SUSAN,3299
1,1960,Female,2,MARY,3248
2,1960,Male,3,KAREN,3156
3,1961,Male,1,LISA,3497
4,1961,Female,2,SUSAN,3221
5,1961,Female,3,KAREN,2985
6,1962,Female,1,LISA,3698
7,1962,Male,2,SUSAN,3088
8,1962,Male,3,KAREN,2935
9,1963,Female,1,LISA,4661


In [3]:
# Reshape the long table to wide form: one row per Year, one column per (Sex, Rank).
#
# `values` is given explicitly, and as a list, for two reasons:
#   * only the numeric 'Count' column is aggregated -- without it the text column
#     'Name' is also handed to the default mean aggregation, which older pandas
#     drops with a FutureWarning and newer pandas rejects with an error;
#   * a list (rather than a bare string) keeps 'Count' as the top column level, so
#     the result still has the 3-level columns (None, 'Sex', 'Rank').
#
# Duplicate (Year, Sex, Rank) rows are combined with the default aggfunc (mean);
# (Year, Sex, Rank) combinations with no data are filled with 3000.
#
# NOTE: this rebinds `df` from the long table to the pivoted one, so re-running
# this cell requires re-running the cell that builds the long `df` first.
df = pd.pivot_table(
    data=df,
    values=['Count'],
    index=['Year'],
    columns=['Sex', 'Rank'],
    fill_value=3000
)
df

  df = pd.pivot_table(


Unnamed: 0_level_0,Count,Count,Count,Count,Count,Count
Sex,Female,Female,Female,Male,Male,Male
Rank,1,2,3,1,2,3
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
1960,3299,3248,3000,3000,3000,3156
1961,3000,3221,2985,3497,3000,3000
1962,3698,3000,3000,3000,3088,2935
1963,3692,2920,2788,3000,3000,3000
1964,3000,3000,3000,1970,1880,1878
1965,4629,2822,2633,3000,3000,3000


⬇ column labels

In [4]:
# Column labels of the pivoted frame (a MultiIndex of tuples).
df.columns

MultiIndex([('Count', 'Female', 1),
            ('Count', 'Female', 2),
            ('Count', 'Female', 3),
            ('Count',   'Male', 1),
            ('Count',   'Male', 2),
            ('Count',   'Male', 3)],
           names=[None, 'Sex', 'Rank'])

⬇ number of column levels

In [5]:
# Number of levels in the column MultiIndex.
df.columns.nlevels

3

⬇ column level names

In [6]:
# Names of the column levels; a level without a name is reported as None.
df.columns.names

FrozenList([None, 'Sex', 'Rank'])

⬇ index labels

In [7]:
# Row labels of the pivoted frame.
df.index

Int64Index([1960, 1961, 1962, 1963, 1964, 1965], dtype='int64', name='Year')

⬇ number of index levels

In [8]:
# Number of levels in the row index.
df.index.nlevels

1

⬇ index level names

In [9]:
# Names of the row-index levels.
df.index.names

FrozenList(['Year'])

# Filter

2.1 Define the first condition: the values in the columns `('Count', *, 2)` (any `Sex`, `Rank == 2`) fall in the half-open range `[1800, 2900)`.

In [10]:
# Labels of the columns whose 'Rank' level equals 2 (shown for reference).
rank_of_each_column = df.columns.get_level_values(level='Rank')
columns1 = df.columns[rank_of_each_column == 2]
columns1

MultiIndex([('Count', 'Female', 2),
            ('Count',   'Male', 2)],
           names=[None, 'Sex', 'Rank'])

In [11]:
# Per-column selector: True for the columns whose 'Rank' level equals 2.
is_rank_2 = df.columns.get_level_values(level='Rank') == 2

# Element-wise test 1800 <= value < 2900, i.e. the same half-open interval that
# range(1800, 2900) covers. Numeric bounds are used instead of
# `isin(range(...))` because pivot_table aggregates with the mean by default,
# so cells may hold fractional values that are never members of an integer range.
in_band = df.ge(1800) & df.lt(2900)

# The 1-D column selector (one entry per column) is broadcast down every row,
# so only Rank-2 columns can be True in the final mask.
condition1 = in_band & is_rank_2
condition1

Unnamed: 0_level_0,Count,Count,Count,Count,Count,Count
Sex,Female,Female,Female,Male,Male,Male
Rank,1,2,3,1,2,3
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
1960,False,False,False,False,False,False
1961,False,False,False,False,False,False
1962,False,False,False,False,False,False
1963,False,False,False,False,False,False
1964,False,False,False,False,True,False
1965,False,True,False,False,False,False


2.2 Define the second condition: the values in the columns `('Count', *, 3)` (any `Sex`, `Rank == 3`) are greater than or equal to `3000`.

In [12]:
# Labels of the columns whose 'Rank' level equals 3 (shown for reference).
rank_of_each_column = df.columns.get_level_values(level='Rank')
columns2 = df.columns[rank_of_each_column == 3]
columns2

MultiIndex([('Count', 'Female', 3),
            ('Count',   'Male', 3)],
           names=[None, 'Sex', 'Rank'])

In [13]:
# Per-column selector: True for the columns whose 'Rank' level equals 3.
is_rank_3 = df.columns.get_level_values(level='Rank') == 3
# Combine with the element-wise test value >= 3000; the selector is broadcast
# across the rows. Cells that the pivot filled with 3000 also pass this test.
condition2 = is_rank_3 & (df >= 3000)
condition2

Unnamed: 0_level_0,Count,Count,Count,Count,Count,Count
Sex,Female,Female,Female,Male,Male,Male
Rank,1,2,3,1,2,3
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
1960,False,False,True,False,False,True
1961,False,False,False,False,False,True
1962,False,False,True,False,False,False
1963,False,False,False,False,False,True
1964,False,False,True,False,False,False
1965,False,False,False,False,False,True


# Sort

- [x] `method 1`<br>
use `df.where(condition)` to filter the original dataframe

In [14]:
# A cell is kept when it satisfies either condition; all other cells become NaN.
keep_either = condition1 | condition2
method1result = df.where(keep_either)
method1result

Unnamed: 0_level_0,Count,Count,Count,Count,Count,Count
Sex,Female,Female,Female,Male,Male,Male
Rank,1,2,3,1,2,3
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
1960,,,3000.0,,,3156.0
1961,,,,,,3000.0
1962,,,3000.0,,,
1963,,,,,,3000.0
1964,,,3000.0,,1880.0,
1965,,2822.0,,,,3000.0


- [x] `method 2`<br>
Use each of the 2 conditions to filter the original dataframe, giving 2 new dataframes with the same shape. Then fill the gaps in the former with values from the latter using `combine_first`.

In [15]:
# Same-shaped copy of df that keeps only the cells where condition1 is True
# (everything else is NaN). Indexing with a boolean frame does the same thing.
df1 = df.where(condition1)
df1

Unnamed: 0_level_0,Count,Count,Count,Count,Count,Count
Sex,Female,Female,Female,Male,Male,Male
Rank,1,2,3,1,2,3
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
1960,,,,,,
1961,,,,,,
1962,,,,,,
1963,,,,,,
1964,,,,,1880.0,
1965,,2822.0,,,,


In [16]:
# Same-shaped copy of df that keeps only the cells where condition2 is True
# (everything else is NaN). Indexing with a boolean frame does the same thing.
df2 = df.where(condition2)
df2

Unnamed: 0_level_0,Count,Count,Count,Count,Count,Count
Sex,Female,Female,Female,Male,Male,Male
Rank,1,2,3,1,2,3
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
1960,,,3000.0,,,3156.0
1961,,,,,,3000.0
1962,,,3000.0,,,
1963,,,,,,3000.0
1964,,,3000.0,,,
1965,,,,,,3000.0


In [17]:
# Start from df1 and fill each of its NaN cells with the value df2 has at the
# same position, giving the union of the two filtered frames.
method2result = df1.combine_first(other=df2)
method2result

Unnamed: 0_level_0,Count,Count,Count,Count,Count,Count
Sex,Female,Female,Female,Male,Male,Male
Rank,1,2,3,1,2,3
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
1960,,,3000.0,,,3156.0
1961,,,,,,3000.0
1962,,,3000.0,,,
1963,,,,,,3000.0
1964,,,3000.0,,1880.0,
1965,,2822.0,,,,3000.0


# Judge

In [18]:
flag = method1result.equals(method2result)
flag

True