## Sorting a DataFrame using `sort_values()`

In [1]:
import pandas as pd

# Load the student exam results from the CSV file into a DataFrame
df_exams = pd.read_csv(filepath_or_buffer="datasets/StudentsPerformance.csv")

In [2]:
# Preview the first five rows of the loaded data
df_exams.head(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [3]:
# Ascending sort on a single column; a new DataFrame is returned and
# df_exams itself is left untouched
df_exams.sort_values('math score', ascending=True)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
59,female,group C,some high school,free/reduced,none,0,17,10
980,female,group B,high school,free/reduced,none,8,24,23
17,female,group B,some high school,free/reduced,none,18,32,28
787,female,group B,some college,standard,none,19,38,32
145,female,group C,some college,free/reduced,none,22,39,33
...,...,...,...,...,...,...,...,...
623,male,group A,some college,standard,completed,100,96,86
625,male,group D,some college,standard,completed,100,97,99
962,female,group E,associate's degree,standard,none,100,100,100
458,female,group E,bachelor's degree,standard,none,100,100,100


In [4]:
# Highest math scores first; the result is a new DataFrame, df_exams is not modified
df_exams.sort_values(by='math score', ascending=False)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
451,female,group E,some college,standard,none,100,92,97
458,female,group E,bachelor's degree,standard,none,100,100,100
962,female,group E,associate's degree,standard,none,100,100,100
149,male,group E,associate's degree,free/reduced,completed,100,100,93
623,male,group A,some college,standard,completed,100,96,86
...,...,...,...,...,...,...,...,...
145,female,group C,some college,free/reduced,none,22,39,33
787,female,group B,some college,standard,none,19,38,32
17,female,group B,some high school,free/reduced,none,18,32,28
980,female,group B,high school,free/reduced,none,8,24,23


In [5]:
# Sort df_exams itself by math score: with inplace=True the call returns None
# and the reordered rows are what every later cell sees
df_exams.sort_values(by='math score', inplace=True)

In [6]:
# Descending sort on several columns: ties in math score are broken by
# reading score, then by writing score. A new DataFrame is returned
df_exams.sort_values(
    by=['math score', 'reading score', 'writing score'],
    ascending=False,
)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
962,female,group E,associate's degree,standard,none,100,100,100
458,female,group E,bachelor's degree,standard,none,100,100,100
916,male,group E,bachelor's degree,standard,completed,100,100,100
149,male,group E,associate's degree,free/reduced,completed,100,100,93
625,male,group D,some college,standard,completed,100,97,99
...,...,...,...,...,...,...,...,...
145,female,group C,some college,free/reduced,none,22,39,33
787,female,group B,some college,standard,none,19,38,32
17,female,group B,some high school,free/reduced,none,18,32,28
980,female,group B,high school,free/reduced,none,8,24,23


In [7]:
# Sort on a transformed view of the column: `key` receives the whole Series
# and lower-cases it here, so the ordering ignores letter case
df_exams.sort_values(by='race/ethnicity', key=lambda labels: labels.str.lower())

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
576,male,group A,some college,standard,completed,61,51,52
62,male,group A,associate's degree,free/reduced,none,62,61,55
365,male,group A,bachelor's degree,free/reduced,completed,49,58,60
356,male,group A,associate's degree,standard,none,63,61,61
994,male,group A,high school,standard,none,63,63,62
...,...,...,...,...,...,...,...,...
861,female,group E,master's degree,free/reduced,none,81,86,87
332,male,group E,associate's degree,standard,completed,62,56,53
725,male,group E,some college,standard,completed,81,74,71
35,male,group E,associate's degree,standard,completed,81,81,79


In [9]:
# Custom sorting sequence using Dict + col.map()

# Labels actually present in the column
ed_levels = list(df_exams['parental level of education'].unique())

# Rank each label by its position in the desired order (lowest education first)
ed_levels_map = {
    label: rank
    for rank, label in enumerate([
        'some high school',
        'high school',
        'some college',
        "associate's degree",
        "bachelor's degree",
        "master's degree",
    ])
}

# A label missing from the map becomes NaN under col.map() and is silently
# pushed to the end of the sort, so fail loudly instead. Genuinely missing
# values (NaN) are allowed and still sort last.
unranked = [label for label in ed_levels if pd.notna(label) and label not in ed_levels_map]
assert not unranked, f"No sort rank defined for: {unranked}"

# Sort df_exams in place by the mapped ranks, then display it
df_exams.sort_values('parental level of education', key=lambda col: col.map(ed_levels_map), inplace=True)
df_exams

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
59,female,group C,some high school,free/reduced,none,0,17,10
61,male,group A,some high school,free/reduced,none,39,39,34
171,male,group E,some high school,standard,none,94,88,78
815,male,group B,some high school,standard,completed,94,86,87
233,male,group E,some high school,standard,none,92,87,78
...,...,...,...,...,...,...,...,...
225,female,group E,master's degree,free/reduced,none,45,56,54
25,male,group A,master's degree,free/reduced,none,73,74,72
130,male,group D,master's degree,standard,none,89,84,82
735,male,group C,master's degree,standard,none,67,57,59
