In [1]:
import pandas as pd

In [3]:
# Load the students' exam results from the CSV file into a DataFrame,
# then preview the first five rows to check that it was read correctly.
df_exams = pd.read_csv(filepath_or_buffer='StudentsPerformance.csv')
df_exams.head(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


## 1. Sort a dataframe

In [6]:
# Sort the rows by a single column.
# `ascending=True` is the default; it is spelled out here only to make the order explicit.
df_exams.sort_values(by='math score', ascending=True)

# Tip: press Shift+Tab inside the parentheses to see the method's parameters.
# The `by=` keyword can be omitted, because the column name is the first positional argument.

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
59,female,group C,some high school,free/reduced,none,0,17,10
980,female,group B,high school,free/reduced,none,8,24,23
17,female,group B,some high school,free/reduced,none,18,32,28
787,female,group B,some college,standard,none,19,38,32
145,female,group C,some college,free/reduced,none,22,39,33
...,...,...,...,...,...,...,...,...
625,male,group D,some college,standard,completed,100,97,99
623,male,group A,some college,standard,completed,100,96,86
451,female,group E,some college,standard,none,100,92,97
962,female,group E,associate's degree,standard,none,100,100,100


In [7]:
# As we can see here, the df was sorted in ascending order by default, so the math score starts at 0 and ends at 100.

In [8]:
# Sort by one column in descending order (highest math score first).
df_exams.sort_values(by='math score', ascending=False)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
962,female,group E,associate's degree,standard,none,100,100,100
625,male,group D,some college,standard,completed,100,97,99
458,female,group E,bachelor's degree,standard,none,100,100,100
623,male,group A,some college,standard,completed,100,96,86
451,female,group E,some college,standard,none,100,92,97
...,...,...,...,...,...,...,...,...
145,female,group C,some college,free/reduced,none,22,39,33
787,female,group B,some college,standard,none,19,38,32
17,female,group B,some high school,free/reduced,none,18,32,28
980,female,group B,high school,free/reduced,none,8,24,23


In [9]:
# As we can see here, the df is now sorted in descending order by the math score column, so it starts at 100 and ends at 0.

In [10]:
# Sort by several columns in descending order.
# The columns are applied in the order listed: 'math score' first,
# then 'reading score' to order rows that share the same math score.
df_exams.sort_values(
    by=['math score', 'reading score'],
    ascending=False,
)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
149,male,group E,associate's degree,free/reduced,completed,100,100,93
458,female,group E,bachelor's degree,standard,none,100,100,100
916,male,group E,bachelor's degree,standard,completed,100,100,100
962,female,group E,associate's degree,standard,none,100,100,100
625,male,group D,some college,standard,completed,100,97,99
...,...,...,...,...,...,...,...,...
145,female,group C,some college,free/reduced,none,22,39,33
787,female,group B,some college,standard,none,19,38,32
17,female,group B,some high school,free/reduced,none,18,32,28
980,female,group B,high school,free/reduced,none,8,24,23


In [14]:
# As we can see here, the df was sorted in descending order first by the math score column and then by the reading score column.
# The math score column has first priority; the reading score column is only used to order rows with the same math score.

# Note that none of the sorts so far changed df_exams itself: the dataframe still has its original row order.
# This happens because sort_values() returns a new, sorted copy of the df and leaves the original untouched.

In [15]:
# Sort by several columns in descending order and store the result in df_exams itself.
# With inplace=True the method modifies df_exams directly and returns None,
# so this cell displays no output.
# (Equivalent alternative: df_exams = df_exams.sort_values(..., ascending=False).)
df_exams.sort_values(by=['math score', 'reading score'], ascending=False, inplace=True)

In [18]:
# And now the values of the dataframe are updated.

In [19]:
# Sort ascending with a key function.
#
# A lambda has the form:  lambda <parameter>: <expression whose value is returned>
#
# sort_values() calls the key function with the column being sorted (a Series),
# and the function must return a Series of the same length. Here the parameter is
# named `column`; `.str` gives access to the string methods and `.lower()` converts
# every value to lower case.
#
# Put together, the three arguments mean: sort the rows by the 'race/ethnicity'
# column, in ascending order, comparing the lower-cased text. This makes the sort
# case-insensitive. The key is only used for the comparison; the values stored in
# the dataframe are not changed.
df_exams.sort_values(
    by='race/ethnicity',
    ascending=True,
    key=lambda column: column.str.lower(),
)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
741,female,group A,associate's degree,free/reduced,none,37,57,56
151,male,group A,bachelor's degree,standard,none,77,67,68
811,male,group A,high school,free/reduced,none,45,47,49
112,male,group A,associate's degree,standard,none,54,53,47
25,male,group A,master's degree,free/reduced,none,73,74,72
...,...,...,...,...,...,...,...,...
751,male,group E,some college,standard,none,68,72,65
915,female,group E,some college,standard,none,68,70,66
592,male,group E,bachelor's degree,standard,none,68,68,64
479,male,group E,associate's degree,standard,none,76,71,67


In [20]:
# And that's it. These are different ways to sort a dataframe using the sort_values() method.