In [18]:
import pandas as pd

In [19]:
# Load the exam results from the CSV file into a DataFrame and preview the first five rows
df_exams = pd.read_csv(filepath_or_buffer='StudentsPerformance.csv')
df_exams.head(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


## 1. Create Index

In this section we create a column of unique integer values that we will later set as the DataFrame's new index.

In [20]:
import numpy as np
import random

In [21]:
# Creating non-repeating values for the index: one integer label per row.
# The length is taken from the DataFrame instead of a hard-coded 1000, so the
# later column assignment still lines up if the CSV has a different row count.
new_index = np.arange(len(df_exams))

# pandas does allow duplicate index labels, but unique labels keep every row
# individually addressable, so new_index holds the distinct integers
# [0, 1, 2, ..., len(df_exams) - 1] (0 to 999 for this 1000-row dataset).

In [42]:
# Shuffle the labels in place so there is something to sort later.
# NumPy's own Generator.shuffle is used instead of random.shuffle: the stdlib
# function swaps elements with tuple assignment, which is only safe for 1-D
# arrays, and a fixed seed makes the resulting order reproducible on re-run.
np.random.default_rng(seed=42).shuffle(new_index)

In [11]:
# The shuffle displays no output because it reorders new_index in place and returns None.
# Printing the array with print(new_index) confirms that the integers were shuffled.

In [43]:
# Add the values in new_index as a regular column called 'new_index'
# (it becomes the index in the next section), then preview the first rows
df_exams['new_index'] = new_index
df_exams.head()

Unnamed: 0_level_0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,new_index
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
53,female,group B,bachelor's degree,standard,none,72,72,74,221
534,female,group C,some college,standard,completed,69,90,88,182
346,female,group B,master's degree,standard,none,90,95,93,764
665,male,group A,associate's degree,free/reduced,none,47,57,44,622
214,male,group C,some college,standard,none,76,78,75,586


## 2. Set index

In [44]:
# Promote the 'new_index' column to be the row index of df_exams.
# By default set_index returns a modified copy and leaves the original frame
# untouched; passing inplace=True updates df_exams itself instead.
df_exams.set_index(keys='new_index', inplace=True)
df_exams

Unnamed: 0_level_0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
221,female,group B,bachelor's degree,standard,none,72,72,74
182,female,group C,some college,standard,completed,69,90,88
764,female,group B,master's degree,standard,none,90,95,93
622,male,group A,associate's degree,free/reduced,none,47,57,44
586,male,group C,some college,standard,none,76,78,75
...,...,...,...,...,...,...,...,...
869,female,group E,master's degree,standard,completed,88,99,95
930,male,group C,high school,free/reduced,none,62,55,55
354,female,group C,high school,free/reduced,completed,59,71,65
315,female,group D,some college,standard,completed,68,78,77


## 3. Sort Index

The `sort_index` method sorts the rows of a DataFrame by their index labels.

In [45]:
# Show the rows ordered by their new_index labels, smallest first.
# The result is not assigned, so df_exams keeps its current row order.
df_exams.sort_index(ascending=True)

Unnamed: 0_level_0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,male,group A,some high school,standard,none,51,31,36
1,female,group D,some college,free/reduced,none,64,74,75
2,female,group D,some high school,standard,none,59,72,80
3,male,group D,associate's degree,free/reduced,none,66,62,64
4,female,group C,some college,free/reduced,completed,42,66,69
...,...,...,...,...,...,...,...,...
995,male,group B,some high school,standard,none,74,63,57
996,female,group C,some high school,standard,none,65,69,76
997,female,group C,associate's degree,standard,completed,62,76,80
998,male,group B,associate's degree,standard,none,81,73,72


In [46]:
# As we can see, the displayed result is sorted in ascending order, from 0 to 999.
# sort_index returned a sorted copy here; df_exams itself was not modified.

In [48]:
# Reorder df_exams itself so the index labels run from highest to lowest
df_exams.sort_index(inplace=True, ascending=False)

In [49]:
# Display df_exams to confirm the in-place descending sort took effect
df_exams

Unnamed: 0_level_0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
999,female,group C,bachelor's degree,free/reduced,none,43,62,61
998,male,group B,associate's degree,standard,none,81,73,72
997,female,group C,associate's degree,standard,completed,62,76,80
996,female,group C,some high school,standard,none,65,69,76
995,male,group B,some high school,standard,none,74,63,57
...,...,...,...,...,...,...,...,...
4,female,group C,some college,free/reduced,completed,42,66,69
3,male,group D,associate's degree,free/reduced,none,66,62,64
2,female,group D,some high school,standard,none,59,72,80
1,female,group D,some college,free/reduced,none,64,74,75


In [50]:
# Because we passed inplace=True, the descending sort was applied to df_exams itself.

In [51]:
# And that's how the set_index and sort_index methods work in pandas.