## Indexing in Pandas

### **Create an index**

In [3]:
import pandas as pd
import numpy as np
import random

# Read the student exam results from the CSV file into a DataFrame
df_exams = pd.read_csv(filepath_or_buffer="datasets/StudentsPerformance.csv")

# Leave the frame as the cell's last expression so the notebook renders it
df_exams

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75
...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95
996,male,group C,high school,free/reduced,none,62,55,55
997,female,group C,high school,free/reduced,completed,59,71,65
998,female,group D,some college,standard,completed,68,78,77


In [10]:
# Build one distinct integer label per row to use as the new index.
# pandas does allow duplicate index labels, but unique labels keep
# label-based lookups unambiguous, so distinct values are generated here.
# The length is taken from the frame itself instead of a hard-coded row
# count, so the array always matches the data that was loaded.

new_index = np.arange(len(df_exams))

In [11]:
# Shuffle the labels in place.
# A seeded NumPy generator is used (rather than the stdlib random module)
# because new_index is a NumPy array and because a fixed seed makes the
# resulting order identical on every Restart & Run All.

rng = np.random.default_rng(seed=42)
rng.shuffle(new_index)

In [12]:
# Display the shuffled index values
new_index

array([597, 737, 141, 781, 887, 936, 919, 458, 749, 181, 385, 498, 708,
       553, 818, 942, 482,  95, 461, 243, 513, 510, 913, 496, 472, 179,
       637, 652, 804, 797, 955, 547,  86, 214, 355, 801, 351, 506, 568,
        39, 601, 160, 445,  59, 623, 888, 501, 305, 956, 326, 228,  87,
       449, 429, 946, 215, 735, 539, 358, 234, 617, 293, 211, 198, 764,
       741, 994, 272, 811, 576, 612, 736, 761, 753, 525, 815, 415, 678,
       924, 342, 361, 722, 852, 972,  36, 618, 966, 407, 398, 235, 609,
       516, 739, 468,  84,  76, 296, 814, 676, 230, 127,  56, 984, 107,
        40, 911,  49,  10, 798, 356,  26, 219, 846, 716, 640,  12, 427,
       519, 935, 177, 870, 891, 208, 837, 713, 309, 527, 900, 600, 346,
       932, 985, 686, 975, 785, 649,  35, 886, 417, 239, 115, 625, 247,
       451, 692, 548, 562, 443, 794, 284, 931, 454, 541, 664,  43, 783,
       615, 471,  48, 826, 773, 488, 340, 509,  24, 961, 671, 304, 938,
       163, 834, 403, 928, 278, 379, 930, 944, 847, 475, 369, 42

In [22]:
# Attach the shuffled labels as an ordinary column (assign accepts an array
# of data just like for any other new column), then promote that column to
# the index. set_index returns a new DataFrame instead of modifying the
# original (unless inplace=True is passed), so the result is bound back to
# df_exams.
df_exams = (
    df_exams
    .assign(new_index=new_index)
    .set_index("new_index")  # df.set_index("column name")
)
df_exams

Unnamed: 0_level_0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
597,female,group B,bachelor's degree,standard,none,72,72,74
737,female,group C,some college,standard,completed,69,90,88
141,female,group B,master's degree,standard,none,90,95,93
781,male,group A,associate's degree,free/reduced,none,47,57,44
887,male,group C,some college,standard,none,76,78,75
...,...,...,...,...,...,...,...,...
19,female,group E,master's degree,standard,completed,88,99,95
64,male,group C,high school,free/reduced,none,62,55,55
73,female,group C,high school,free/reduced,completed,59,71,65
840,female,group D,some college,standard,completed,68,78,77


### **Sort index**

In [25]:
# Order the rows by their index labels, smallest first.
# sort_index hands back a sorted copy; df_exams itself keeps its current order.
df_exams.sort_index(ascending=True)

Unnamed: 0_level_0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,female,group B,high school,standard,completed,58,70,68
1,male,group D,associate's degree,standard,none,80,63,63
2,female,group D,master's degree,free/reduced,completed,85,95,100
3,female,group A,some high school,free/reduced,none,59,73,69
4,female,group C,some college,standard,none,71,81,80
...,...,...,...,...,...,...,...,...
995,female,group C,associate's degree,standard,completed,67,84,86
996,female,group A,some college,free/reduced,none,49,65,55
997,male,group C,high school,standard,none,71,66,65
998,male,group E,associate's degree,standard,none,53,45,40


In [26]:
# Order the rows by their index labels from largest to smallest
df_exams.sort_index(axis=0, ascending=False)

Unnamed: 0_level_0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
999,male,group C,some college,standard,none,58,49,42
998,male,group E,associate's degree,standard,none,53,45,40
997,male,group C,high school,standard,none,71,66,65
996,female,group A,some college,free/reduced,none,49,65,55
995,female,group C,associate's degree,standard,completed,67,84,86
...,...,...,...,...,...,...,...,...
4,female,group C,some college,standard,none,71,81,80
3,female,group A,some high school,free/reduced,none,59,73,69
2,female,group D,master's degree,free/reduced,completed,85,95,100
1,male,group D,associate's degree,standard,none,80,63,63
