---
# Indices (Indexes?)
How to use, set, and reset indices.

---

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy data set: each key becomes a column, and each list holds that
# column's values (one entry per person).
people = dict(
    first=["Lorem", "Foo", "Cat"],
    last=["Ipsum", "Bar", "Dog"],
    email=["loripsum@a.a", "foobar@a.a", "catdog@a.a"],
)

# No index is supplied, so the rows get the default labels 0, 1, 2.
df = pd.DataFrame(data=people)
df

Unnamed: 0,first,last,email
0,Lorem,Ipsum,loripsum@a.a
1,Foo,Bar,foobar@a.a
2,Cat,Dog,catdog@a.a


### Frequent Reminder
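It is good practice to preview a change first by leaving `inplace` at its default (`False`), which returns the modified result without altering the original.  
Once the result looks right, rerun the call with `inplace=True` to apply it.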

---

In [3]:
# Promote the "email" column to be the row index.
# set_index normally hands back a new DataFrame and leaves df untouched;
# inplace=True applies the change to df itself instead.
df.set_index(keys="email", inplace=True)
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
loripsum@a.a,Lorem,Ipsum
foobar@a.a,Foo,Bar
catdog@a.a,Cat,Dog


In [4]:
# With emails as the index, loc can select rows by their email label.
# A label slice includes both endpoints, so the "foobar@a.a" row is part
# of the result.
df.loc["loripsum@a.a":"foobar@a.a", :]

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
loripsum@a.a,Lorem,Ipsum
foobar@a.a,Foo,Bar


In [5]:
# Go back to the default integer index (0, 1, 2, ...).
# The old index is not discarded: "email" comes back as the first column.
df.reset_index(drop=False, inplace=True)
df

Unnamed: 0,email,first,last
0,loripsum@a.a,Lorem,Ipsum
1,foobar@a.a,Foo,Bar
2,catdog@a.a,Cat,Dog


---
## == Example from the Stack Overflow Data Set ==
---

In [None]:
# Load the survey responses and the survey schema from CSV.
# index_col makes the named column the row index at load time, so no
# separate set_index call is needed afterwards.
# NOTE: this rebinds df, replacing the small example frame used above.
df = pd.read_csv("data/survey_results_public_2022.csv", index_col="ResponseId")
schema_df = pd.read_csv("data/survey_results_schema.csv", index_col="qname")

In [None]:
# Configure display options: allow up to 80 rows and 80 columns to be shown.
for option_name in ("display.max_rows", "display.max_columns"):
    pd.set_option(option_name, 80)

In [None]:
# Preview the first 5 rows of df.
df.head(n=5)

In [None]:
# Look up a single row by its index label (here 38) to see its full record.
# loc matches the label stored in the index, not the row's position.
df.loc[38, :]

In [None]:
# Some column names in the survey are not descriptive enough on their own.
# To check what a field means, use the schema provided along with the
# survey: select the row labelled "PurchaseInfluence" and read its
# "question" column.

schema_df.loc["PurchaseInfluence", "question"]

In [None]:
# Show the schema with its index labels sorted in ascending order.
# The result is not assigned, so schema_df itself keeps its original order.
schema_df.sort_index(ascending=True)