# Pandas Index Objects

## First Steps

In [28]:
import pandas as pd
# Load the Summer Olympics medal table and use the "Athlete" column as the
# row index straight away.
summer = pd.read_csv(
    "summer.csv",
    index_col="Athlete",
)
# Preview the first rows.
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [30]:
# Summary of the frame: index range, per-column non-null counts, dtypes and
# memory usage.
summer.info()

<class 'pandas.core.frame.DataFrame'>
Index: 31165 entries, HAJOS, Alfred to LIDBERG, Jimmy
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Year        31165 non-null  int64 
 1   City        31165 non-null  object
 2   Sport       31165 non-null  object
 3   Discipline  31165 non-null  object
 4   Country     31161 non-null  object
 5   Gender      31165 non-null  object
 6   Event       31165 non-null  object
 7   Medal       31165 non-null  object
dtypes: int64(1), object(7)
memory usage: 2.1+ MB


In [32]:
# The row labels are stored in an Index object (named "Athlete" here).
summer.index

Index(['HAJOS, Alfred', 'HERSCHMANN, Otto', 'DRIVAS, Dimitrios',
       'MALOKINIS, Ioannis', 'CHASAPIS, Spiridon', 'CHOROPHAS, Efstathios',
       'HAJOS, Alfred', 'ANDREOU, Joannis', 'CHOROPHAS, Efstathios',
       'NEUMANN, Paul',
       ...
       'AHMADOV, Emin', 'KAZAKEVIC, Aleksandr', 'KHUGAEV, Alan',
       'EBRAHIM, Karam Mohamed Gaber', 'GAJIYEV, Danyal', 'JANIKOWSKI, Damian',
       'REZAEI, Ghasem Gholamreza', 'TOTROV, Rustam', 'ALEKSANYAN, Artur',
       'LIDBERG, Jimmy'],
      dtype='object', name='Athlete', length=31165)

In [34]:
# Check the concrete class that holds the row labels.
type(summer.index)

pandas.core.indexes.base.Index

In [36]:
# The column labels are stored in an Index object as well.
summer.columns

Index(['Year', 'City', 'Sport', 'Discipline', 'Country', 'Gender', 'Event',
       'Medal'],
      dtype='object')

In [38]:
# Check the concrete class that holds the column labels.
type(summer.columns)

pandas.core.indexes.base.Index

In [40]:
# An Index looks a bit like a list or an array,
# but it has its own methods and attributes that are specific to Index objects.

In [42]:
# .axes returns a list holding both indexes: the row index first, then the
# column index.
summer.axes

[Index(['HAJOS, Alfred', 'HERSCHMANN, Otto', 'DRIVAS, Dimitrios',
        'MALOKINIS, Ioannis', 'CHASAPIS, Spiridon', 'CHOROPHAS, Efstathios',
        'HAJOS, Alfred', 'ANDREOU, Joannis', 'CHOROPHAS, Efstathios',
        'NEUMANN, Paul',
        ...
        'AHMADOV, Emin', 'KAZAKEVIC, Aleksandr', 'KHUGAEV, Alan',
        'EBRAHIM, Karam Mohamed Gaber', 'GAJIYEV, Danyal', 'JANIKOWSKI, Damian',
        'REZAEI, Ghasem Gholamreza', 'TOTROV, Rustam', 'ALEKSANYAN, Artur',
        'LIDBERG, Jimmy'],
       dtype='object', name='Athlete', length=31165),
 Index(['Year', 'City', 'Sport', 'Discipline', 'Country', 'Gender', 'Event',
        'Medal'],
       dtype='object')]

In [46]:
# An Index can be sliced; the result is again an Index (first three labels here).
summer.columns[:3]

Index(['Year', 'City', 'Sport'], dtype='object')

In [51]:
# Positional access with [] returns a single label (the first row label here).
summer.index[0]

'HAJOS, Alfred'

In [53]:
# Convert the column Index into a plain Python list of labels.
summer.columns.tolist()

['Year', 'City', 'Sport', 'Discipline', 'Country', 'Gender', 'Event', 'Medal']

In [55]:
# Are all row labels distinct? Not here: the same athlete name shows up on
# more than one row (e.g. "HAJOS, Alfred").
summer.index.is_unique

False

In [57]:
# Look up the integer position of a row label.
summer.index.get_loc("DRIVAS, Dimitrios")

2

## Changing Row Index Labels

In [66]:
# Read the medal table again with "Athlete" as the row index, so this section
# starts from the data as stored on disk.
summer = pd.read_csv(
    "summer.csv",
    index_col="Athlete",
)
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [68]:
# Inspect the current row index (athlete names).
summer.index

Index(['HAJOS, Alfred', 'HERSCHMANN, Otto', 'DRIVAS, Dimitrios',
       'MALOKINIS, Ioannis', 'CHASAPIS, Spiridon', 'CHOROPHAS, Efstathios',
       'HAJOS, Alfred', 'ANDREOU, Joannis', 'CHOROPHAS, Efstathios',
       'NEUMANN, Paul',
       ...
       'AHMADOV, Emin', 'KAZAKEVIC, Aleksandr', 'KHUGAEV, Alan',
       'EBRAHIM, Karam Mohamed Gaber', 'GAJIYEV, Danyal', 'JANIKOWSKI, Damian',
       'REZAEI, Ghasem Gholamreza', 'TOTROV, Rustam', 'ALEKSANYAN, Artur',
       'LIDBERG, Jimmy'],
      dtype='object', name='Athlete', length=31165)

In [171]:
# value_counts() is available on an Index too: it counts how many rows carry
# each athlete label, most frequent first.
summer.index.value_counts()

PHELPS, Michael          22
LATYNINA, Larisa         18
ANDRIANOV, Nikolay       15
ONO, Takashi             13
MANGIAROTTI, Edoardo     13
                         ..
ZAKA, Uddin               1
ZAFAR, Hayat              1
MUHAMMAD, Rashid          1
MANNA, Muhammad Afzal     1
LIDBERG, Jimmy            1
Name: Athlete, Length: 22762, dtype: int64

In [76]:
# Not happy with the current index? reset_index() restores the default integer
# index and moves the old "Athlete" index back into an ordinary column.
# The result is reassigned instead of using inplace=True, which keeps the
# step explicit and chainable.
summer = summer.reset_index()

In [78]:
# Athlete is now a regular column and the rows are labelled 0, 1, 2, ...
summer.head()

Unnamed: 0,Athlete,Year,City,Sport,Discipline,Country,Gender,Event,Medal
0,"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
1,"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
2,"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
3,"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
4,"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [80]:
# The index can also be chosen after importing: promote the "Year" column to
# the row index. The result is reassigned instead of using inplace=True.
summer = summer.set_index("Year")
summer.head()

Unnamed: 0_level_0,Athlete,City,Sport,Discipline,Country,Gender,Event,Medal
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1896,"HAJOS, Alfred",Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
1896,"HERSCHMANN, Otto",Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
1896,"DRIVAS, Dimitrios",Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
1896,"MALOKINIS, Ioannis",Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
1896,"CHASAPIS, Spiridon",Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [83]:
# The Year labels repeat across rows (many medals per Games), so the index is
# not unique.
summer.index.is_unique

False

In [87]:
# What if we want to change just ONE index value?
# Index objects are immutable, so item assignment raises TypeError. The error
# is caught and printed so that this demonstration does not stop a
# "Restart & Run All".
try:
    summer.index[0] = 1894
except TypeError as err:
    print("TypeError:", err)
# What we CAN do is assign a complete new sequence with exactly as many
# elements as there are rows.

TypeError: Index does not support mutable operations

In [89]:
# Number of labels in the row index; a replacement index needs exactly this
# many elements.
summer.index.size

31165

In [107]:
new_index = ["Modal_No {}".format(i) for i in range(1,summer.index.size+1)]
new_index[:5]

['Modal_No 1', 'Modal_No 2', 'Modal_No 3', 'Modal_No 4', 'Modal_No 5']

In [97]:
#now we can change the index!!!! 
summer.index = new_index

In [99]:
summer.head()

Unnamed: 0,Athlete,City,Sport,Discipline,Country,Gender,Event,Medal
Modal_No 1,"HAJOS, Alfred",Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
Modal_No 2,"HERSCHMANN, Otto",Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
Modal_No 3,"DRIVAS, Dimitrios",Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
Modal_No 4,"MALOKINIS, Ioannis",Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
Modal_No 5,"CHASAPIS, Spiridon",Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [101]:
summer.index.name = "Medal_No"

In [103]:
summer.head()

Unnamed: 0_level_0,Athlete,City,Sport,Discipline,Country,Gender,Event,Medal
Medal_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Modal_No 1,"HAJOS, Alfred",Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
Modal_No 2,"HERSCHMANN, Otto",Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
Modal_No 3,"DRIVAS, Dimitrios",Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
Modal_No 4,"MALOKINIS, Ioannis",Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
Modal_No 5,"CHASAPIS, Spiridon",Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


## Changing Column Labels

In [114]:
# Load the Titanic passenger table, keeping the default integer row index.
titanic = pd.read_csv(
    "titanic.csv",
)
# Preview the first rows.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [116]:
# The current column labels, as an Index object.
titanic.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'deck'],
      dtype='object')

In [118]:
# Reading a single column label by position works fine.
titanic.columns[0]

'survived'

In [120]:
# Writing a single column label does not work: the column Index is immutable,
# so item assignment raises TypeError. The error is caught and printed so that
# this demonstration does not stop a "Restart & Run All".
try:
    titanic.columns[0] = "Alive"
except TypeError as err:
    print("TypeError:", err)

TypeError: Index does not support mutable operations

In [128]:
# Both the column index and the row index are immutable!
# To change labels this way, we must replace the whole sequence with a new one of the same length.

In [130]:
# Assigning a complete list (one label per existing column) replaces all
# column labels at once.
titanic.columns = [
    "Alive",
    "Class",
    "Sex",
    "Age",
    "SibSp",
    "ParChi",
    "Fare",
    "Emb",
    "Deck",
]
titanic.head()

Unnamed: 0,Alive,Class,Sex,Age,SibSp,ParChi,Fare,Emb,Deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## Renaming Labels with .rename() (No Need to Supply the Entire List)

In [137]:
# Load the medal table once more, again with "Athlete" as the row index,
# before trying out .rename().
summer = pd.read_csv(
    "summer.csv",
    index_col="Athlete",
)
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [141]:
# Assigning to a single position fails because Index objects are immutable.
# The TypeError is caught and printed so that this demonstration does not stop
# a "Restart & Run All".
try:
    summer.index[0] = "HAYOS, Alfred"
except TypeError as err:
    print("TypeError:", err)

TypeError: Index does not support mutable operations

In [151]:
# Rename labels on the ROW index axis with a mapping {old_label: new_label}.
# Every row that carries the old label is renamed. The result is reassigned
# instead of using inplace=True.
summer = summer.rename(index={"HAJOS, Alfred": "HAYOS, Alfred"})

In [153]:
# The first row label now reads "HAYOS, Alfred".
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAYOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [165]:
# Rename labels on the COLUMNS axis with a mapping {old_label: new_label}.
# This frame has no "Sex" column (it is already called "Gender"), and rename()
# silently ignores labels it cannot find by default, so only "City" needs to
# be mapped. The result is reassigned instead of using inplace=True.
summer = summer.rename(columns={"City": "Host_City"})

In [167]:
# The "City" column now appears as "Host_City"; all other column labels are unchanged.
summer.head()

Unnamed: 0_level_0,Year,Host_City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAYOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
