# 05_03: Indexing pandas DataFrames

In [1]:
import math
import collections
import dataclasses
import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

In [2]:
# Load the laureates table with the pyarrow parser and pyarrow-backed dtypes.
# Because column names are passed explicitly through `names`, pandas treats
# the first line of the file as data rather than as a header.
nobels = pd.read_csv(
    'nobels.csv',
    names=['year', 'discipline', 'nobelist', 'DOB'],
    engine='pyarrow',
    dtype_backend='pyarrow',
)

In [3]:
# Preview the first five records instead of dumping the whole frame.
nobels.head(n=5)

Unnamed: 0,year,discipline,nobelist,DOB
0,1901,Chemistry,Jacobus H. van 't Hoff,1852-08-30
1,1901,Literature,Sully Prudhomme,1839-03-16
2,1901,Medicine,Emil von Behring,1854-03-15
3,1901,Peace,Frédéric Passy,1822-05-20
4,1901,Peace,Henry Dunant,1828-05-08


In [4]:
# No index column was requested when loading, so the rows carry pandas'
# default positional labels (a RangeIndex).
nobels.index

RangeIndex(start=0, stop=1000, step=1)

In [5]:
# Build a new frame whose row labels are the award years; `nobels` itself is
# left untouched because set_index returns a copy by default.
nobels_by_year = nobels.set_index(keys='year')

In [6]:
# Display the re-indexed frame: 'year' now labels the rows instead of
# appearing as a regular column.
nobels_by_year

Unnamed: 0_level_0,discipline,nobelist,DOB
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1901,Chemistry,Jacobus H. van 't Hoff,1852-08-30
1901,Literature,Sully Prudhomme,1839-03-16
1901,Medicine,Emil von Behring,1854-03-15
1901,Peace,Frédéric Passy,1822-05-20
1901,Peace,Henry Dunant,1828-05-08
...,...,...,...
2023,Medicine,Katalin Karikó,1955-01-17
2023,Peace,Narges Mohammadi,1972-04-21
2023,Physics,Anne L’Huillier,1958-08-16
2023,Physics,Ferenc Krausz,1962-05-17


In [7]:
# The index keeps the name of the column it came from and, unlike the default
# RangeIndex, contains repeated labels (one per laureate in a given year).
nobels_by_year.index

Index([1901, 1901, 1901, 1901, 1901, 1901, 1902, 1902, 1902, 1902,
       ...
       2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023],
      dtype='int64[pyarrow]', name='year', length=1000)

In [8]:
# .loc selects by index label, not by position. 1901 labels several rows, so
# the result is a DataFrame containing all of them.
nobels_by_year.loc[1901]

Unnamed: 0_level_0,discipline,nobelist,DOB
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1901,Chemistry,Jacobus H. van 't Hoff,1852-08-30
1901,Literature,Sully Prudhomme,1839-03-16
1901,Medicine,Emil von Behring,1854-03-15
1901,Peace,Frédéric Passy,1822-05-20
1901,Peace,Henry Dunant,1828-05-08
1901,Physics,Wilhelm Conrad Röntgen,1845-03-27


In [9]:
# The second argument to .loc picks a column by name; with several rows
# matching the label, the result is a Series that is still indexed by year.
nobels_by_year.loc[1901, 'nobelist']

year
1901    Jacobus H. van 't Hoff
1901           Sully Prudhomme
1901          Emil von Behring
1901            Frédéric Passy
1901              Henry Dunant
1901    Wilhelm Conrad Röntgen
Name: nobelist, dtype: string[pyarrow]

In [10]:
# Label slices with .loc include BOTH endpoints (unlike positional Python
# slices), so this requests the years 1914 through 1918 inclusive. Slicing on
# repeated labels works here because the year labels are in sorted order.
nobels_by_year.loc[1914:1918]

Unnamed: 0_level_0,discipline,nobelist,DOB
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1914,Chemistry,Theodore W. Richards,1868-01-31
1914,Medicine,Robert Bárány,1876-04-22
1914,Physics,Max von Laue,1879-10-09
1915,Chemistry,Richard Willstätter,1872-08-13
1915,Literature,Romain Rolland,1866-01-29
1915,Physics,Lawrence Bragg,1890-03-31
1915,Physics,William Bragg,1862-07-02
1916,Literature,Verner von Heidenstam,1859-07-06
1917,Literature,Henrik Pontoppidan,1857-07-24
1917,Literature,Karl Gjellerup,1857-06-02


In [11]:
# Same table, but labelled by prize discipline. The rows keep their original
# (chronological) order, so the new index is NOT sorted.
nobels_by_discipline = nobels.set_index(keys='discipline')

In [12]:
# Looking up a single label works even though 'Chemistry' is repeated and the
# index is unsorted: every matching row is returned.
nobels_by_discipline.loc['Chemistry']

Unnamed: 0_level_0,year,nobelist,DOB
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Chemistry,1901,Jacobus H. van 't Hoff,1852-08-30
Chemistry,1902,Emil Fischer,1852-10-09
Chemistry,1903,Svante Arrhenius,1859-02-19
Chemistry,1904,Sir William Ramsay,1852-10-02
Chemistry,1905,Adolf von Baeyer,1835-10-31
...,...,...,...
Chemistry,2022,Carolyn Bertozzi,1966-10-10
Chemistry,2022,Morten Meldal,
Chemistry,2023,Aleksey Yekimov,
Chemistry,2023,Louis Brus,


In [13]:
# Label slicing needs well-defined slice bounds. This index is unsorted and
# 'Chemistry' occurs many times, so pandas cannot locate a single left bound
# and raises KeyError. The error is the point of this cell, so it is caught
# and shown rather than left to abort "Restart Kernel -> Run All".
try:
    nobels_by_discipline.loc['Chemistry':'Physics']
except KeyError as err:
    # Same text the uncaught exception would have ended with.
    print(f"{type(err).__name__}: {err}")

KeyError: "Cannot get left slice bound for non-unique label: 'Chemistry'"

In [14]:
# sort_index() returns a copy ordered by label. With a sorted (monotonic)
# index pandas can resolve slice bounds on repeated labels, which is what the
# unsorted frame could not do. A single-label lookup is shown here.
nobels_by_discipline.sort_index().loc['Chemistry']

Unnamed: 0_level_0,year,nobelist,DOB
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Chemistry,1901,Jacobus H. van 't Hoff,1852-08-30
Chemistry,1902,Emil Fischer,1852-10-09
Chemistry,1903,Svante Arrhenius,1859-02-19
Chemistry,1904,Sir William Ramsay,1852-10-02
Chemistry,1905,Adolf von Baeyer,1835-10-31
...,...,...,...
Chemistry,2022,Carolyn Bertozzi,1966-10-10
Chemistry,2022,Morten Meldal,
Chemistry,2023,Aleksey Yekimov,
Chemistry,2023,Louis Brus,


In [15]:
# A list of labels selects those groups in the order listed: all Physics rows
# first, then all Chemistry rows.
nobels_by_discipline.sort_index().loc[['Physics', 'Chemistry']]

Unnamed: 0_level_0,year,nobelist,DOB
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Physics,1901,Wilhelm Conrad Röntgen,1845-03-27
Physics,1902,Hendrik A. Lorentz,1853-07-18
Physics,1902,Pieter Zeeman,1865-05-25
Physics,1903,Henri Becquerel,1852-12-15
Physics,1903,Marie Curie,1867-11-07
...,...,...,...
Chemistry,2022,Carolyn Bertozzi,1966-10-10
Chemistry,2022,Morten Meldal,
Chemistry,2023,Aleksey Yekimov,
Chemistry,2023,Louis Brus,


In [16]:
# .iloc ignores the labels and selects by position; as with ordinary Python
# slices the end is exclusive, so this yields the first ten rows.
nobels_by_year.iloc[:10]

Unnamed: 0_level_0,discipline,nobelist,DOB
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1901,Chemistry,Jacobus H. van 't Hoff,1852-08-30
1901,Literature,Sully Prudhomme,1839-03-16
1901,Medicine,Emil von Behring,1854-03-15
1901,Peace,Frédéric Passy,1822-05-20
1901,Peace,Henry Dunant,1828-05-08
1901,Physics,Wilhelm Conrad Röntgen,1845-03-27
1902,Chemistry,Emil Fischer,1852-10-09
1902,Literature,Theodor Mommsen,1817-11-30
1902,Medicine,Ronald Ross,1857-05-13
1902,Peace,Albert Gobat,1843-05-21


In [17]:
# Index by two columns at once: every row is now labelled by a
# (year, discipline) pair, giving a hierarchical MultiIndex.
nobels_multi = nobels.set_index(keys=['year', 'discipline'])

In [18]:
# The index is a MultiIndex of (year, discipline) tuples; a tuple repeats when
# a prize was shared.
nobels_multi.index

MultiIndex([(1901,  'Chemistry'),
            (1901, 'Literature'),
            (1901,   'Medicine'),
            (1901,      'Peace'),
            (1901,      'Peace'),
            (1901,    'Physics'),
            (1902,  'Chemistry'),
            (1902, 'Literature'),
            (1902,   'Medicine'),
            (1902,      'Peace'),
            ...
            (2023,  'Chemistry'),
            (2023,  'Chemistry'),
            (2023,  'Economics'),
            (2023, 'Literature'),
            (2023,   'Medicine'),
            (2023,   'Medicine'),
            (2023,      'Peace'),
            (2023,    'Physics'),
            (2023,    'Physics'),
            (2023,    'Physics')],
           names=['year', 'discipline'], length=1000)

In [19]:
# Pull out the outer level of the MultiIndex, addressed by its name
# (equivalent to level number 0).
nobels_multi.index.get_level_values('year')

Index([1901, 1901, 1901, 1901, 1901, 1901, 1902, 1902, 1902, 1902,
       ...
       2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023],
      dtype='int64[pyarrow]', name='year', length=1000)

In [20]:
# Pull out the inner level of the MultiIndex, addressed by its name
# (equivalent to level number 1).
nobels_multi.index.get_level_values('discipline')

Index(['Chemistry', 'Literature', 'Medicine', 'Peace', 'Peace', 'Physics',
       'Chemistry', 'Literature', 'Medicine', 'Peace',
       ...
       'Chemistry', 'Chemistry', 'Economics', 'Literature', 'Medicine',
       'Medicine', 'Peace', 'Physics', 'Physics', 'Physics'],
      dtype='string[pyarrow]', name='discipline', length=1000)

In [21]:
# A complete (year, discipline) tuple selects every row carrying exactly that
# pair of labels.
nobels_multi.loc[(2017, 'Physics')]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist,DOB
year,discipline,Unnamed: 2_level_1,Unnamed: 3_level_1
2017,Physics,Barry C. Barish,1936-01-27
2017,Physics,Kip S. Thorne,1940-06-01
2017,Physics,Rainer Weiss,1932-09-29


In [22]:
# A bare label is matched against the first level (year); the result is
# indexed by the remaining level, discipline.
nobels_multi.loc[2017]

Unnamed: 0_level_0,nobelist,DOB
discipline,Unnamed: 1_level_1,Unnamed: 2_level_1
Chemistry,Jacques Dubochet,1942-06-08
Chemistry,Joachim Frank,1940-09-12
Chemistry,Richard Henderson,1945-07-19
Economics,Richard H. Thaler,1945-09-12
Literature,Kazuo Ishiguro,1954-11-08
Medicine,Jeffrey C. Hall,1945-05-03
Medicine,Michael Rosbash,1944-03-07
Medicine,Michael W. Young,1949-03-28
Peace,International Campaign to Abolish Nuclear Weapons,
Physics,Barry C. Barish,1936-01-27


In [23]:
# One selector per index level: an inclusive year range for the outer level
# and a single discipline for the inner one. This tuple is exactly what
# pd.IndexSlice[1901:1910, 'Chemistry'] builds. The trailing ':' selects all
# columns and makes it unambiguous that the tuple addresses rows.
nobels_multi.loc[(slice(1901, 1910), 'Chemistry'), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist,DOB
year,discipline,Unnamed: 2_level_1,Unnamed: 3_level_1
1901,Chemistry,Jacobus H. van 't Hoff,1852-08-30
1902,Chemistry,Emil Fischer,1852-10-09
1903,Chemistry,Svante Arrhenius,1859-02-19
1904,Chemistry,Sir William Ramsay,1852-10-02
1905,Chemistry,Adolf von Baeyer,1835-10-31
1906,Chemistry,Henri Moissan,1852-09-28
1907,Chemistry,Eduard Buchner,1860-05-20
1908,Chemistry,Ernest Rutherford,1871-08-30
1909,Chemistry,Wilhelm Ostwald,1853-09-02
1910,Chemistry,Otto Wallach,1847-03-27


In [24]:
# slice(None) is the spelled-out form of ':' and means "every year"; the
# second selector keeps only Chemistry. This tuple is exactly what
# pd.IndexSlice[:, 'Chemistry'] builds.
nobels_multi.loc[(slice(None), 'Chemistry'), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist,DOB
year,discipline,Unnamed: 2_level_1,Unnamed: 3_level_1
1901,Chemistry,Jacobus H. van 't Hoff,1852-08-30
1902,Chemistry,Emil Fischer,1852-10-09
1903,Chemistry,Svante Arrhenius,1859-02-19
1904,Chemistry,Sir William Ramsay,1852-10-02
1905,Chemistry,Adolf von Baeyer,1835-10-31
...,...,...,...
2022,Chemistry,Carolyn Bertozzi,1966-10-10
2022,Chemistry,Morten Meldal,
2023,Chemistry,Aleksey Yekimov,
2023,Chemistry,Louis Brus,
