# 05_05: Database operations in pandas

In [1]:
import math
import collections
import dataclasses
import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

In [2]:
actors = pd.read_csv('actors.csv', dtype_backend='pyarrow')

In [3]:
actresses = pd.read_csv('actresses.csv', dtype_backend='pyarrow')

In [4]:
actors.head(5)

Unnamed: 0,year_ceremony,category,film,name
0,1928,ACTOR,The Last Command,Emil Jannings
1,1928,ACTOR,The Noose,Richard Barthelmess
2,1929,ACTOR,Alibi,Chester Morris
3,1929,ACTOR,In Old Arizona,Warner Baxter
4,1929,ACTOR,The Patriot (1928),Lewis Stone


In [5]:
actors.tail(10)

Unnamed: 0,year_ceremony,category,film,name
902,2024,ACTOR IN A LEADING ROLE,American Fiction,Jeffrey Wright
903,2024,ACTOR IN A SUPPORTING ROLE,American Fiction,Sterling K. Brown
904,2024,ACTOR IN A SUPPORTING ROLE,Barbie,Ryan Gosling
905,2024,ACTOR IN A SUPPORTING ROLE,Killers of the Flower Moon,Robert De Niro
906,2024,ACTOR IN A LEADING ROLE,Maestro (2023),Bradley Cooper
907,2024,ACTOR IN A LEADING ROLE,Oppenheimer,Cillian Murphy
908,2024,ACTOR IN A SUPPORTING ROLE,Oppenheimer,Robert Downey Jr.
909,2024,ACTOR IN A SUPPORTING ROLE,Poor Things,Mark Ruffalo
910,2024,ACTOR IN A LEADING ROLE,Rustin,Colman Domingo
911,2024,ACTOR IN A LEADING ROLE,The Holdovers,Paul Giamatti


In [6]:
actresses.head(5)

Unnamed: 0,year_ceremony,category,film,name
0,1928,ACTRESS,7th Heaven,Janet Gaynor
1,1928,ACTRESS,A Ship Comes In,Louise Dresser
2,1928,ACTRESS,Sadie Thompson,Gloria Swanson
3,1929,ACTRESS,Coquette,Mary Pickford
4,1929,ACTRESS,Madame X,Ruth Chatterton


In [7]:
pd.concat([actors, actresses])

Unnamed: 0,year_ceremony,category,film,name
0,1928,ACTOR,The Last Command,Emil Jannings
1,1928,ACTOR,The Noose,Richard Barthelmess
2,1929,ACTOR,Alibi,Chester Morris
3,1929,ACTOR,In Old Arizona,Warner Baxter
4,1929,ACTOR,The Patriot (1928),Lewis Stone
...,...,...,...,...
911,2024,ACTRESS IN A SUPPORTING ROLE,Nyad,Jodie Foster
912,2024,ACTRESS IN A SUPPORTING ROLE,Oppenheimer,Emily Blunt
913,2024,ACTRESS IN A LEADING ROLE,Poor Things,Emma Stone
914,2024,ACTRESS IN A SUPPORTING ROLE,The Color Purple (2023),Danielle Brooks


In [8]:
nominations = pd.concat([actors, actresses]).sort_values(['year_ceremony', 'category']).reset_index(drop=True)

In [9]:
nominations

Unnamed: 0,year_ceremony,category,film,name
0,1928,ACTOR,The Last Command,Emil Jannings
1,1928,ACTOR,The Noose,Richard Barthelmess
2,1928,ACTRESS,7th Heaven,Janet Gaynor
3,1928,ACTRESS,A Ship Comes In,Louise Dresser
4,1928,ACTRESS,Sadie Thompson,Gloria Swanson
...,...,...,...,...
1823,2024,ACTRESS IN A SUPPORTING ROLE,Barbie,America Ferrera
1824,2024,ACTRESS IN A SUPPORTING ROLE,Nyad,Jodie Foster
1825,2024,ACTRESS IN A SUPPORTING ROLE,Oppenheimer,Emily Blunt
1826,2024,ACTRESS IN A SUPPORTING ROLE,The Color Purple (2023),Danielle Brooks


In [10]:
movies = pd.read_csv('movies.csv', dtype_backend='pyarrow')

In [11]:
movies.head()

Unnamed: 0,film,year_film
0,"$1,000 a Minute",1935
1,'38',1986
2,'Crocodile' Dundee,1986
3,'Round Midnight,1986
4,(A) Torzija [(A) Torsion],2003


In [12]:
pd.merge(nominations, movies, on='film')

Unnamed: 0,year_ceremony,category,film,name,year_film
0,1928,ACTOR,The Last Command,Emil Jannings,1927
1,1928,ACTOR,The Noose,Richard Barthelmess,1927
2,1928,ACTRESS,7th Heaven,Janet Gaynor,1927
3,1928,ACTRESS,A Ship Comes In,Louise Dresser,1927
4,1928,ACTRESS,Sadie Thompson,Gloria Swanson,1927
...,...,...,...,...,...
1823,2024,ACTRESS IN A SUPPORTING ROLE,Barbie,America Ferrera,2023
1824,2024,ACTRESS IN A SUPPORTING ROLE,Nyad,Jodie Foster,2023
1825,2024,ACTRESS IN A SUPPORTING ROLE,Oppenheimer,Emily Blunt,2023
1826,2024,ACTRESS IN A SUPPORTING ROLE,The Color Purple (2023),Danielle Brooks,2023


In [13]:
pd.merge(nominations, movies.iloc[:3000], on='film')

Unnamed: 0,year_ceremony,category,film,name,year_film
0,1928,ACTRESS,7th Heaven,Janet Gaynor,1927
1,1928,ACTRESS,A Ship Comes In,Louise Dresser,1927
2,1929,ACTOR,Alibi,Chester Morris,1928
3,1929,ACTOR,In Old Arizona,Warner Baxter,1928
4,1929,ACTRESS,Coquette,Mary Pickford,1928
...,...,...,...,...,...
1090,2024,ACTRESS IN A LEADING ROLE,Nyad,Annette Bening,2023
1091,2024,ACTRESS IN A LEADING ROLE,Poor Things,Emma Stone,2023
1092,2024,ACTRESS IN A SUPPORTING ROLE,Barbie,America Ferrera,2023
1093,2024,ACTRESS IN A SUPPORTING ROLE,Nyad,Jodie Foster,2023


In [14]:
pd.merge(nominations, movies.iloc[:3000], on='film', how='left')

Unnamed: 0,year_ceremony,category,film,name,year_film
0,1928,ACTOR,The Last Command,Emil Jannings,
1,1928,ACTOR,The Noose,Richard Barthelmess,
2,1928,ACTRESS,7th Heaven,Janet Gaynor,1927
3,1928,ACTRESS,A Ship Comes In,Louise Dresser,1927
4,1928,ACTRESS,Sadie Thompson,Gloria Swanson,
...,...,...,...,...,...
1823,2024,ACTRESS IN A SUPPORTING ROLE,Barbie,America Ferrera,2023
1824,2024,ACTRESS IN A SUPPORTING ROLE,Nyad,Jodie Foster,2023
1825,2024,ACTRESS IN A SUPPORTING ROLE,Oppenheimer,Emily Blunt,2023
1826,2024,ACTRESS IN A SUPPORTING ROLE,The Color Purple (2023),Danielle Brooks,


In [15]:
pd.merge(nominations, movies, on='film', how='right')

Unnamed: 0,year_ceremony,category,film,name,year_film
0,,,"$1,000 a Minute",,1935
1,,,'38',,1986
2,,,'Crocodile' Dundee,,1986
3,1987,ACTOR IN A LEADING ROLE,'Round Midnight,Dexter Gordon,1986
4,,,(A) Torzija [(A) Torsion],,2003
...,...,...,...,...,...
5757,,,Zus & Zo,,2002
5758,2022,ACTOR IN A LEADING ROLE,"tick, tick...BOOM!",Andrew Garfield,2021
5759,,,À Nous la Liberté,,1931
5760,,,Ådalen '31,,1969


In [16]:
nominations = pd.merge(nominations, movies, on='film')

In [17]:
nominations.value_counts('name').head(10)

name
Meryl Streep         21
Katharine Hepburn    12
Jack Nicholson       12
Bette Davis          11
Paul Newman           9
Spencer Tracy         9
Al Pacino             9
Laurence Olivier      9
Denzel Washington     9
Robert De Niro        8
Name: count, dtype: int64

In [18]:
awards = pd.read_csv('awards.csv', dtype_backend='pyarrow')

In [19]:
awards.head()

Unnamed: 0,year_film,year_ceremony,category,film,name
0,1927,1928,ACTOR,The Last Command,Emil Jannings
1,1927,1928,ACTRESS,7th Heaven,Janet Gaynor
2,1927,1928,ART DIRECTION,The Dove (1927),William Cameron Menzies
3,1927,1928,CINEMATOGRAPHY,Sunrise,Karl Struss
4,1927,1928,CINEMATOGRAPHY,Sunrise,Charles Rosher


In [20]:
awards[awards.name == 'Meryl Streep']

Unnamed: 0,year_film,year_ceremony,category,film,name
1113,1979,1980,ACTRESS IN A SUPPORTING ROLE,Kramer vs. Kramer,Meryl Streep
1179,1982,1983,ACTRESS IN A LEADING ROLE,Sophie's Choice,Meryl Streep
1864,2011,2012,ACTRESS IN A LEADING ROLE,The Iron Lady,Meryl Streep


In [21]:
awards.value_counts('name').head(10)

name
Walt Disney, Producer                                                           22
Metro-Goldwyn-Mayer                                                             12
Italy                                                                           10
France                                                                           9
Warner Bros.                                                                     7
Alfred Newman                                                                    7
Gordon Hollingshead, Producer                                                    6
Paramount                                                                        5
Edward Selzer, Producer                                                          5
Metro-Goldwyn-Mayer Studio Sound Department, Douglas Shearer, Sound Director     5
Name: count, dtype: int64

In [22]:
pd.merge(nominations.value_counts('name'), awards.value_counts('name'), left_index=True, right_index=True).head(10)

Unnamed: 0_level_0,count_x,count_y
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Meryl Streep,21,3
Katharine Hepburn,12,4
Jack Nicholson,12,3
Bette Davis,11,2
Paul Newman,9,1
Spencer Tracy,9,2
Al Pacino,9,1
Laurence Olivier,9,1
Denzel Washington,9,2
Robert De Niro,8,2


In [23]:
pd.concat([nominations.value_counts('name'), awards.value_counts('name')], axis=1).head(10)

Unnamed: 0_level_0,count,count
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Meryl Streep,21.0,3.0
Katharine Hepburn,12.0,4.0
Jack Nicholson,12.0,3.0
Bette Davis,11.0,2.0
Paul Newman,9.0,1.0
Spencer Tracy,9.0,2.0
Al Pacino,9.0,1.0
Laurence Olivier,9.0,1.0
Denzel Washington,9.0,2.0
Robert De Niro,8.0,2.0


In [24]:
awards.film.value_counts()

film
Ben-Hur                                              11
Titanic (1997)                                       11
The Lord of the Rings: The Return of the King        11
West Side Story (1961)                               10
Gigi                                                  9
                                                     ..
WAR IS OVER! Inspired by the Music of John & Yoko     1
The Wonderful Story of Henry Sugar                    1
Godzilla Minus One                                    1
American Fiction                                      1
Anatomy of a Fall                                     1
Name: count, Length: 1343, dtype: int64[pyarrow]

In [25]:
pd.merge(awards, awards.film.value_counts(), left_on='film', right_index=True).query('count > 10')

  pd.merge(awards, awards.film.value_counts(), left_on='film', right_index=True).query('count > 10')


Unnamed: 0,year_film,year_ceremony,category,film,name,count
638,1959,1960,ACTOR,Ben-Hur,Charlton Heston,11
639,1959,1960,ACTOR IN A SUPPORTING ROLE,Ben-Hur,Hugh Griffith,11
643,1959,1960,ART DIRECTION (Color),Ben-Hur,"Art Direction: William A. Horning, Edward Car...",11
644,1959,1960,BEST MOTION PICTURE,Ben-Hur,"Sam Zimbalist, Producer",11
646,1959,1960,CINEMATOGRAPHY (Color),Ben-Hur,Robert L. Surtees,11
648,1959,1960,COSTUME DESIGN (Color),Ben-Hur,Elizabeth Haffenden,11
649,1959,1960,DIRECTING,Ben-Hur,William Wyler,11
652,1959,1960,FILM EDITING,Ben-Hur,"Ralph E. Winters, John D. Dunning",11
654,1959,1960,MUSIC (Music Score of a Dramatic or Comedy Pic...,Ben-Hur,Miklos Rozsa,11
659,1959,1960,SOUND,Ben-Hur,"Metro-Goldwyn-Mayer Studio Sound Department, F...",11


In [26]:
acting = awards[awards.category.str.startswith('ACT')]

In [27]:
pd.merge(acting, acting.film.value_counts(), left_on='film', right_index=True).query('count > 1')

  pd.merge(acting, acting.film.value_counts(), left_on='film', right_index=True).query('count > 1')


Unnamed: 0,year_film,year_ceremony,category,film,name,count
59,1934,1935,ACTOR,It Happened One Night,Clark Gable,2
60,1934,1935,ACTRESS,It Happened One Night,Claudette Colbert,2
134,1938,1939,ACTRESS,Jezebel,Bette Davis,2
135,1938,1939,ACTRESS IN A SUPPORTING ROLE,Jezebel,Fay Bainter,2
152,1939,1940,ACTRESS,Gone with the Wind,Vivien Leigh,2
...,...,...,...,...,...,...
2126,2022,2023,ACTOR IN A SUPPORTING ROLE,Everything Everywhere All at Once,Ke Huy Quan,3
2127,2022,2023,ACTRESS IN A LEADING ROLE,Everything Everywhere All at Once,Michelle Yeoh,3
2128,2022,2023,ACTRESS IN A SUPPORTING ROLE,Everything Everywhere All at Once,Jamie Lee Curtis,3
2149,2023,2024,ACTOR IN A LEADING ROLE,Oppenheimer,Cillian Murphy,2
