In [1]:
%matplotlib inline
import pandas as pd

In [2]:
# This code makes pandas dataframe look formatted (with colors, etc). Note: it uses a file 'style-notebook.css, etc.
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))

In [3]:
titles = pd.DataFrame.from_csv('data/titles.csv', index_col=None, encoding='utf=8')
titles.head()

Unnamed: 0,title,year
0,The Rising Son,1990
1,The Thousand Plane Raid,1969
2,Crucea de piatra,1993
3,Country,2000
4,Gaiking II,2011


In [4]:
cast = pd.DataFrame.from_csv('data/cast.csv', index_col=None, encoding='utf=8')
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Suuri illusioni,1985,Homo $,actor,Guests,22.0
1,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
2,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
3,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,
4,Straight Outta Compton,2015,$hutter,actor,Club Patron,


### How many movies are listed in the titles dataframe?

In [5]:
len(titles)

225984

### What are the earliest two films listed in the titles dataframe?

In [6]:
titles.sort_values('year').head(2)

Unnamed: 0,title,year
172759,Miss Jerry,1894
121072,Reproduction of the Corbett and Jeffries Fight,1899


### How many movies have the title "Hamlet"?

In [7]:
len(titles[titles['title'] == 'Hamlet'])

19

### How many movies are titled "North by Northwest"?

In [8]:
len(titles[titles.title == 'North by Northwest'])

1

### When was the first movie titled "Hamlet" made?

In [9]:
titles[titles.title == 'Hamlet'].sort_values('year').head(1)

Unnamed: 0,title,year
45437,Hamlet,1910


### List all of the "Treasure Island" movies from earliest to most recent.

In [10]:
titles[titles.title == 'Treasure Island'].sort_values('year')

Unnamed: 0,title,year
200338,Treasure Island,1918
49875,Treasure Island,1920
185736,Treasure Island,1934
94278,Treasure Island,1950
57999,Treasure Island,1972
108325,Treasure Island,1973
199726,Treasure Island,1985
174330,Treasure Island,1999


### How many movies were made in the year 1950?

In [11]:
len(titles[titles.year == 1950])

1082

### How many movies were made in the year 1960?

In [12]:
len(titles[titles.year == 1960])

1484

### How many movies were made from 1950 through 1959?

In [13]:
len(titles[(titles.year >= 1950) & (titles.year < 1960)])

12468

### In what years has a movie titled "Batman" been released?

In [14]:
titles[titles.title == "Batman"]

Unnamed: 0,title,year
54479,Batman,1989
72586,Batman,1943


### How many roles were there in the movie "Inception"?

In [15]:
len(cast[cast.title == "Inception"])

78

### How many roles in the movie "Inception" are NOT ranked by an "n" value?

In [16]:
inception_cast = cast[cast.title == "Inception"]

In [20]:
inception_cast_null = inception_cast[inception_cast.n.isnull()]
inception_cast_null

Unnamed: 0,title,year,name,type,character,n
91224,Inception,2010,Michael (III) August,actor,LAX Passenger,
184310,Inception,2010,Ken Bhan,actor,Mombasan Gambler,
350432,Inception,2010,John Ceallach,actor,LAX Passenger,
640888,Inception,2010,Seve Esposito,actor,Italian Gambler,
647039,Inception,2010,Marcus (II) Everett,actor,Stock Broker,
662990,Inception,2010,Zachary Christopher Fay,actor,Extra,
783670,Inception,2010,Daniel Girondeaud,actor,Bridge Sub Con,
943766,Inception,2010,Andrew Hoagland,actor,Pedestrian,
1026810,Inception,2010,Charles Jarrell,actor,Hotel Guest,
1039402,Inception,2010,Seong-hwan Jo,actor,Japanese Landing Crew,


In [22]:
len(inception_cast_null)

27

### But how many roles in the movie "Inception" did receive an "n" value?

In [18]:
len(inception_cast[inception_cast.n.notnull()])

51

In [19]:
inception_cast[inception_cast.n.notnull()].head()

Unnamed: 0,title,year,name,type,character,n
138528,Inception,2010,Peter Basham,actor,Fischer's Jet Captain,44.0
171288,Inception,2010,Tom Berenger,actor,Browning,8.0
242553,Inception,2010,Virgile Bramly,actor,Bridge Sub Con,29.0
305293,Inception,2010,Michael Caine,actor,Miles,11.0
311506,Inception,2010,Earl Cameron,actor,Elderly Bald Man,20.0


### Display the cast of "North by Northwest" in their correct "n"-value order, ignoring roles that did not earn a numeric "n" value.

In [23]:
nbynw_cast = cast[cast.title == "North by Northwest"]
nbynw_cast.head(2)


Unnamed: 0,title,year,name,type,character,n
11733,North by Northwest,1959,Stanley Adams,actor,Lieutenant Harding,
26846,North by Northwest,1959,Andy Albin,actor,Farmer,


In [24]:
nbynw_cast[nbynw_cast.n.notnull()].sort_values('n')

Unnamed: 0,title,year,name,type,character,n
816462,North by Northwest,1959,Cary Grant,actor,Roger O. Thornhill,1.0
3262390,North by Northwest,1959,Eva Marie Saint,actress,Eve Kendall,2.0
1364705,North by Northwest,1959,James Mason,actor,Phillip Vandamm,3.0
2935500,North by Northwest,1959,Jessie Royce Landis,actress,Clara Thornhill,4.0
333493,North by Northwest,1959,Leo G. Carroll,actor,The Professor,5.0
2837895,North by Northwest,1959,Josephine Hutchinson,actress,Mrs. Townsend,6.0
1588311,North by Northwest,1959,Philip Ober,actor,Lester Townsend,7.0
1193875,North by Northwest,1959,Martin Landau,actor,Leonard,8.0
2290382,North by Northwest,1959,Adam Williams,actor,Valerian,9.0
1696719,North by Northwest,1959,Edward Platt,actor,Victor Larrabee,10.0


### Display the entire cast, in "n"-order, of the 1972 film "Sleuth".

In [25]:
sleuth_cast = cast[cast.title == "Sleuth"]
sleuth_cast.head(2)

Unnamed: 0,title,year,name,type,character,n
242658,Sleuth,2007,Kenneth Branagh,actor,Other Man on T.V.,
305321,Sleuth,1972,Michael Caine,actor,Milo Tindle,2.0


In [26]:
sleuth_cast[sleuth_cast.n.notnull()].sort_values('n')

Unnamed: 0,title,year,name,type,character,n
305322,Sleuth,2007,Michael Caine,actor,Andrew,1.0
1597654,Sleuth,1972,Laurence Olivier,actor,Andrew Wyke,1.0
305321,Sleuth,1972,Michael Caine,actor,Milo Tindle,2.0
1211192,Sleuth,2007,Jude Law,actor,Milo,2.0
349710,Sleuth,1972,Alec Cawthorne,actor,Inspector Doppler,3.0
1691641,Sleuth,2007,Harold Pinter,actor,Man on T.V.,3.0
1372416,Sleuth,1972,John (II) Matthews,actor,Detective Sergeant Tarrant,4.0
2543628,Sleuth,1972,Eve (III) Channing,actress,Marguerite Wyke,5.0
1356873,Sleuth,1972,Teddy Martin,actor,Police Constable Higgs,6.0


### Now display the entire cast, in "n"-order, of the 2007 version of "Sleuth".

In [27]:
sleuth_cast_2007 = sleuth_cast[sleuth_cast.year == 2007]

In [28]:
sleuth_cast_2007[sleuth_cast_2007.n.notnull()].sort_values('n')

Unnamed: 0,title,year,name,type,character,n
305322,Sleuth,2007,Michael Caine,actor,Andrew,1.0
1211192,Sleuth,2007,Jude Law,actor,Milo,2.0
1691641,Sleuth,2007,Harold Pinter,actor,Man on T.V.,3.0


### How many roles were credited in the silent 1921 version of Hamlet?

In [29]:
hamlet_1921 = cast[(cast.title == "Hamlet") & (cast.year == 1921)]

In [37]:
len(hamlet_1921)

9

### How many roles were credited in Branagh’s 1996 Hamlet?

In [34]:
hamlet_1996 = cast[(cast.title == "Hamlet") & (cast.year == 1996)]

In [35]:
len(hamlet_1996)

54

### How many "Hamlet" roles have been listed in all film credits through history?

In [38]:
hamlet_roles = cast[cast.title == "Hamlet"]
len(hamlet_roles)

313

### How many people have played an "Ophelia"?

In [39]:
ophelia = cast[cast.character == "Ophelia"]
len(ophelia)

102

### How many people have played a role called "The Dude"?

In [40]:
the_dude = cast[cast.character == "The Dude"]
the_dude

Unnamed: 0,title,year,name,type,character,n
253887,The Big Lebowski,1998,Jeff Bridges,actor,The Dude,1.0
317156,Terms & Conditions,2015,Jordan Cann,actor,The Dude,9.0
466923,Stranger,2000,Scott Crowell,actor,The Dude,
534106,Pizza Man vs. the Dude,2004,Chris DeMarcus,actor,The Dude,
572030,Sweepstakes,1931,Mike Donlin,actor,The Dude,12.0
589735,Self Helpless,2010,Devin The Dude,actor,The Dude,
1021601,Jay and Silent Bob Strike Back,2001,Matthew (XIX) James,actor,The Dude,37.0
1067492,Explicit Ills,2008,Christopher Kadish,actor,The Dude,27.0
1280183,American Idiots,2013,Jason Loughridge,actor,The Dude,10.0
1298452,The Winds of Autumn,1976,Steve Lyons,actor,The Dude,30.0


In [41]:
len(the_dude)

17

### How many people have played a role called "The Stranger"?

In [42]:
the_stranger = cast[cast.character == "The Stranger"]
len(the_stranger)

206

### How many roles has Sidney Poitier played throughout his career?

In [43]:
sidney = cast[cast.name == "Sidney Poitier"]
sidney

Unnamed: 0,title,year,name,type,character,n
1701274,A Patch of Blue,1965,Sidney Poitier,actor,Gordon Ralfe,1.0
1701275,A Piece of the Action,1977,Sidney Poitier,actor,Manny Durrell,1.0
1701276,A Raisin in the Sun,1961,Sidney Poitier,actor,Walter Lee Younger,1.0
1701277,A Warm December,1973,Sidney Poitier,actor,Dr. Matt Younger,1.0
1701278,All the Young Men,1960,Sidney Poitier,actor,Sgt. Eddie Towler,2.0
1701279,Band of Angels,1957,Sidney Poitier,actor,Rau-Ru,3.0
1701280,Blackboard Jungle,1955,Sidney Poitier,actor,Gregory W. Miller,10.0
1701281,Brother John,1971,Sidney Poitier,actor,John Kane,1.0
1701282,Buck and the Preacher,1972,Sidney Poitier,actor,Buck,1.0
1701283,"Cry, the Beloved Country",1951,Sidney Poitier,actor,Reverend Msimangu,3.0


In [44]:
len(sidney)

43

### How many roles has Judi Dench played?

In [45]:
judi_dench = cast[cast.name == "Judi Dench"]
len(judi_dench)

54

### List the supporting roles (having n=2) played by Cary Grant in the 1940s, in order by year.

In [47]:
cary_grant = cast[cast.name == 'Cary Grant']
cary_grant[(cary_grant.year >= 1940) & (cary_grant.year < 1950) & (cary_grant.n == 2)].sort_values('year')

Unnamed: 0,title,year,name,type,character,n
816459,My Favorite Wife,1940,Cary Grant,actor,Nick,2.0
816469,Penny Serenade,1941,Cary Grant,actor,Roger Adams,2.0


### List the leading roles that Cary Grant played in the 1940s in order by year.

In [48]:
cary_grant[(cary_grant.year >= 1940) & (cary_grant.year <= 1949) & (cary_grant.n == 1)].sort_values('year')

Unnamed: 0,title,year,name,type,character,n
816484,The Howards of Virginia,1940,Cary Grant,actor,Matt Howard,1.0
816441,His Girl Friday,1940,Cary Grant,actor,Walter Burns,1.0
816486,The Philadelphia Story,1940,Cary Grant,actor,C. K. Dexter Haven,1.0
816474,Suspicion,1941,Cary Grant,actor,Johnnie,1.0
816488,The Talk of the Town,1942,Cary Grant,actor,Leopold Dilg,1.0
816465,Once Upon a Honeymoon,1942,Cary Grant,actor,Patrick 'Pat' O'Toole,1.0
816432,Destination Tokyo,1943,Cary Grant,actor,Capt. Cassidy,1.0
816457,Mr. Lucky,1943,Cary Grant,actor,Joe Adams,1.0
816458,Mr. Lucky,1943,Cary Grant,actor,Joe Bascopolous,1.0
816466,Once Upon a Time,1944,Cary Grant,actor,Jerry Flynn,1.0


### How many roles were available for actors in the 1950s?

In [49]:
cast_1950s = cast[(cast.year >= 1950) & (cast.year <= 1959)]
len(cast_1950s)

208718

### How many roles were avilable for actresses in the 1950s?

In [50]:
actresses = cast[cast.type == "actress"]

In [51]:
len(actresses[(actresses.year >=1950) & (actresses.year < 1960)])

55668

### How many leading roles (n=1) were available from the beginning of film history through 1980?

In [52]:
len(cast[(cast.n == 1) & (cast.year <= 1980)])

63124

### How many non-leading roles were available through from the beginning of film history through 1980?

In [53]:
len(cast[(cast.n != 1) & (cast.year <= 1980)])

1081819

### How many roles through 1980 were minor enough that they did not warrant a numeric "n" rank?

In [54]:
len(cast[(cast.n.isnull()) & (cast.year <= 1980)])

432392