In [1]:
%matplotlib inline
import pandas as pd


In [2]:
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))

In [3]:
titles = pd.read_csv('D:\Downloads\pycon-pandas-tutorial-master\data/titles.csv')
titles.head()

Unnamed: 0,title,year
0,A Trip to the Moon,2017
1,My Wife's Relatives,1939
2,Passione d'amore,1981
3,Purple Day,1999
4,Sud Side Stori,2000


In [4]:
cast = pd.read_csv('D:\Downloads\pycon-pandas-tutorial-master\data\cast.csv')
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,31.0
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


### How many movies are listed in the titles dataframe?

In [5]:
number_of_movies = titles['title'].nunique()
print(f'Total no. of movies: {number_of_movies}')

Total no. of movies: 207630


### What are the earliest two films listed in the titles dataframe?

In [6]:
earliest = titles.sort_values('year', ascending=True).head(2)
print(earliest.to_string(index=False))

             title  year
        Miss Jerry  1894
The Startled Lover  1898


### How many movies have the single-word title "Hamlet"?

In [7]:
hamlet_appearances = titles['title'].str.contains('Hamlet', case=False).sum()
print(f'No. of movies with hamlet in the title: {hamlet_appearances}')

No. of movies with hamlet in the title: 55


### How many movies are titled "North by Northwest"?

In [8]:
north_by_northwest = titles.value_counts(subset='title').loc['North by Northwest']
print(f'No. of movies titled "North by Northwest": {north_by_northwest}')

No. of movies titled "North by Northwest": 1


### When was the first movie titled "Hamlet" made?

In [9]:
first_hamlet = (titles[titles['title'].str.contains('hamlet', case=False, na=False)]
                .sort_values('year')
                .head(1))
print(first_hamlet.to_string(index=False))

 title  year
Hamlet  1910


### List all of the "Treasure Island" movies from earliest to most recent.

In [10]:
treasure_island = (titles[titles['title'].str.contains('treasure island', case=False, na=False)]
                   .sort_values('year', ascending=True))
print(treasure_island.to_string(index=False))

                                               title  year
                                     Treasure Island  1918
                                     Treasure Island  1920
                                     Treasure Island  1934
                       The Secret of Treasure Island  1938
                                     Treasure Island  1950
                           Return to Treasure Island  1954
                           Five on a Treasure Island  1957
                                     Treasure Island  1972
                                     Treasure Island  1973
                                     Treasure Island  1985
                              Muppet Treasure Island  1996
                                Treasure Island (II)  1999
                                     Treasure Island  1999
Treasure Island Kids: The Mystery of Treasure Island  2006
 Treasure Island Kids: The Battle of Treasure Island  2006
                          Pirates of Treasure Island  20

### How many movies were made in the year 1950?

In [11]:
movies_1950 = (titles[titles['year'] == 1950]
               .value_counts(subset='title')
               .sum())
print(f'Total no. of movies made in 1950: {movies_1950} movies')

Total no. of movies made in 1950: 1109 movies


### How many movies were made in the year 1960?

In [12]:
movies_1960 = (titles[titles['year'] == 1960]
               .value_counts(subset='title')
               .sum())
print(f'Total no. of movies made in 1960: {movies_1960} movies')

Total no. of movies made in 1960: 1496 movies


### How many movies were made from 1950 through 1959?

In [13]:
movies_1950_1959 = (titles[(titles['year'].between(1950, 1959))]
                    .value_counts(subset='title')
                    .sum())
print(f'Total no. of movies made from 1950 to 1959: {movies_1950_1959} movies')

Total no. of movies made from 1950 to 1959: 12664 movies


### In what years has a movie titled "Batman" been released?

In [14]:
batman_movies = (titles[titles['title'] == 'Batman']
                .loc[:, ['year']])
print(batman_movies.to_string(index=False))

 year
 1943
 1989


### How many roles were there in the movie "Inception"?

In [15]:
inception_roles_no = (cast[cast['title'] == 'Inception']
                      .value_counts(subset='character')
                      .sum())
print(f'Number of roles in Incention: {inception_roles_no}')


Number of roles in Incention: 78


### How many roles in the movie "Inception" are NOT ranked by an "n" value?

In [16]:
unranked_inception = (cast[(cast['title'] == 'Inception') 
                           & (cast['n'].isnull())]
                      .value_counts(subset='character')
                      .sum())
print(f'Number of unranked roles in Inception: {unranked_inception}')

Number of unranked roles in Inception: 27


### But how many roles in the movie "Inception" did receive an "n" value?

In [17]:
ranked_inception = (cast[(cast['title'] == 'Inception') 
                         & (cast['n'].notnull())]
                    .value_counts(subset='character')
                    .sum())
print(f'Number of unranked roles in Inception: {ranked_inception}')

Number of unranked roles in Inception: 51


### Display the cast of "North by Northwest" in their correct "n"-value order, ignoring roles that did not earn a numeric "n" value.

In [18]:
north_by_northwest_cast = (cast[(cast['title'] == 'North by Northwest') 
                                & (cast['n'].notnull())]
                           .sort_values('n'))
print(north_by_northwest_cast.to_string(index=False))

             title  year                 name    type                   character    n
North by Northwest  1959           Cary Grant   actor          Roger O. Thornhill  1.0
North by Northwest  1959      Eva Marie Saint actress                 Eve Kendall  2.0
North by Northwest  1959          James Mason   actor             Phillip Vandamm  3.0
North by Northwest  1959  Jessie Royce Landis actress             Clara Thornhill  4.0
North by Northwest  1959       Leo G. Carroll   actor               The Professor  5.0
North by Northwest  1959 Josephine Hutchinson actress               Mrs. Townsend  6.0
North by Northwest  1959          Philip Ober   actor             Lester Townsend  7.0
North by Northwest  1959        Martin Landau   actor                     Leonard  8.0
North by Northwest  1959        Adam Williams   actor                    Valerian  9.0
North by Northwest  1959         Edward Platt   actor             Victor Larrabee 10.0
North by Northwest  1959    Robert Ellenste

### Display the entire cast, in "n"-order, of the 1972 film "Sleuth".

In [19]:
sleuth_1972_cast = (cast[(cast['title'] == 'Sleuth') 
                         & (cast['year'] == 1972)]
                    .sort_values('n'))
print(sleuth_1972_cast.to_string(index=False))

 title  year               name    type                  character   n
Sleuth  1972   Laurence Olivier   actor                Andrew Wyke 1.0
Sleuth  1972      Michael Caine   actor                Milo Tindle 2.0
Sleuth  1972     Alec Cawthorne   actor          Inspector Doppler 3.0
Sleuth  1972 John (II) Matthews   actor Detective Sergeant Tarrant 4.0
Sleuth  1972 Eve (III) Channing actress            Marguerite Wyke 5.0
Sleuth  1972       Teddy Martin   actor     Police Constable Higgs 6.0


### Now display the entire cast, in "n"-order, of the 2007 version of "Sleuth".

In [20]:
sleuth_2007_cast = (cast[(cast['title'] == 'Sleuth') 
                         & (cast['year'] == 2007)]
                    .sort_values('n'))
print(sleuth_2007_cast.to_string(index=False))

 title  year                name    type         character   n
Sleuth  2007       Michael Caine   actor            Andrew 1.0
Sleuth  2007            Jude Law   actor              Milo 2.0
Sleuth  2007       Harold Pinter   actor       Man on T.V. 3.0
Sleuth  2007     Kenneth Branagh   actor Other Man on T.V. NaN
Sleuth  2007 Alec (II) Cawthorne   actor Inspector Doppler NaN
Sleuth  2007   Eve (II) Channing actress   Marguerite Wyke NaN
Sleuth  2007   Carmel O'Sullivan actress            Maggie NaN


### How many roles were credited in the silent 1921 version of Hamlet?

In [21]:
hamlet_1921 = (cast[(cast['title'] == 'Hamlet') 
                    & (cast['year'] == 1921)]
               .value_counts(subset='name')
               .sum())
print(f'Number of actors in the 1921 version of Hamlet: {hamlet_1921}')

Number of actors in the 1921 version of Hamlet: 9


### How many roles were credited in Branagh’s 1996 Hamlet?

In [22]:
hamlet_1996 = (cast[(cast['title'] == 'Hamlet') 
                    & (cast['year'] == 1996)]
               .value_counts(subset='name')
               .sum())
print(f'Number of actors in the 1996 version of Hamlet: {hamlet_1996}')

Number of actors in the 1996 version of Hamlet: 54


### How many "Hamlet" roles have been listed in all film credits through history?

In [23]:
hamlet_roles = (cast[cast['title'] == 'Hamlet']
                .value_counts(subset='character')
                .sum())
print(f'Number of roles in all Hamlet movies: {hamlet_roles}')

Number of roles in all Hamlet movies: 342


### How many people have played an "Ophelia"?

In [24]:
ophelia_roles =(cast[cast['character'] == 'Ophelia']
                ['name'].nunique())
print(f'Number of actresses who played Ophelia: {ophelia_roles}')


Number of actresses who played Ophelia: 101


### How many people have played a role called "The Dude"?

In [25]:
the_dude_roles = (cast.loc[cast['character'] == 'The Dude']
                  ['name'].nunique())
print(f'Number of actors who played the role of "The Dude": {the_dude_roles}')

Number of actors who played the role of "The Dude": 17


### How many people have played a role called "The Stranger"?

In [26]:
the_stranger_roles = (cast.loc[cast['character'] == 'The Stranger']
                  ['name'].nunique())
print(f'Number of actors who played the role of "The Stranger": {the_stranger_roles}')

Number of actors who played the role of "The Stranger": 160


### How many roles has Sidney Poitier played throughout his career?

In [27]:
sidney_poitier_roles = (cast.loc[cast['name'] == 'Sidney Poitier']
                        .value_counts(subset='character')
                        .sum())
print(f'Number of roles Sidney Poiter has played: {sidney_poitier_roles}')

Number of roles Sidney Poiter has played: 43


### How many roles has Judi Dench played?

In [28]:
judi_dench_roles = (cast.loc[cast['name'] == 'Judi Dench']
                        .value_counts(subset='character')
                        .sum())
print(f'Number of roles Judi Dench has played: {judi_dench_roles}')

Number of roles Judi Dench has played: 55


### List the supporting roles (having n=2) played by Cary Grant in the 1940s, in order by year.

In [29]:
cary_grant_1940_roles = (cast.loc[(cast['name'] == 'Cary Grant') 
                                  & (cast['n'] == 2)
                                  & (cast['year'].between(1940, 1949))
                                  ,['title', 'year']]
                            .sort_values('year'))
print('Roles Cary Grant played in the 1940s where he was not the lead actor')
print(cary_grant_1940_roles.to_string(index=False))


Roles Cary Grant played in the 1940s where he was not the lead actor
           title  year
My Favorite Wife  1940
  Penny Serenade  1941


### List the leading roles that Cary Grant played in the 1940s in order by year.

In [30]:
cary_grant_1940_lead = (cast.loc[(cast['name'] == 'Cary Grant')
                                 & (cast['year'].between(1940, 1949))
                                           & (cast['n'] == 1)
                                           , ['title', 'year']]
                                           .sort_values('year'))
print('Cary Grant lead roles in the 1940s')
print(cary_grant_1940_lead.to_string(index=False))

Cary Grant lead roles in the 1940s
                               title  year
                     His Girl Friday  1940
             The Howards of Virginia  1940
              The Philadelphia Story  1940
                           Suspicion  1941
                The Talk of the Town  1942
               Once Upon a Honeymoon  1942
                           Mr. Lucky  1943
                   Destination Tokyo  1943
                    Once Upon a Time  1944
                Arsenic and Old Lace  1944
           None But the Lonely Heart  1944
                           Notorious  1946
                       Night and Day  1946
    The Bachelor and the Bobby-Soxer  1947
                   The Bishop's Wife  1947
        Every Girl Should Be Married  1948
Mr. Blandings Builds His Dream House  1948
              I Was a Male War Bride  1949


### How many roles were available for actors in the 1950s?

In [31]:
unique_actor_roles_1950s = (cast.loc[(cast['year'].between(1950, 1959))
                               & (cast['type'] == 'actor')
                               , 'character']
                      .value_counts()
                      .sum())
print(f'Available actor roles in the 1950s: {unique_actor_roles_1950s} roles')

Available actor roles in the 1950s: 153559 roles


### How many roles were available for actresses in the 1950s?

In [32]:
unique_actress_roles_1950s = (cast.loc[(cast['year'].between(1950, 1959))
                               & (cast['type'] == 'actress')
                               , 'character']
                      .value_counts()
                      .sum())
print(f'Available actress roles in the 1950s: {unique_actress_roles_1950s} roles')

Available actress roles in the 1950s: 56331 roles


### How many leading roles (n=1) were available from the beginning of film history through 1980?

In [33]:
leading_roles = (cast.loc[(cast['n'] == 1)
                          & (cast['year'] <= 1980)]
                          .value_counts()
                          .sum())
print(f'Available leading roles until 1980: {leading_roles}')

Available leading roles until 1980: 62763


### How many non-leading roles were available through from the beginning of film history through 1980?

In [34]:
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,31.0
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


In [35]:
non_leading_roles = (cast.loc[(cast['n'] != 1)
                              & (cast['year'] <= 1980)] 
                              .value_counts()
                              .sum())
print(f'Available supporting roles until 1980: {non_leading_roles}')

Available supporting roles until 1980: 640923


### How many roles through 1980 were minor enough that they did not warrant a numeric "n" rank?

In [36]:
minor_roles = (cast['n'].isnull()
               .sum())
print(f'Number of roles for minors: {minor_roles} roles')

Number of roles for minors: 1327783 roles
