In [15]:
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact
import ipywidgets as widgets

In [16]:
# Load the actor/title/rating table and keep a single row per (title, person)
# pair; drop_duplicates keeps the first occurrence in file order.
actor_movies_ratings = (
    pd.read_csv("actor_movies_ratings.csv")
    .drop_duplicates(subset=['tconst', 'nconst'])
)
print(actor_movies_ratings.shape)
actor_movies_ratings.head(3)

(313009, 13)


Unnamed: 0,tconst,ordering,nconst,category,job,characters,averageRating,numVotes,primaryTitle,startYear,region,primaryName,birthYear
0,tt0035423,2,nm0413168,actor,\N,"[""Leopold""]",6.4,88860,Kate & Leopold,2001.0,US,Hugh Jackman,1968
2,tt0120903,2,nm0413168,actor,\N,"[""Logan""]",7.3,644121,X-Men,2000.0,US,Hugh Jackman,1968
8,tt0180037,2,nm0413168,actor,\N,"[""Jack Willis""]",6.4,2344,Paperback Hero,1999.0,US,Hugh Jackman,1968


In [17]:
# Names left out of the analysis (the reason is not recorded in this notebook).
do_not_include = [
    'Ajay Devgn',
    'Irrfan Khan',
    'Prakash Raj',
    'Paresh Rawal',
    'Amitabh Bachchan',
    'Aamir Khan',
    'Akshay Kumar',
    'Anupam Kher',
    'Nassar',
    'Shah Rukh Khan',
    'Sumit Keshri',
    'Gaurav Manaktala',
    'Suryakanth',
]

# Keep credits on titles with at least 10,000 votes whose actor is not on the
# exclusion list.
# (disabled) an extra cut on actor_movies_ratings['averageRating'] >= 7
has_enough_votes = actor_movies_ratings['numVotes'].ge(10000)
is_excluded_name = actor_movies_ratings['primaryName'].isin(do_not_include)
actor_movies_filtered = actor_movies_ratings.loc[has_enough_votes & ~is_excluded_name].copy()
actor_movies_filtered.shape

(44551, 13)

In [18]:
# Distinct release years present in the filtered credits, in ascending order.
# Kept as a one-column frame so it can be merged against the actor list later.
year_column = actor_movies_filtered[['startYear']]
all_years = year_column.drop_duplicates().sort_values(by='startYear')
all_years.head(3)

Unnamed: 0,startYear
406,1980.0
930,1981.0
1120,1982.0


In [19]:
# Count the distinct titles (tconst) each person appears in among the filtered
# credits and broadcast that count onto every one of their rows.
# Grouping is done on the person id (nconst) rather than the display name so
# that two different people who share a name do not have their filmographies
# pooled into a single, inflated count.
actor_movies_filtered['total_hit_movies'] = (
    actor_movies_filtered.groupby('nconst')['tconst'].transform('nunique')
)

In [20]:
# Keep only credits belonging to actors with at least 15 counted titles,
# ordered from the fewest to the most titles.
prolific_mask = actor_movies_filtered['total_hit_movies'] >= 15
actors_filtered = (
    actor_movies_filtered.loc[prolific_mask]
    .sort_values(by='total_hit_movies', ascending=True)
    .copy()
)
actors_filtered.head(3)

Unnamed: 0,tconst,ordering,nconst,category,job,characters,averageRating,numVotes,primaryTitle,startYear,region,primaryName,birthYear,total_hit_movies
35768,tt0418279,7,nm0000685,actor,\N,"[""Defense Secretary John Keller""]",7.0,672629,Transformers,2007.0,US,Jon Voight,1938,15
128366,tt1979376,5,nm1221047,actor,\N,"[""Ducky""]",7.7,278543,Toy Story 4,2019.0,US,Keegan-Michael Key,1971,15
128367,tt1985949,8,nm1221047,actor,\N,"[""Judge Peckinpah""]",6.3,103157,The Angry Birds Movie,2016.0,US,Keegan-Michael Key,1971,15


In [21]:
# Persist the filtered credits next to the notebook; the row index is not written.
actors_filtered.to_csv(path_or_buf="actors_filtered.csv", index=False)

In [22]:
# One row per distinct (name, birth year) pair among the retained actors.
actor_identity_cols = ['primaryName', 'birthYear']
all_actors = actors_filtered.loc[:, actor_identity_cols].drop_duplicates()
all_actors.head(3)

Unnamed: 0,primaryName,birthYear
35768,Jon Voight,1938
128366,Keegan-Michael Key,1971
133567,Hrithik Roshan,1974


In [23]:
# Build every (year, actor) combination: give both frames the same constant
# join key and merge on it, which pairs each year row with each actor row.
# The helper 'key' column stays in the result for the next step to drop.
for frame in (all_years, all_actors):
    frame['key'] = 0
all_years_all_actors = pd.merge(all_years, all_actors, how='outer', on='key')

In [31]:
# Collapse credits to one row per (year, person): mean rating and mean vote
# count across that year's titles, plus the list of the titles themselves.
yearly_aggregations = {
    'averageRating': pd.NamedAgg(column='averageRating', aggfunc='mean'),
    'numVotes': pd.NamedAgg(column='numVotes', aggfunc='mean'),
    'primaryTitle': pd.NamedAgg(column='primaryTitle', aggfunc=list),
}
actors_filtered_agg = (
    actors_filtered
    .groupby(['startYear', 'nconst', 'primaryName'])
    .agg(**yearly_aggregations)
    .reset_index()
)
actors_filtered_agg.head(3)

Unnamed: 0,startYear,nconst,primaryName,averageRating,numVotes,primaryTitle
0,1980.0,nm0000095,Woody Allen,7.2,23919.0,[Stardust Memories]
1,1980.0,nm0000101,Dan Aykroyd,7.9,213552.0,[The Blues Brothers]
2,1980.0,nm0000102,Kevin Bacon,6.4,156772.0,[Friday the 13th]


In [37]:
# Attach each actor's yearly aggregates to the full (year, actor) grid. The
# left join keeps years in which the actor has no row in the aggregates; those
# rows come back with missing values. The helper join key is no longer needed.
yearly_columns = ['startYear', 'nconst', 'primaryName', 'averageRating', 'numVotes', 'primaryTitle']
actors_with_all_years = (
    all_years_all_actors
    .merge(actors_filtered_agg[yearly_columns], how='left', on=['startYear', 'primaryName'])
    .drop(columns='key')
)

# True where the join found at least one title for that actor in that year.
actors_with_all_years['has_movie'] = actors_with_all_years['primaryTitle'].notna()

In [38]:
# Order rows by actor, then chronologically within each actor; the rolling
# windows computed later walk the rows in this order.
actors_with_all_years_sorted = actors_with_all_years.sort_values(by=['primaryName', 'startYear'])

In [39]:
# Cast both year columns to integers, then derive the actor's age in each year.
for year_col in ('startYear', 'birthYear'):
    actors_with_all_years_sorted[year_col] = actors_with_all_years_sorted[year_col].astype(int)
actors_with_all_years_sorted['age_at_movie'] = (
    actors_with_all_years_sorted['startYear'].sub(actors_with_all_years_sorted['birthYear'])
)

In [40]:
def _rolling_5yr(column, reducer):
    """Per-actor rolling reduction of `column` over the current and up to four preceding rows.

    `reducer` is the name of a rolling-window method ('sum' or 'mean').
    A window needs only one non-missing value to produce a result
    (min_periods=1). The windows are row-based, so they rely on the frame
    already being ordered by year within each actor.
    """
    return (
        actors_with_all_years_sorted
        .groupby('primaryName')[column]
        .transform(lambda yearly: getattr(yearly.rolling(5, min_periods=1), reducer)())
    )


actors_with_all_years_sorted['rolling_5yr_movies'] = _rolling_5yr('has_movie', 'sum')
actors_with_all_years_sorted['rolling_5yr_avgrating'] = _rolling_5yr('averageRating', 'mean')
actors_with_all_years_sorted['rolling_5yr_sumrating'] = _rolling_5yr('averageRating', 'sum')
# Each actor's best rolling sum, repeated on all of that actor's rows.
actors_with_all_years_sorted['max_rolling_5yr_sumrating'] = (
    actors_with_all_years_sorted.groupby('primaryName')['rolling_5yr_sumrating'].transform('max')
)

In [41]:
# Persist the per-actor, per-year feature table; the row index is not written.
actors_with_all_years_sorted.to_csv(path_or_buf="actors_with_all_years_sorted.csv", index=False)

In [16]:
# Largest rolling sum across all actors and years. Used as a fixed upper
# y-limit so the axes do not rescale between slider positions; computed once
# here instead of on every slider move.
overall_max_sumrating = actors_with_all_years_sorted['rolling_5yr_sumrating'].max()


@interact(one_year=widgets.IntSlider(
    # Slider bounds follow the years actually present in the table.
    min=int(actors_with_all_years_sorted['startYear'].min()),
    max=int(actors_with_all_years_sorted['startYear'].max()),
    step=1,
    value=int(actors_with_all_years_sorted['startYear'].min()),
))
def get_one_year(one_year):
    """Scatter each actor's rolling 5-year rating sum against their age for one year.

    Parameters
    ----------
    one_year : int
        Year to display; supplied by the slider.

    Returns
    -------
    None
        The figure is created as a side effect and shown by the notebook.
    """
    in_year = actors_with_all_years_sorted['startYear'] == one_year
    # Rows without a rolling sum (NaN) cannot be placed on the plot. Dropping
    # them up front avoids building one annotation per invisible point.
    one_year_actors = actors_with_all_years_sorted[in_year].dropna(
        subset=['age_at_movie', 'rolling_5yr_sumrating']
    )

    fig, ax = plt.subplots(figsize=(11, 6))
    ax.scatter(one_year_actors['age_at_movie'], one_year_actors['rolling_5yr_sumrating'])

    # Label every point with the actor's name.
    for name, age, score in zip(one_year_actors['primaryName'],
                                one_year_actors['age_at_movie'],
                                one_year_actors['rolling_5yr_sumrating']):
        ax.annotate(name, (age, score))

    # NOTE(review): (40, 38) is in data coordinates; Matplotlib clips data-coordinate
    # annotations whose anchor lies outside the axes, so the year label would
    # disappear if the upper y-limit were ever below 38 - confirm against the data.
    year_title = ax.annotate(f"Year {one_year}", (40, 38), fontsize=16, ha='center', fontweight='bold')
    year_title.set_bbox(dict(facecolor='white', alpha=0.9, edgecolor='white'))

    # Draw both vertical spines through age 40 so the y-axis sits mid-plot.
    ax.spines['left'].set_position(('data', 40))
    ax.spines['right'].set_position(('data', 40))
    ax.set_ylim(bottom=7, top=overall_max_sumrating)
    ax.set_xlim(left=20, right=70)

    ax.set_xlabel("Age (year minus birth year)")
    ax.set_title("Rolling 5-year sum of yearly average rating vs. age")

interactive(children=(IntSlider(value=1980, description='one_year', max=2024, min=1980), Output()), _dom_class…

In [20]:
# Keep only actors whose best rolling 5-year rating sum reaches at least 30.
filtered_actors_with_all_years_sorted = actors_with_all_years_sorted[actors_with_all_years_sorted['max_rolling_5yr_sumrating']>=30].copy()
# Fixed upper y-limit so the axes do not rescale between slider positions.
max_y = filtered_actors_with_all_years_sorted['rolling_5yr_sumrating'].max()


@interact(one_year=widgets.IntSlider(
    # Slider bounds follow the years actually present in the filtered table.
    min=int(filtered_actors_with_all_years_sorted['startYear'].min()),
    max=int(filtered_actors_with_all_years_sorted['startYear'].max()),
    step=1,
    value=int(filtered_actors_with_all_years_sorted['startYear'].min()),
))
def get_one_year(one_year):
    """Scatter the retained actors' rolling 5-year rating sum against age for one year.

    Only actors that passed the max_rolling_5yr_sumrating >= 30 filter are drawn.

    Parameters
    ----------
    one_year : int
        Year to display; supplied by the slider.

    Returns
    -------
    None
        The figure is created as a side effect and shown by the notebook.
    """
    in_year = filtered_actors_with_all_years_sorted['startYear'] == one_year
    # Rows without a rolling sum (NaN) cannot be placed on the plot. Dropping
    # them up front avoids building one annotation per invisible point.
    one_year_actors = filtered_actors_with_all_years_sorted[in_year].dropna(
        subset=['age_at_movie', 'rolling_5yr_sumrating']
    )

    fig, ax = plt.subplots(figsize=(11, 6))
    ax.scatter(one_year_actors['age_at_movie'], one_year_actors['rolling_5yr_sumrating'])

    # Label every point with the actor's name.
    for name, age, score in zip(one_year_actors['primaryName'],
                                one_year_actors['age_at_movie'],
                                one_year_actors['rolling_5yr_sumrating']):
        ax.annotate(name, (age, score))

    # NOTE(review): (40, 38) is in data coordinates; Matplotlib clips data-coordinate
    # annotations whose anchor lies outside the axes, so the year label would
    # disappear if max_y were ever below 38 - confirm against the data.
    year_title = ax.annotate(f"Year {one_year}", (40, 38), fontsize=16, ha='center', fontweight='bold')
    year_title.set_bbox(dict(facecolor='white', alpha=0.9, edgecolor='white'))

    # Draw both vertical spines through age 40 so the y-axis sits mid-plot.
    ax.spines['left'].set_position(('data', 40))
    ax.spines['right'].set_position(('data', 40))
    ax.set_ylim(bottom=7, top=max_y)
    ax.set_xlim(left=20, right=70)

    ax.set_xlabel("Age (year minus birth year)")
    ax.set_title("Rolling 5-year sum of yearly average rating vs. age (actors peaking at 30+)")

interactive(children=(IntSlider(value=1980, description='one_year', max=2024, min=1980), Output()), _dom_class…