In [1]:
import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
# import plotly.express as px
from datetime import datetime, timedelta
import ast
# import requests
# from bs4 import BeautifulSoup
# import json
# import time

In [2]:
df = pd.read_csv('movies.csv')

## Data Cleaning

* Convert all column names to lower case for processing
* Convert `year` column from integer to string
* Convert `released` column from string to datetime object
* Convert `boxoffice` column from string to integer
* Remove "mins" from `runtime` column and convert column from string to integer
    * Inspect and address single '21S min' value in column
    * drop this observation
* Reduce genre to first descriptor
* Convert `budget` column from string to integer
    * strip all non-digit characters
* Convert `directorid` to list of strings
* Drop duplicate rows
    * some films appear 2x since they were in the top 200 in multiple years
    * films released towards end of year
* Remove all films released prior to 2002

In [3]:
# convert column names to lowercase
df.columns = df.columns.str.lower()

# convert year column from integer to string
df['year'] = df['year'].astype(str)

# convert released column from string to datetime object
df['released'] = pd.to_datetime(df['released'], format='%d %b %Y')

# convert box office column from string to integer
# Remove '$' and commas, and convert to integers
df['boxoffice'] = df['boxoffice'].str.replace('$', '').str.replace(',', '').astype(int)

# Inspect and repair single '21S min' value in column
df.drop(df[df['runtime'] == '21S min'].index, inplace=True)

# Remove "mins" from `runtime` column and convert column from string to integer
df['runtime'] = df['runtime'].str.replace(' min', '').astype(int)

# Reduce genre to first descriptor
df['genre'] = df['genre'].apply(lambda x: x.split(',')[0])

# convert budget column from string to integer
# strip all non-digit characters from strings
df['budget'] = pd.to_numeric(df['budget'].str.replace(r'\D', '', regex=True), errors='coerce')

# convert directorid to list of strings
df['directorid'] = df['directorid'].apply(lambda x: pd.notna(x) and ast.literal_eval(x) if len(x) > 9 else [x])

  df['boxoffice'] = df['boxoffice'].str.replace('$', '').str.replace(',', '').astype(int)


TypeError: object of type 'float' has no len()

In [255]:
missing_ids = df.query("directorid.isna()")

In [273]:
def scrape_missing_names(row, column):
    if pd.notna(row[column]):
        name = row[column]
        url = "https://www.imdb.com/find/?q="
        response = requests.get(url+name, headers={'User-Agent': 'Mozilla/5.0'})
        soup = BeautifulSoup(response.text, 'html.parser')
        time.sleep(.2)
        script_tag = soup.find('script', {'type': 'application/json'})
        json_string = script_tag.string
        data = json.loads(json_string)
        try:
            id = data['props']['pageProps']['nameResults']['results'][0]['id']
            time.sleep(.2)
        except Exception:
            id = None
    else:
        id = None
    
    return id

In [274]:
missing_ids['directorid'] = missing_ids.apply(scrape_missing_names, column='director', axis=1) 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [275]:
missing_ids.directorid.isna().sum()

2

In [278]:
# # replace missing director ids with the new ids
# df.loc[df['directorid'].isna()] = missing_ids
# df.directorid.isna().sum()

2

In [279]:
# df.to_csv('movies_clean.csv', index=False)

# Using Clean Data

In [4]:
df = pd.read_csv('movies_clean.csv')

# drop duplicate rows
df = df.drop_duplicates(subset=['title','year'])
# convert released column from string to datetime object
df['released'] = pd.to_datetime(df['released'], format='%Y-%m-%d')
# replace rows with duplicate box office and budget with budget = NA
df.loc[df['boxoffice'] == df['budget'], 'budget'] = None
# replace rows without actorids to string 'none'
df.loc[df['actorid'].isna(), 'actorid'] = 'None'
# replace rows without directorids to string 'none'
df.loc[df['directorid'].isna(), 'directorid'] = 'None'
# drop rows that are missing directors
df = df.drop(df.query('director.isna()').index)
# drop films released prior to 2002 (83 films)
df = df.drop(df.query('released < 2002').index)
# reset the index
df.reset_index(drop=True, inplace=True)

### Feature Engineering 

* Create `num_directors` - number of directors on the film.    
* Create `num_actors` - number of leading actors on the film.    
* Create `director_rev5` and `director_rev10` - total box office revenue of films of director, previous 5/10 years.    
* Create `actor_rev5` and `actor_rev10` - combined total box office revenue of films of leading actors, previous 5/10 years. 
* Create `director_films5` and `director_films10` - total films of director, previous 5/10 years.   
* Create `actor_films5` and `actor_films10` - combined total films of actors, previous 5/10 years.    


In [5]:
# convert directorid to list of strings

# Define a lambda function that safely evaluates the input
def safe_eval(x):
    try:
        return ast.literal_eval(x) if x and len(str(x)) > 9 else [x]
    except (SyntaxError, ValueError):
        return [str(x)]

# Apply the modified lambda function to the DataFrame
df['directorid'] = df['directorid'].apply(safe_eval)
# df['directorid'] = df['directorid'].apply(lambda x: ast.literal_eval(x) if x and len(str(x)) > 9 else [x])
# df['directorid'] = df['directorid'].apply(ast.literal_eval)
df['actorid'] = df['actorid'].apply(lambda x: ast.literal_eval(x) if x and len(str(x)) > 9 else [x])

In [6]:
# create number of directors column
df['num_directors'] = df['directorid'].apply(len)
# create number of leading actors column
df['num_actors'] = df['actorid'].apply(len)
# for actorids that contain 'None' set num_actors to 0
df.loc[df['actorid'].apply(lambda x: 'None' in x), 'num_actors'] = 0

In [36]:
df[df['directorid'].apply(lambda x: 'nm1954072' in x)]

Unnamed: 0,title,year,rated,released,runtime,genre,director,writer,actors,plot,...,dvd,boxoffice,production,website,response,directorid,actorid,budget,num_directors,num_actors
3656,Emergency Declaration,2021,,2022-08-03,141,Action,Han Jae-rim,Han Jae-rim,"Song Kang-ho, Lee Byung-hun, Jeon Do-yeon","After a terror incident occurs mid-flight, a p...",...,29 Nov 2022,412196,,,True,[nm1954072],"[nm0814280, nm0496932, nm0161133]",,1,3


In [8]:
# Define a function to calculate the cumulative sum for a row within the past x years
def calculate_cumulative_sum(row, column, years):
    print(row[column])
    date = row['released']
    strings = row[column]
    
    # Calculate the cutoff date 5 years before the current date
    cutoff_date = date - pd.DateOffset(years=years)
    
    cumulative_sum = 0
    
    for string in strings:
        print(string)
        # Filter rows with dates within the past x years and matching string
        filtered_rows = df[(df['released'] >= cutoff_date) & (df['released'] < date) & (df[column].apply(lambda x: string in x))]
        
        # Sum the revenue for the filtered rows
        prior_revenue_sum = filtered_rows['boxoffice'].sum()
        
        cumulative_sum += prior_revenue_sum
            
    return cumulative_sum


In [38]:
# Apply the function to each row for director revenues
df['director_rev5'] = df.apply(calculate_cumulative_sum, args={'directorid', 5}, axis=1)

nm0000600
nm0000184
nm0001060
nm0796117
nm0959034
nm0001392
nm0005366
nm0001756
nm0917188
nm0757858
nm0000165
nm0331532
nm0848414
nm0213450
nm0761498
nm0003418
nm0501185
nm0000229
nm0893659
nm0109359
nm0855035
nm0510731
nm0004675
nm0000436
nm0000631
nm0005222
nm0000399
nm0711840
nm0751080
nm0001675
nm0868219
nm0505152
nm0908824
nm1103162
nm0359387
nm0038432
nm0177170
nm0001024
nm0451884
nm0634240
nm0585011
nm0591450
nm0871860
nm0000229
nm0911061
nm0920425
nm0721817
nm0001490
nm0776271
nm1019493
nm0492909
nm1180460
nm0177015
nm0506613
nm0027572
nm0101385
nm0002083
nm0919363
nm0919369
nm0788202
nm0440458
nm0000247
nm0000265
nm0007082
nm0086690
nm0625458
nm0027271
nm0001112
nm0000776
nm0831690
nm0002700
nm0223359
nm0499724
nm0205542
nm0000408
nm0671210
nm0000829
nm0000941
nm0000217
nm0276062
nm0166256
nm0615780
nm0858525
nm1048560
nm0775447
nm0738796
nm0286975
nm0742819
nm0753382
nm0001723
nm0231190
nm0001708
nm0616880
nm0214036
nm0065284
nm0000601
nm0689852
nm1104343
nm0266622
nm0000142


nm0000361
nm0718645
nm0006476
nm0355652
nm0193554
nm1309090
nm1555287
nm0000265
nm0814517
nm0327944
nm0151540
nm0040328
nm1669022
nm0020491
nm0934483
nm0680846
nm0677953
nm0510912
nm0813309
nm0001068
nm1401857
nm0201283
nm0082450
nm0721817
nm0103595
nm0001659
nm0820800
nm0002339
nm0756935
nm0392237
nm0082300
nm0231190
nm0134811
nm0000517
nm1309359
nm0791672
nm0005387
nm1272773
nm0382072
nm1268721
nm0327273
nm0001282
nm1065402
nm0004838
nm0000389
nm0604688
nm0001241
nm0002120
nm0000095
nm1731937
nm0004716
nm1053371
nm0528381
nm0795407
nm0828099
nm0348181
nm0438090
nm0097529
nm1741096
nm0664896
nm0000631
nm0001873
nm0919369
nm2104340
nm0926211
nm0007082
nm0469694
nm0614682
nm0329650
nm0001994
nm0000500
nm1000159
nm0001302
nm0000169
nm0002044
nm1461380
nm0000600
nm1844237
nm0401203
nm0000881
nm0893659
nm0946734
nm0339030
nm0811583
nm0083348
nm0684342
nm1349376
nm0798899
nm0065608
nm0031976
nm0384722
nm0711840
nm0936482
nm0005509
nm1103162
nm0000631
nm0971239
nm9054338
nm0609549
nm0240797


nm0323239
nm0142286
nm1512910
nm1490123
nm1347153
nm0101047
nm0108132
nm0000399
nm0000217
nm0000165
nm0809877
nm0176905
nm0125803
nm0268380
nm0005190
nm0573732
nm0000229
nm0702797
nm0000123
nm0942504
nm0339004
nm0001675
nm0668247
nm0000127
nm0925870
nm0362566
nm0609549
nm0160157
nm0001081
nm0015328
nm0229694
nm0281508
nm0629272
nm0576298
nm0000142
nm0811583
nm0000229
nm0000158
nm1349522
nm0833889
nm0716347
nm1731937
nm0405632
nm0934864
nm0006960
nm0829820
nm0922346
nm0005436
nm1400426
nm0027459
nm2320658
nm0337773
nm1834213
nm0784061
nm0673400
nm2552536
nm0340958
nm0080120
nm0337773
nm0920425
nm0006487
nm1197971
nm0027271
nm0275277
nm0531817
nm0254786
nm0318916
nm1018426
nm0374048
nm0774582
nm0288144
nm0999521
nm0776271
nm0617042
nm0771054
nm0640334
nm0000517
nm0732430
nm1160495
nm0036496
nm0329755
nm0000602
nm0718646
nm1560977
nm0527261
nm0527109
nm0565336
nm1918538
nm11875524
nm0638271
nm0569790
nm0161834
nm0193508
nm0662529
nm0806492
nm1103162
nm0660707
nm0291205
nm0236226
nm0037708

nm0006476
nm1170855
nm1557594
nm0080220
nm1049433
nm0689187
nm3734458
nm0310673
nm1442514
nm0393799
nm1460159
nm0054744
nm0000128
nm0000095
nm0001331
nm0484907
nm0950426
nm0573732
nm3273000
nm1318596
nm0169806
nm0170043
nm0005139
nm0678857
nm0015295
nm0994553
nm2399862
nm0001469
nm1242054
nm1167933
nm0346550
nm0417054
nm0000490
nm0888743
nm0000876
nm0001880
nm3011011
nm1888091
nm0004056
nm0533691
nm2284484
nm0751577
nm0751648
nm0719208
nm0269463
nm1783265
nm0397174
nm0601781
nm1158544
nm0811583
nm0043742
nm0220600
nm0946734
nm0166256
nm0615780
nm2320658
nm0327944
nm0339030
nm0510912
nm0001741
nm1134029
nm2888554
nm0593610
nm0230666
nm1868917
nm0950775
nm0082450
nm1098493
nm0946734
nm0000142
nm0524190
nm0601859
nm0443505
nm0717678
nm0000386
nm1490123
nm0970447
nm0862911
nm0298807
nm0898288
nm1103162
nm1793079
nm0000318
nm0640334
nm1757777
nm0218621
nm0090386
nm1108007
nm0853238
nm1347153
nm0831557
nm0842339
nm0870469
nm2497546
nm0777881
nm0160840
nm0000154
nm0862211
nm1154886
nm1229520


nm9761320
nm4816401
nm4263190
nm0005222
nm1733778
nm4014166
nm0300866
nm1191481
nm0761498
nm0768959
nm0634240
nm0005363
nm2308774
nm3150455
nm0905592
nm1837748
nm0420941
nm0037708
nm2073372
nm0097517
nm4907810
nm0384722
nm1827931
nm0797455
nm0674020
nm0931251
nm2568279
nm0640334
nm0068587
nm0376260
nm2127315
nm1702145
nm3401779
nm2296528
nm0153774
nm0080120
nm0484907
nm0269542
nm0711110
nm1396121
nm0957505
nm1543747
nm0027271
nm0384722
nm3706989
nm0637493
nm0339030
nm1780037
nm0661751
nm0697936
nm0438228
nm0733149
nm0294457
nm1938064
nm0948159
nm0339004
nm0406334
nm0682301
nm0682342
nm3868633
nm1072411
nm2002649
nm0393678
nm2193504
nm2273232
nm3613566
nm3317268
nm3462290
nm4013000
nm0043742
nm3829351
nm5732579
nm5722382
nm0320660
nm1836315
nm0343898
nm0000517
nm0753526
nm3902889
nm0867262
nm0188722
nm0474955
nm0451155
nm6214351
nm6679051
nm0033596
nm1126068
nm4069082
nm6436658
nm1918140
nm1823264
nm0822582
nm6304969
nm0404882
nm0432380
nm0821128
nm0007045
nm1914394
nm0838647
nm0867127


In [49]:
df['director_rev5'].to_csv('d5.csv', index=False, header=['director_rev5'])

In [9]:
# Apply the function to each row for director revenues
df['director_rev10'] = df.apply(calculate_cumulative_sum, args={'directorid', 10}, axis=1)

['nm0000600']
nm0000600
['nm0000184']
nm0000184
['nm0001060']
nm0001060
['nm0796117']
nm0796117
['nm0959034']
nm0959034
['nm0001392']
nm0001392
['nm0005366']
nm0005366
['nm0001756']
nm0001756
['nm0917188', 'nm0757858']
nm0917188
nm0757858
['nm0000165']
nm0000165
['nm0331532']
nm0331532
['nm0848414']
nm0848414
['nm0213450', 'nm0761498']
nm0213450
nm0761498
['nm0003418']
nm0003418
['nm0501185']
nm0501185
['nm0000229']
nm0000229
['nm0893659']
nm0893659
['nm0109359']
nm0109359
['nm0855035']
nm0855035
['nm0510731']
nm0510731
['nm0004675']
nm0004675
['nm0000436']
nm0000436
['nm0000631']
nm0000631
['nm0005222']
nm0005222
['nm0000399']
nm0000399
['nm0711840']
nm0711840
['nm0751080']
nm0751080
['nm0001675']
nm0001675
['nm0868219']
nm0868219
['nm0505152']
nm0505152
['nm0908824']
nm0908824
['nm1103162']
nm1103162
['nm0359387']
nm0359387
['nm0038432', 'nm0177170']
nm0038432
nm0177170
['nm0001024']
nm0001024
['nm0451884']
nm0451884
['nm0634240']
nm0634240
['nm0585011']
nm0585011
['nm0591450']
nm059

['nm0747849']
nm0747849
['nm0000233']
nm0000233
['nm0001129']
nm0001129
['nm0838198']
nm0838198
['nm0640334']
nm0640334
['nm0000916']
nm0000916
['nm0868219']
nm0868219
['nm0000568']
nm0000568
['nm0811583']
nm0811583
['nm0109359']
nm0109359
['nm0107774', 'nm0344259']
nm0107774
nm0344259
['nm0878638']
nm0878638
['nm0935095']
nm0935095
['nm0155093']
nm0155093
['nm1490123']
nm1490123
['nm0955443']
nm0955443
['nm0298807']
nm0298807
['nm0743093']
nm0743093
['nm0936838']
nm0936838
['nm0278181', 'nm0998551']
nm0278181
nm0998551
['nm0462895']
nm0462895
['nm0106230']
nm0106230
['nm0275286']
nm0275286
['nm0381478']
nm0381478
['nm0001317']
nm0001317
['nm0831321']
nm0831321
['nm0685759']
nm0685759
['nm0001053', 'nm0001054']
nm0001053
nm0001054
['nm0452319']
nm0452319
['nm0175775']
nm0175775
['nm0005190']
nm0005190
['nm1103162']
nm1103162
['nm0327273']
nm0327273
['nm0420941']
nm0420941
['nm0000231']
nm0000231
['nm0378144']
nm0378144
['nm0005295']
nm0005295
['nm0142286']
nm0142286
['nm0514546']
nm051

nm0271402
['nm0001716']
nm0001716
['nm0005367']
nm0005367
['nm0000583']
nm0000583
['nm0240797']
nm0240797
['nm0206760', 'nm0267512']
nm0206760
nm0267512
['nm0005540']
nm0005540
['nm0639965']
nm0639965
['nm0179697']
nm0179697
['nm0266777']
nm0266777
['nm0001112']
nm0001112
['nm0601382']
nm0601382
['nm0939128']
nm0939128
['nm0935095']
nm0935095
['nm0634240']
nm0634240
['nm0002159']
nm0002159
['nm0249897', 'nm0250410', 'nm0498555']
nm0249897
nm0250410
nm0498555
['nm0583600']
nm0583600
['nm0518644']
nm0518644
['nm0783536', 'nm0294997']
nm0783536
nm0294997
['nm0000230']
nm0000230
['nm0922346']
nm0922346
['nm0744834']
nm0744834
['nm0428600']
nm0428600
['nm0304521']
nm0304521
['nm0000154']
nm0000154
['nm1595280']
nm1595280
['nm0796117']
nm0796117
['nm0014960']
nm0014960
['nm0858525']
nm0858525
['nm0286975']
nm0286975
['nm1139726']
nm1139726
['nm0509448']
nm0509448
['nm1234345']
nm1234345
['nm0423333']
nm0423333
['nm0911061']
nm0911061
['nm0931727']
nm0931727
['nm0001469']
nm0001469
['nm017437

nm0001054
['nm0715636']
nm0715636
['nm0160840']
nm0160840
['nm0352524']
nm0352524
['nm0788202']
nm0788202
['nm0000399']
nm0000399
['nm1052162']
nm1052162
['nm0591450']
nm0591450
['nm0937748']
nm0937748
['nm0280814', 'nm0505662']
nm0280814
nm0505662
['nm0917992']
nm0917992
['nm0610831']
nm0610831
['nm0474955']
nm0474955
['nm0525303']
nm0525303
['nm1065402']
nm1065402
['nm0905154', 'nm0905152']
nm0905154
nm0905152
['nm1879589']
nm1879589
['nm0000230']
nm0000230
['nm0002700']
nm0002700
['nm1347153']
nm1347153
['nm0938045']
nm0938045
['nm0001741']
nm0001741
['nm0601382']
nm0601382
['nm0000816']
nm0000816
['nm0000759']
nm0000759
['nm0001438']
nm0001438
['nm0000631']
nm0000631
['nm0294997', 'nm0783536']
nm0294997
nm0783536
['nm1375358', 'nm1376383']
nm1375358
nm1376383
['nm0847859']
nm0847859
['nm0697656']
nm0697656
['nm1347153']
nm1347153
['nm0027271']
nm0027271
['nm0000142']
nm0000142
['nm1396048']
nm1396048
['nm0271402', 'nm0828207']
nm0271402
nm0828207
['nm0022768']
nm0022768
['nm1731937

['nm0593610']
nm0593610
['nm0213450', 'nm0761498']
nm0213450
nm0761498
['nm0958969']
nm0958969
['nm0504642']
nm0504642
['nm0240797']
nm0240797
['nm1977355', 'nm0397174']
nm1977355
nm0397174
['nm0569891']
nm0569891
['nm0796117']
nm0796117
['nm0000217']
nm0000217
['nm0570912']
nm0570912
['nm0637518']
nm0637518
['nm2676052']
nm2676052
['nm0871860']
nm0871860
['nm0005190']
nm0005190
['nm0000631']
nm0000631
['nm0000230']
nm0000230
['nm0680846']
nm0680846
['nm0506613']
nm0506613
['nm0455078']
nm0455078
['nm0400436', 'nm0400441']
nm0400436
nm0400441
['nm0000399']
nm0000399
['nm0000255']
nm0000255
['nm0001565']
nm0001565
['nm0777881']
nm0777881
['nm0001060']
nm0001060
['nm0931095']
nm0931095
['nm0919369']
nm0919369
['nm0614682']
nm0614682
['nm0000776']
nm0000776
['nm0002120']
nm0002120
['nm0001716']
nm0001716
['nm0138620']
nm0138620
['nm0003506']
nm0003506
['nm0005366']
nm0005366
['nm0001053', 'nm0001054']
nm0001053
nm0001054
['nm0855035']
nm0855035
['nm0294457']
nm0294457
['nm0005509']
nm0005

['nm1025280']
nm1025280
['nm0001885']
nm0001885
['nm3686896']
nm3686896
['nm0158984']
nm0158984
['nm0898288']
nm0898288
['nm0935863']
nm0935863
['nm0923736']
nm0923736
['nm0634240']
nm0634240
['nm0002657']
nm0002657
['nm0005222']
nm0005222
['nm0174374']
nm0174374
['nm1989536']
nm1989536
['nm0028764', 'nm0152312', 'nm0700760']
nm0028764
nm0152312
nm0700760
['nm0001392']
nm0001392
['nm0532235']
nm0532235
['nm0201509', 'nm0569891', 'nm0970447']
nm0201509
nm0569891
nm0970447
['nm0719208', 'nm0049633']
nm0719208
nm0049633
['nm0001756']
nm0001756
['nm0601781']
nm0601781
['nm0553942', 'nm0862211']
nm0553942
nm0862211
['nm2782185']
nm2782185
['nm0850733']
nm0850733
['nm0576298']
nm0576298
['nm0520488', 'nm0588087']
nm0520488
nm0588087
['nm0000229']
nm0000229
['nm0000631']
nm0000631
['nm1174251']
nm1174251
['nm1302591']
nm1302591
['nm0001752']
nm0001752
['nm0006904']
nm0006904
['nm0000255']
nm0000255
['nm0679031']
nm0679031
['nm0000709']
nm0000709
['nm1103162']
nm1103162
['nm0709056']
nm0709056

['nm0751577', 'nm0751648']
nm0751577
nm0751648
['nm0520488', 'nm0588087']
nm0520488
nm0588087
['nm0000881']
nm0000881
['nm0834902']
nm0834902
['nm0001741']
nm0001741
['nm0716257']
nm0716257
['nm2320658', 'nm0930261']
nm2320658
nm0930261
['nm1989536']
nm1989536
['nm2284484']
nm2284484
['nm0520488', 'nm0588087']
nm0520488
nm0588087
['nm0509448']
nm0509448
['nm0001392']
nm0001392
['nm0634240']
nm0634240
['nm0213450']
nm0213450
['nm0000399']
nm0000399
['nm1139726']
nm1139726
['nm0831557']
nm0831557
['nm1103162']
nm1103162
['nm0757858']
nm0757858
['nm0000108']
nm0000108
['nm0000916']
nm0000916
['nm1837748']
nm1837748
['nm0591450']
nm0591450
['nm1729171']
nm1729171
['nm1226871']
nm1226871
['nm0004716']
nm0004716
['nm0298807']
nm0298807
['nm0510731']
nm0510731
['nm1429471']
nm1429471
['nm0908824']
nm0908824
['nm0043742']
nm0043742
['nm0125803', 'nm0268380']
nm0125803
nm0268380
['nm1229520']
nm1229520
['nm0502954']
nm0502954
['nm0001024']
nm0001024
['nm0339004']
nm0339004
['nm0000123']
nm00001

nm0950775
['nm0082450']
nm0082450
['nm1098493']
nm1098493
['nm0946734']
nm0946734
['nm0000142']
nm0000142
['nm0524190', 'nm0601859']
nm0524190
nm0601859
['nm0443505', 'nm0717678']
nm0443505
nm0717678
['nm0000386']
nm0000386
['nm1490123']
nm1490123
['nm0970447', 'nm0862911']
nm0970447
nm0862911
['nm0298807']
nm0298807
['nm0898288']
nm0898288
['nm1103162']
nm1103162
['nm1793079']
nm1793079
['nm0000318']
nm0000318
['nm0640334']
nm0640334
['nm1757777']
nm1757777
['nm0218621']
nm0218621
['nm0090386']
nm0090386
['nm1108007']
nm1108007
['nm0853238']
nm0853238
['nm1347153']
nm1347153
['nm0831557', 'nm0842339']
nm0831557
nm0842339
['nm0870469']
nm0870469
['nm2497546']
nm2497546
['nm0777881']
nm0777881
['nm0160840']
nm0160840
['nm0000154']
nm0000154
['nm0862211', 'nm1154886']
nm0862211
nm1154886
['nm1229520']
nm1229520
['nm0619836']
nm0619836
['nm0726638']
nm0726638
['nm0000916']
nm0000916
['nm0428600']
nm0428600
['nm0001880']
nm0001880
['nm4824763']
nm4824763
['nm0000229']
nm0000229
['nm0831557

['nm1417640']
nm1417640
['nm2300570']
nm2300570
['nm1392994']
nm1392994
['nm0147737']
nm0147737
['nm0894207']
nm0894207
['nm1291105']
nm1291105
['nm0998825']
nm0998825
['nm0771054']
nm0771054
['nm1479854']
nm1479854
['nm1010896', 'nm0791556']
nm1010896
nm0791556
['nm0831321']
nm0831321
['nm1194320']
nm1194320
['nm1443023']
nm1443023
['nm0818485']
nm0818485
['nm2167704']
nm2167704
['nm0943104']
nm0943104
['nm0585011']
nm0585011
['nm3101757']
nm3101757
['nm0129919', 'nm3614151']
nm0129919
nm3614151
['nm4006588']
nm4006588
['nm3363032']
nm3363032
['nm0751577', 'nm0751648']
nm0751577
nm0751648
['nm0083348']
nm0083348
['nm1291105']
nm1291105
['nm0500610']
nm0500610
['nm0155528', 'nm0608714']
nm0155528
nm0608714
['nm0003160']
nm0003160
['nm0715636']
nm0715636
['nm0000165']
nm0000165
['nm0281508']
nm0281508
['nm0177896']
nm0177896
['nm1490123']
nm1490123
['nm0001741']
nm0001741
['nm1024677']
nm1024677
['nm1601882', 'nm0601781']
nm1601882
nm0601781
['nm0160840']
nm0160840
['nm0850733']
nm08507

['nm0667734']
nm0667734
['nm0309996']
nm0309996
['nm0001723']
nm0001723
['nm2092965']
nm2092965
['nm1410756', 'nm0001628']
nm1410756
nm0001628
['nm0258531', 'nm0295243']
nm0258531
nm0295243
['nm2336554']
nm2336554
['nm0618680']
nm0618680
['nm0000264']
nm0000264
['nm1169562']
nm1169562
['nm5579635']
nm5579635
['nm0005101']
nm0005101
['nm10095184']
nm10095184
['nm0003620']
nm0003620
['nm0391739']
nm0391739
['nm1460159']
nm1460159
['nm1804614']
nm1804614
['nm2776774']
nm2776774
['nm6682160']
nm6682160
['nm2831530']
nm2831530
['nm1033668']
nm1033668
['nm1410815']
nm1410815
['nm2720681']
nm2720681
['nm1020835']
nm1020835
['nm0155280', 'nm1299690', 'nm1928500']
nm0155280
nm1299690
nm1928500
['nm0408043']
nm0408043
['nm2134474']
nm2134474
['nm9761320']
nm9761320
['nm4816401', 'nm4263190']
nm4816401
nm4263190
['nm0005222']
nm0005222
['nm1733778']
nm1733778
['nm4014166']
nm4014166
['nm0300866']
nm0300866
['nm1191481']
nm1191481
['nm0761498']
nm0761498
['nm0768959']
nm0768959
['nm0634240']
nm063

['nm8745994']
nm8745994
['nm2347386']
nm2347386
['nm3579257', 'nm4895825', 'nm9048529']
nm3579257
nm4895825
nm9048529
['nm0000576']
nm0000576
['nm0507425']
nm0507425
['nm1906430']
nm1906430
['nm0048918']
nm0048918
['nm3792517', 'nm1723335']
nm3792517
nm1723335
['nm0175352']
nm0175352
['nm8328802', 'nm4593873']
nm8328802
nm4593873
['nm0155280', 'nm0482592', 'nm0007139']
nm0155280
nm0482592
nm0007139
['nm0000682']
nm0000682
['nm3152327']
nm3152327
['nm0998930']
nm0998930
['nm0654648']
nm0654648
['nm1633015']
nm1633015
['nm1644765']
nm1644765
['nm0355542']
nm0355542
['nm1676649']
nm1676649
['nm0775056']
nm0775056
['nm0753423']
nm0753423
['nm0150802']
nm0150802
['nm6508110']
nm6508110
['nm0702012']
nm0702012
['nm1656974', 'nm2593537']
nm1656974
nm2593537
['nm2982240']
nm2982240
['nm0305017']
nm0305017
['nm5277246']
nm5277246
['nm0197636', 'nm7557471']
nm0197636
nm7557471
['nm2442244']
nm2442244
['nm1594248']
nm1594248
['nm2066439']
nm2066439
['nm2676052']
nm2676052
['nm3363032']
nm3363032


In [12]:
# Apply the function to each row for actor revenues
df['actor_rev5'] = df.apply(calculate_cumulative_sum, args={'actorid', 5}, axis=1)
df['actor_rev10'] = df.apply(calculate_cumulative_sum, args={'actorid', 10}, axis=1)

nm0001497
nm0000379
nm0000353
nm0159789
nm0000204
nm0000191
nm0705356
nm0342488
nm0914612
nm0000154
nm0001618
nm0191412
nm0889522
nm0179173
nm0176073
nm0000704
nm0005212
nm0001557
nm0000196
nm0461498
nm0001293
nm0000169
nm0000226
nm0001800
nm0001459
nm0000491
nm0005380
nm0000128
nm0000438
nm0000124
nm0000498
nm0005327
nm0001264
nm0000112
nm0000932
nm0683253
nm0153738
nm0761498
nm0000119
nm0004874
nm0000782
nm0190744
nm0000741
nm0107748
nm0593310
nm0000129
nm0268199
nm0608090
nm0915208
nm0376540
nm0004051
nm0001191
nm0000213
nm0001806
nm0000702
nm0001131
nm0524197
nm0004376
nm0000354
nm0177933
nm0000255
nm0000151
nm1172817
nm0004896
nm0005261
nm0000107
nm0001326
nm0000191
nm0001744
nm0000158
nm0388382
nm1005576
nm0000149
nm0829576
nm0001845
nm0000164
nm0001570
nm0000146
nm0425005
nm0104526
nm0003817
nm0891786
nm0754512
nm0000104
nm0000648
nm0001434
nm0000579
nm0000421
nm0000336
nm0005437
nm0000154
nm0000656
nm0001427
nm0001084
nm0147825
nm1073992
nm0000598
nm0263625
nm0341737
nm0000354


nm0506405
nm0000169
nm0000949
nm0939697
nm0000255
nm0001173
nm0000235
nm0000334
nm0005405
nm0454809
nm0275835
nm0191906
nm0763052
nm0005450
nm0005203
nm0000192
nm0000329
nm0262968
nm0001231
nm0838911
nm0005042
nm0931404
nm0000401
nm1035682
nm0428963
nm0000598
nm0000232
nm0001151
nm0480465
nm0835045
nm0004867
nm1095720
nm0660984
nm0369357
nm0000983
nm0000140
nm0005351
nm0000409
nm0001184
nm0000188
nm0000191
nm0000250
nm0001383
nm0000228
nm0000701
nm0001473
nm0908094
nm0124930
nm0175262
nm0001302
nm0506405
nm0571106
nm0134244
nm0587396
nm1491136
nm0005517
nm0005112
nm0040591
nm0005377
nm0000106
nm0000123
nm0000569
nm0000775
nm0000593
nm0005110
nm0005346
nm0725200
nm0005134
nm0005466
nm0004757
nm0244630
nm0005438
nm0004825
nm0000200
nm1315052
nm0008656
nm0252230
nm0851582
nm0645683
nm0005561
nm0005028
nm0001601
nm0601553
nm0285913
nm0048414
nm0002332
nm0000380
nm0001099
nm0001570
nm0001608
nm0000450
nm0354085
nm0000947
nm0608405
nm0000323
nm0000409
nm0229498
nm0122653
nm0000163
nm0001838


nm0005278
nm0005260
nm0005450
nm0004879
nm0000113
nm0005093
nm0000638
nm0000702
nm0749263
nm0006610
nm0005028
nm0765597
nm0117146
nm0089217
nm1200692
nm0000553
nm0005538
nm0221043
nm0005128
nm0000188
nm0001372
nm0240381
nm0000598
nm0333410
nm0424060
nm0000104
nm0001876
nm0001722
nm0000178
nm0000131
nm0001610
nm0005458
nm0005520
nm1690331
nm0000106
nm0266422
nm1555151
nm0000551
nm0000572
nm0000132
nm1064823
nm0520064
nm0004862
nm0848554
nm0088127
nm1065229
nm0000354
nm0005132
nm0000899
nm0001451
nm0000224
nm0005023
nm0000123
nm0000354
nm0001605
nm0424060
nm0000191
nm0005023
nm0654110
nm0000098
nm0001993
nm0000246
nm0001629
nm0005583
nm0000146
nm0001838
nm0396812
nm0461136
nm0532193
nm0000950
nm0000671
nm0001427
nm0001315
nm0001057
nm0000139
nm0000511
nm1455681
nm0001099
nm0001693
nm0000621
nm0266824
nm0004912
nm0524197
nm0004754
nm0004937
nm0614877
nm0385296
nm0193846
nm0000551
nm0005226
nm0202603
nm0005351
nm0005442
nm0267506
nm0001557
nm0004742
nm0000438
nm1265067
nm0117146
nm0015382


nm1097515
nm1107001
nm0427358
nm0103750
nm0241049
nm0313534
nm1312575
nm0908094
nm1080974
nm0001590
nm0189200
nm0000906
nm0004051
nm0184445
nm0004802
nm0429250
nm0000232
nm0607375
nm0000667
nm0000206
nm0000213
nm0000375
nm0000056
nm1563863
nm2179478
nm0001573
nm0733427
nm0000205
nm0000169
nm0001608
nm0948267
nm0000164
nm0002663
nm0713747
nm0000136
nm0608090
nm0000518
nm0001497
nm0000379
nm0333410
nm0000196
nm0000139
nm0000552
nm0479471
nm1083271
nm0241049
nm0000136
nm0089217
nm0461136
nm0705356
nm0914612
nm0342488
nm0000354
nm1183149
nm0000260
nm0124930
nm0372176
nm0920992
nm0004951
nm0738918
nm0652663
nm0000226
nm0103797
nm2253071
nm0144657
nm0001413
nm0004813
nm0000741
nm0001454
nm0000237
nm0736622
nm0001337
nm0748620
nm0005134
nm0046559
nm0439739
nm0000329
nm0000676
nm0001884
nm0000246
nm0519043
nm0648249
nm0000115
nm1208167
nm0058581
nm0344435
nm0004695
nm0262635
nm0000243
nm0000128
nm0252230
nm0000632
nm0000250
nm0000111
nm0148418
nm1706767
nm2395586
nm0001191
nm0416673
nm0004754


nm0004787
nm1304386
nm0000178
nm0004988
nm0189200
nm0000212
nm0578949
nm0000906
nm0122653
nm0815370
nm0847799
nm0000204
nm0424060
nm0051509
nm0000206
nm0001845
nm0491402
nm0005045
nm1592225
nm1645304
nm0068166
nm0000558
nm0668139
nm0000982
nm0006969
nm0344435
nm0267511
nm0005023
nm1720028
nm0000332
nm0001602
nm0659544
nm0356017
nm0424060
nm0000849
nm0004874
nm0000706
nm0858048
nm2353862
nm2951768
nm0795661
nm0000141
nm0000096
nm0175262
nm0000473
nm0001451
nm0005017
nm0000438
nm0001557
nm0000250
nm0000131
nm0788340
nm0000114
nm0005028
nm0176981
nm0004755
nm1113550
nm0001804
nm0000349
nm0039162
nm0540441
nm0006958
nm0000199
nm0001860
nm1360270
nm0000126
nm1745736
nm0001288
nm0001570
nm0268199
nm0001187
nm0000115
nm0947777
nm1395312
nm0000576
nm0000982
nm0386472
nm0000630
nm2869099
nm1244012
nm0001182
nm0591373
nm1440402
nm0461136
nm0000146
nm1002641
nm2247245
nm2003700
nm0916617
nm0315041
nm0001427
nm0000495
nm0532683
nm0000168
nm0424060
nm0005175
nm2415874
nm3937434
nm0388113
nm1007740


nm0005458
nm0001472
nm0000375
nm0302108
nm1157358
nm0136797
nm0275486
nm0000242
nm0000572
nm0000326
nm0004862
nm0000243
nm0005109
nm0829032
nm0251986
nm1940449
nm0005493
nm0000255
nm0356017
nm0358316
nm0350453
nm2605345
nm0001426
nm0000246
nm0000545
nm0000151
nm0503567
nm0571727
nm0176869
nm2209370
nm2913790
nm1454378
nm0001774
nm0001632
nm0000134
nm0000210
nm0000849
nm0420955
nm1602660
nm1342727
nm1670137
nm1475594
nm1086543
nm0420955
nm0000243
nm1517976
nm0206257
nm0000553
nm0177896
nm1663205
nm0000129
nm0000139
nm0765597
nm0136797
nm0748620
nm1210895
nm0000313
nm0000354
nm2794962
nm0000098
nm0124930
nm1631320
nm2325393
nm2953573
nm0006713
nm0000115
nm0059431
nm0000547
nm0355097
nm1913734
nm0973177
nm1415323
nm2955013
nm0001427
nm0001125
nm0000164
nm1289434
nm1706767
nm1258970
nm0005253
nm0000170
nm0005123
nm0589505
nm0001390
nm1347153
nm0779325
nm0425005
nm0000171
nm0000267
nm0000178
nm0000518
nm0553269
nm1297015
nm0004789
nm0046112
nm2093097
nm0159789
nm0000369
nm0000101
nm0005493


nm0000849
nm3159825
nm3904817
nm2279940
nm0101198
nm0550452
nm0241121
nm0067367
nm0000422
nm0406975
nm0000603
nm0000667
nm0000255
nm0177933
nm0000169
nm0316079
nm0683253
nm0388933
nm0085407
nm0000380
nm0502425
nm0000640
nm0000389
nm0000679
nm0105131
nm5004873
nm5004061
nm0933727
nm0003909
nm0705005
nm0000198
nm0000147
nm0362766
nm0428065
nm0947338
nm2225369
nm2075208
nm2397617
nm3780357
nm0004335
nm1027719
nm1867717
nm0004741
nm0891786
nm0065493
nm0000104
nm0025745
nm1693432
nm0647634
nm0005299
nm0370035
nm0000980
nm0790689
nm0544334
nm0451321
nm0525518
nm0700875
nm0002071
nm0356017
nm3246926
nm0836343
nm0000438
nm0268199
nm0841910
nm0000467
nm0000207
nm0451321
nm0992000
nm0004626
nm0000379
nm0001250
nm0000662
nm1456571
nm3196780
nm0001283
nm0232357
nm3946526
nm4048274
nm0044073
nm0246386
nm0309945
nm0176869
nm0117339
nm0426038
nm0000375
nm0262635
nm0424060
nm0000288
nm0362766
nm0004266
nm2225369
nm1242688
nm2955013
nm0185819
nm0000849
nm0365140
nm0829576
nm1500155
nm1210124
nm1940449


nm0001774
nm1325419
nm1789985
nm2934314
nm2570429
nm1588066
nm0000681
nm0695435
nm1130627
nm3798384
nm4089170
nm4098557
nm1093951
nm1631269
nm2395586
nm0117709
nm1165110
nm1312575
nm1519680
nm1796057
nm1831976
nm0670408
nm0296545
nm0293509
nm1886602
nm2014390
nm1947831
nm0000230
nm0000216
nm1265067
nm0330687
nm0424060
nm0000194
nm2185462
nm0881631
nm3886028
nm0000206
nm0760796
nm0793069
nm0136797
nm3769935
nm0000114
nm0000134
nm0000473
nm0001337
nm0000190
nm4446467
nm5015107
nm1745736
nm0004875
nm0779325
nm0136797
nm0001057
nm0005049
nm0331516
nm0177896
nm0578949
nm0005458
nm0290556
nm0000213
nm0000242
nm0000128
nm0001876
nm2129444
nm0205626
nm0000668
nm0000255
nm0005493
nm2605345
nm1801800
nm1846132
nm1410105
nm0915208
nm0000191
nm4043618
nm0001749
nm0002091
nm0175262
nm4563869
nm0001691
nm0001833
nm0275486
nm0748620
nm1822659
nm0005458
nm0000182
nm0004821
nm0000506
nm0001254
nm0001416
nm0005392
nm3967248
nm0357979
nm0001132
nm0176869
nm4080531
nm1055413
nm0004851
nm0000139
nm0000134


nm0000383
nm0444786
nm0000104
nm0265067
nm6578009
nm3659660
nm4207146
nm0425005
nm0001303
nm1275259
nm0362766
nm0000234
nm0396558
nm0940362
nm5052065
nm3772243
nm0000147
nm5473782
nm0000168
nm6907855
nm0006837
nm5295511
nm0565250
nm0126284
nm0000179
nm0942482
nm0000531
nm5558187
nm2154960
nm0352778
nm0488953
nm0430107
nm0000230
nm1935086
nm0000123
nm1429380
nm0491402
nm0002071
nm0366389
nm1555340
nm0000216
nm0164809
nm3592338
nm0000553
nm0001845
nm1192254
nm3729721
nm2546012
nm1032473
nm0000242
nm0532235
nm1086543
nm0085312
nm1910255
nm3843467
nm0001191
nm0416673
nm1157358
nm0095017
nm1020089
nm0910278
nm0000134
nm0004266
nm0000623
nm0416673
nm2263115
nm0895150
nm0000158
nm0753314
nm0000257
nm2068185
nm3956954
nm1620433
nm1475594
nm0542133
nm0093589
nm4609822
nm4590837
nm0241759
nm0002071
nm0000242
nm0004802
nm0366389
nm1265802
nm0192505
nm0000136
nm1212722
nm0424848
nm1159180
nm0000775
nm1711114
nm0005125
nm1013003
nm0004820
nm1886602
nm0544718
nm0430107
nm0005134
nm0519043
nm1131557


nm0001749
nm0421105
nm0000980
nm2413125
nm0144260
nm1745736
nm1676221
nm1672246
nm1676223
nm1663205
nm0000619
nm2247245
nm0258402
nm1748489
nm5228245
nm8136461
nm0654648
nm2143282
nm0852517
nm0443286
nm0911589
nm0268199
nm0001838
nm1875238
nm0451148
nm2799219
nm0760778
nm2225369
nm4601388
nm2836768
nm0777788
nm0945883
nm2558242
nm1550948
nm0249291
nm3286200
nm0915208
nm6390427
nm5016878
nm0056187
nm0835016
nm2313103
nm6026206
nm0909906
nm3322537
nm0000204
nm0765597
nm1950086
nm1754366
nm4554036
nm0068599
nm0001557
nm1126657
nm4705966
nm0006795
nm3087728
nm0393535
nm2353862
nm0000173
nm1913734
nm0000554
nm5421877
nm0853498
nm1886602
nm0001173
nm0005408
nm0001173
nm0396924
nm1503432
nm0766837
nm0525921
nm0001363
nm1183149
nm0713378
nm0000134
nm1221047
nm1843026
nm1898126
nm0000215
nm0126284
nm0799777
nm1633541
nm0706787
nm3087728
nm0000158
nm4025491
nm0002004
nm0200452
nm0705356
nm0935541
nm0001648
nm0183822
nm0416524
nm0001838
nm0929489
nm0001758
nm0544718
nm5896355
nm3310211
nm0707425


nm5637553
nm2225369
nm0249291
nm0774386
nm0005517
nm0121605
nm3450051
nm1018488
nm5589690
nm5582294
nm1165110
nm0788335
nm0671567
nm0124930
nm1032567
nm6578009
nm0331516
nm2946516
nm0164809
nm0001057
nm8412536
nm0000321
nm0378245
nm0072713
nm2981170
nm0000136
nm0564215
nm1289434
nm4489994
nm2577076
nm0641944
nm0205626
nm0735442
nm4456120
nm1423955
nm0692677
nm6556997
nm0924210
nm0000424
nm0095017
nm3538718
nm0004950
nm0241049
nm0000553
nm0267812
nm0933940
nm7556035
nm7582805
nm7052140
nm0000242
nm1659348
nm3299397
nm2240346
nm1778512
nm5105099
nm0004950
nm0302330
nm0651159
nm0000246
nm0000352
nm0000223
nm0857620
nm0571952
nm7529030
nm0000204
nm0000492
nm1935086
nm0001557
nm0991810
nm0004802
nm0186505
nm8659025
nm0001570
nm1840504
nm1754366
nm1165044
nm0940362
nm3510471
nm0859005
nm5473782
nm0004937
nm0578853
nm0151540
nm0373571
nm0501399
nm1428821
nm5065920
nm3596959
nm0996651
nm0579914
nm1146051
nm0791864
nm0683253
nm4768271
nm3964350
nm0356021
nm0395203
nm3053338
nm1659221
nm0005049


nm10921608
nm10921581
nm3828984
nm9071083
nm1399243
nm0006795
nm1229940
nm1571487
nm0479471
nm2348627
nm7415871
nm1659141
nm3601766
nm0006763
nm1017633
nm6442009
nm0004569
nm1172285
nm1410318
nm3695347
nm0004851
nm0000849
nm0201857
nm0251986
nm0005273
nm1782299
nm0586568
nm5386550
nm10443784
nm2348646
nm3511805
nm7211206
nm0072829
nm0089707
nm0097765
nm0004335
nm7021807
nm5446189
nm4069220
nm0540925
nm0149286
nm0000226
nm0001454
nm1227814
nm2835616
nm1126657
nm0990547
nm2355635
nm0005188
nm0000120
nm3053338
nm0001609
nm0935541
nm0000375
nm0000104
nm0790688
nm0005253
nm2719825
nm2976830
nm0000148
nm1082477
nm4446254
nm4043618
nm0695435
nm0000506
nm0913475
nm1500155
nm4456120
nm0000190
nm0402271
nm1890784
nm0430107
nm0004937
nm0488953
nm0000115
nm1297015
nm0005351
nm0671567
nm0702572
nm1423955
nm3586035
nm5896355
nm6390427
nm2933757
nm1517976
nm1325419
nm1840504
nm0126284
nm0000161
nm0923105
nm1770380
nm0113357
nm0000128
nm3308569
nm5145655
nm4793987
nm3147751
nm6001320
nm0000134
nm00002

nm2348627
nm0504832
nm3955926
nm0582126
nm0001163
nm6328300
nm5939164
nm0803889
nm0781981
nm6038328
nm8627157
nm3760302
nm8324884
nm2541232
nm0535391
nm0535392
nm0065059
nm7214498
nm7603745
nm4164856
nm0501256
nm0545166
nm0275637
nm3931538
nm2168927
nm4097479
nm0000136
nm0001845
nm0403947
nm13144837
nm0465503
nm7184245
nm12058353
nm12058347
nm12058350
nm2778747
nm4563820
nm4207146
nm4150847
nm0799777
nm1573253
nm5189611
nm8412322
nm6419392
nm1366335
nm5249562
nm8176068
None
nm2031682
nm0030394
nm2365157
nm0000115
nm6376834
nm0335275
nm0000198
nm2309517
nm1431940
nm0005350
nm3796542
nm10847160
nm2731877
nm9380549
nm10971924
nm0051509
nm0642444
nm1218757
nm8328802
nm9215739
nm10923005
nm0943104
nm3725393
nm1443336
nm1812637
nm0001648
nm7346682
nm0632689
nm4392528
nm1929776
None
nm0654648
nm6403233
nm6807858
nm4580178
nm0578577
nm2415604
nm1496115
nm9169920
nm0463539
nm1405398
nm0279545
nm2929682
nm1676649
nm1165044
nm3354041
nm0250958
nm1405398
nm1197689
nm2412823
nm1251770
nm0401135
nm0

nm0829390
nm0305519
nm0000152
nm0001473
nm0251678
nm0001772
nm0000408
nm0000653
nm0000148
nm0819874
nm0824220
nm0000138
nm0000139
nm0000358
nm0929489
nm0000651
nm0001763
nm0330687
nm0000668
nm0001737
nm0000552
nm0005562
nm0000463
nm0001084
nm0258402
nm0936762
nm0000113
nm0001035
nm0331516
nm0000245
nm0001567
nm0890232
nm0000671
nm0000932
nm0801231
nm0000130
nm0005354
nm0520562
nm0004691
nm0870204
nm0603413
nm0000126
nm0860749
nm0608012
nm0000215
nm0000443
nm0001691
nm0000164
nm0001674
nm0550624
nm0000129
nm0036489
nm1071147
nm0702809
nm0005164
nm0244630
nm0000523
nm0000321
nm0253035
nm0000134
nm0000345
nm0001435
nm0103038
nm0159776
nm0032375
nm0410455
nm0410457
nm0844294
nm0004875
nm0005125
nm0080049
nm0000142
nm0001099
nm0001378
nm0001084
nm0258402
nm0281107
nm0005458
nm0795517
nm0776580
nm1008415
nm1007740
nm0388113
nm0000139
nm0005048
nm0000775
nm0001705
nm1046097
nm0267506
nm0001191
nm0001705
nm0864490
nm0000134
nm0290556
nm0000531
nm0000347
nm0000391
nm0281359
nm0005351
nm0005346


nm0339304
nm0000093
nm0051509
nm0089217
nm0001774
nm0000134
nm0001100
nm0001191
nm0000106
nm0001705
nm0413168
nm0000295
nm0746896
nm0601619
nm0124133
nm0000255
nm0001774
nm0852132
nm0000681
nm0000244
nm0000458
nm0001618
nm0001264
nm0004736
nm0245112
nm0000129
nm0004937
nm0000586
nm0000123
nm0000093
nm0000210
nm0004266
nm0089485
nm0000267
nm0001774
nm0005562
nm0004879
nm0001774
nm0000098
nm0005226
nm0517820
nm1057932
nm1046097
nm0002071
nm0000775
nm0136797
nm0000120
nm0000179
nm0000658
nm0005327
nm0001264
nm0000498
nm0444786
nm0001787
nm0120309
nm0001687
nm0001258
nm1046097
nm0005125
nm0000448
nm0100556
nm0000243
nm0000686
nm0266824
nm0000158
nm0001876
nm0564277
nm0005227
nm0001349
nm0864997
nm0001618
nm0000237
nm0057150
nm0000741
nm0000130
nm0000101
nm0004937
nm0005093
nm0913488
nm0005541
nm0915465
nm0005311
nm0001557
nm0001725
nm0733196
nm0000194
nm0922035
nm1468254
nm0000235
nm0001016
nm0000514
nm0000243
nm0000630
nm0000658
nm0001084
nm0147825
nm1073992
nm0000621
nm0165101
nm0922263


nm0189887
nm0000450
nm0004286
nm0001416
nm0005132
nm0386472
nm0711559
nm0680667
nm0536476
nm1040961
nm0713378
nm0001590
nm0004825
nm0461136
nm0000620
nm1183149
nm0000973
nm0001212
nm0290556
nm0000188
nm0000132
nm0005403
nm0606690
nm0095751
nm0655585
nm0001447
nm0000111
nm0000235
nm0000131
nm0000671
nm0001567
nm0000182
nm0000602
nm0000151
nm0000168
nm0506405
nm0001208
nm2314513
nm2857240
nm5154106
nm0000116
nm0755972
nm1410338
nm0000569
nm0000164
nm0204706
nm0001132
nm0001749
nm0117709
nm0376540
nm0706787
nm1318321
nm1375020
nm0626362
nm0232963
nm0137506
nm0725543
nm0001674
nm0004778
nm0461136
nm0185819
nm0000158
nm0179173
nm0403914
nm0095017
nm0039162
nm0005026
nm0004486
nm0487884
nm0559890
nm0000228
nm0098378
nm0000422
nm0000225
nm0005346
nm0001151
nm0997240
nm0001099
nm0001473
nm1890769
nm0047962
nm0454120
nm0309107
nm1388074
nm1002207
nm1438146
nm0000174
nm0005112
nm0000225
nm0851582
nm0880484
nm0000149
nm0000375
nm0000174
nm1157358
nm1933035
nm4036323
nm0101526
nm0026364
nm0808917


nm0929489
nm0000164
nm0331516
nm0000657
nm0000158
nm0000210
nm0000450
nm1214435
nm0000132
nm1092227
nm1419440
nm0317725
nm0530365
nm0000149
nm0005024
nm0004710
nm0001774
nm1157358
nm0015196
nm0005476
nm0001767
nm0001131
nm0005315
nm0005351
nm0000501
nm0001518
nm0000542
nm0829576
nm0000120
nm0000515
nm0503567
nm0176981
nm0004695
nm0283945
nm0000194
nm0654110
nm0252230
nm0177933
nm0000202
nm0371660
nm0350453
nm0000375
nm0749263
nm0283945
nm0000686
nm0520064
nm0000100
nm0000353
nm0671487
nm0005517
nm0252961
nm0743896
nm0000204
nm0000163
nm0000867
nm0383603
nm0005392
nm0001667
nm0719637
nm0126284
nm0001015
nm0001618
nm0000242
nm0578949
nm0000126
nm0000193
nm0000458
nm0000136
nm0000307
nm0000614
nm0043855
nm1018679
nm1560274
nm0680983
nm0148418
nm0004950
nm0880484
nm0406975
nm0000084
nm0424060
nm0001473
nm0316079
nm0000671
nm0005405
nm0000215
nm0731075
nm0004883
nm1302735
nm0005048
nm0001315
nm0390229
nm0005476
nm0124930
nm0001065
nm0005476
nm0607375
nm1455681
nm0000621
nm0000535
nm0001803


nm0001648
nm0023832
nm0782561
nm0189887
nm0176869
nm0000223
nm0001416
nm0001473
nm0000450
nm0097842
nm0005458
nm0000579
nm0000501
nm0068166
nm2278177
nm2255271
nm0413168
nm0000191
nm0931329
nm0001427
nm0334179
nm0000257
nm0001449
nm0790688
nm0000102
nm0557859
nm0000366
nm0001687
nm1249256
nm0000189
nm0119993
nm1517976
nm0000614
nm0000597
nm2973632
nm2973596
nm2973590
nm0947338
nm0000375
nm0204706
nm0479471
nm1083271
nm0241049
nm0705356
nm0914612
nm0342488
nm0000799
nm2973712
nm0001652
nm0829576
nm1500155
nm1210124
nm0941777
nm0757855
nm0000244
nm0302108
nm0177896
nm0058581
nm1517976
nm0704270
nm0670408
nm0000702
nm0933988
nm0170306
nm0005380
nm0000491
nm0001459
nm2466842
nm0000113
nm0005210
nm0413168
nm0000630
nm0005351
nm0001774
nm0005562
nm0010736
nm0000113
nm0005351
nm0005460
nm0000131
nm0628601
nm0252230
nm0004874
nm0908094
nm0735442
nm0000598
nm1475594
nm0005541
nm0416673
nm1218757
nm1724323
nm0000553
nm1192254
nm0000463
nm0000120
nm0000198
nm0000147
nm0000158
nm0000191
nm0957909


nm0000146
nm0000098
nm0000867
nm0933940
nm0000152
nm0000332
nm0000160
nm0001803
nm0735442
nm0000134
nm1428821
nm1411125
nm0005256
nm0010736
nm0328828
nm0004395
nm0068338
nm0951148
nm0000244
nm0659363
nm0000207
nm0519043
nm0000223
nm0005278
nm0000377
nm0510168
nm1040365
nm1166613
nm0000329
nm0005520
nm0004854
nm0000237
nm0001667
nm0810738
nm0252961
nm0757855
nm0604742
nm0425005
nm0000671
nm1192254
nm0000702
nm0748620
nm0005562
nm0001451
nm0996669
nm1745736
nm0050156
nm0265668
nm2174090
nm0085312
nm1289434
nm0781981
nm0000128
nm0006969
nm0000553
nm0000906
nm0000194
nm0749263
nm1065229
nm0001845
nm0578788
nm0000138
nm0022931
nm1932136
nm0000112
nm0001917
nm0674742
nm1500155
nm0211087
nm3533310
nm0000106
nm0519043
nm0515296
nm0000147
nm0001691
nm0000307
nm0000409
nm0000222
nm2049403
nm0004778
nm0001631
nm0152035
nm0000191
nm0000112
nm0931404
nm0148418
nm0234668
nm0005443
nm1302735
nm1910274
nm1091701
nm0000179
nm0001845
nm0103797
nm0000250
nm0574534
nm0272706
nm2240346
nm1631269
nm0420955


nm1334869
nm1055413
nm0000243
nm0005351
nm0001598
nm1046097
nm1475594
nm0000554
nm1475594
nm1641117
nm1601397
nm0719637
nm0001838
nm0001570
nm0000255
nm0186505
nm0000422
nm1242688
nm0425005
nm0000323
nm0000243
nm1229204
nm0000332
nm2093097
nm0005517
nm0366389
nm0413168
nm0000285
nm0279545
nm0002071
nm0302108
nm0837177
nm4139037
nm0451234
nm1300009
nm0000230
nm2955013
nm1330276
nm0941777
nm0000553
nm0683253
nm0000136
nm0000201
nm1200692
nm2018237
nm1211488
nm0000353
nm0413168
nm0000128
nm0004266
nm0891514
nm0761052
nm0785594
nm0004937
nm0910607
nm0000138
nm0000242
nm0000610
nm0000295
nm0330687
nm0000246
nm1289434
nm1347153
nm0506405
nm0000612
nm0002907
nm2395937
nm0000553
nm2934314
nm0000210
nm2309517
nm2851530
nm3887625
nm0430107
nm0447695
nm0811242
nm2313103
nm0000658
nm0000169
nm0136797
nm0000295
nm1013003
nm2086223
nm1374980
nm2279940
nm0001100
nm0056187
nm0267506
nm0000604
nm0268199
nm0940158
nm0186505
nm0004755
nm0004989
nm0005405
nm2240346
nm0447695
nm2395586
nm0000702
nm1517976


nm0669681
nm1297015
nm0580351
nm0000152
nm0000160
nm0000365
nm2089090
nm0001803
nm0891786
nm0000154
nm2955013
nm0000198
nm0000148
nm0000354
nm0000531
nm1024677
nm0451148
nm1229940
nm0045393
nm0001845
nm0000291
nm1617685
nm1886602
nm0940362
nm0151419
nm0504897
nm0955471
nm1018221
nm0004462
nm0728938
nm0001376
nm0001136
nm0287182
nm0820053
nm0000602
nm4583512
nm3441152
nm0914612
nm1377561
nm0131781
nm1410318
nm0051509
nm0356017
nm0000980
nm2138653
nm0451321
nm0766470
nm0252961
nm0365140
nm1965907
nm1209966
nm1659547
nm0000422
nm0000602
nm1779870
nm0001804
nm3595501
nm4025993
nm4290762
nm0522306
nm0165749
nm1423952
nm0000195
nm0001473
nm0931404
nm0219206
nm0009918
nm0469823
nm1950086
nm2316017
nm3485845
nm1633541
nm2138653
nm3169069
nm2201555
nm2215447
nm0352778
nm2851530
nm0381723
nm0005515
nm0289142
nm0610325
nm0000169
nm0000199
nm0000686
nm0000273
nm1212722
nm0117709
nm0396924
nm0000112
nm0245988
nm1029222
nm1128572
nm0577329
nm1143861
nm2040932
nm1270857
nm2628285
nm1752221
nm1648381


nm2539953
nm0000126
nm0004742
nm7107400
nm0000867
nm0356017
nm0249291
nm0164809
nm7583273
nm0942876
nm0330687
nm0736622
nm3255459
nm0002006
nm5946818
nm3312554
nm0515116
nm0401264
nm0000148
nm0004395
nm0001057
nm0462712
nm2207222
nm1429380
nm0000257
nm0000182
nm4554428
nm0155693
nm5920962
nm0413168
nm1330560
nm0000702
nm0005527
nm0215682
nm0926165
nm2254074
nm2064412
nm0000545
nm0005351
nm0117709
nm0001132
nm0001749
nm0631490
nm2054764
nm4026000
nm1237541
nm0004978
nm0175305
nm1483196
nm1822659
nm5353321
nm3641002
nm1663205
nm2353862
nm0413168
nm1985859
nm1567113
nm1089991
nm0000602
nm0000560
nm0000668
nm2225369
nm0000134
nm0177896
nm0710447
nm0815370
nm3625321
nm0004874
nm3310211
nm0000704
nm1128572
nm0000112
nm0005562
nm0000658
nm0000177
nm0336701
nm0567031
nm3528539
nm4555381
nm0000553
nm0000438
nm1172478
nm0000188
nm0000473
nm0000422
nm0749263
nm0000474
nm1046097
nm1312575
nm0243233
nm1404239
nm2539953
nm1727304
nm1209966
nm0000288
nm0136797
nm0331516
nm1165110
nm0614165
nm0322407


nm0000194
nm0489858
nm2560232
nm1480656
nm6916321
nm0318821
nm0448204
nm2874732
nm3881333
nm2514879
nm0947338
nm1782299
nm0790057
nm0010169
nm1366568
nm0001745
nm1410318
nm6368720
nm0000616
nm0915637
nm0005438
nm5025909
nm5397459
nm3915784
nm0000434
nm0914612
nm1405398
nm1812656
nm2933757
nm1517976
nm0000705
nm0695435
nm0757855
nm1176985
nm4043618
nm0000474
nm0000375
nm0803889
nm5897057
nm6016511
nm1165110
nm1089991
nm0000949
nm0136797
nm1325419
nm0005295
nm0413168
nm0001772
nm6748436
nm0004874
nm0005458
nm0425005
nm0000255
nm2933757
nm0597388
nm7887725
nm4422686
nm0753314
nm5645519
nm0305558
nm0000973
nm0004715
nm0148418
nm0206257
nm2257207
nm4129745
nm0925966
nm0000285
nm0000114
nm0453994
nm0000136
nm0001691
nm0000849
nm0425005
nm2394794
nm0366389
nm1089991
nm0000168
nm0488953
nm0378245
nm0818055
nm1847117
nm0005562
nm2242399
nm0177933
nm0785227
nm0000437
nm0001872
nm0564215
nm5896355
nm4726634
nm0000242
nm0000164
nm0241049
nm5016878
nm0005562
nm4207679
nm0356021
nm0001451
nm0000586


nm4334711
nm0000545
nm3512758
nm7073477
nm0376716
nm0376540
nm1933128
nm0000190
nm8822933
nm2525790
nm0000182
nm1227814
nm0718957
nm0736872
nm0926202
nm1754526
nm5381254
nm0751518
nm7230577
nm0002071
nm0000604
nm0000146
nm0378245
nm0113117
nm8091769
nm0565250
nm0006969
nm0748973
nm3043279
nm0000358
nm0544334
nm3630119
nm4207146
nm6280112
nm0000288
nm0010736
nm0136797
nm0004715
nm0524839
nm0005169
nm0000313
nm7248827
nm0424848
nm1209966
nm0001426
nm0491259
nm0001029
nm0269077
nm0553648
nm3147751
nm1935086
nm1854664
nm0164809
nm0544718
nm1159180
nm1025139
nm1557329
nm5338858
nm1469236
nm1297015
nm0001838
nm0124930
nm0000198
nm0996669
nm2623492
nm1588066
nm0915989
nm2946516
nm7917803
nm0345695
nm3762213
nm3012064
nm6194955
nm0046029
nm10617765
nm10617766
nm3220568
nm0357979
nm3196216
nm3964350
nm0601553
nm0925966
nm5953898
nm10504739
nm10504740
nm1334869
nm2177933
nm2114889
nm2138653
nm3828984
nm1372788
nm3658958
nm2225545
nm3011011
nm0000602
nm0000729
nm0000651
nm6407242
nm4806678
nm5046

nm0002071
nm2046855
nm10321662
nm10030720
nm4836118
nm2365811
nm0005476
nm0054697
nm0515116
nm0104114
nm11042323
nm0000170
nm1388074
nm0000579
nm0384722
nm0000317
nm0265067
nm6543093
nm4223882
nm2064295
nm0001029
nm0440286
nm1230639
nm0000158
nm8023748
nm0040106
nm4374524
nm1194748
nm4714676
nm0000206
nm0935664
nm1102891
nm0197647
nm1677477
nm6205739
nm2588226
nm0004517
nm5994234
nm1659221
nm0837223
nm8313926
nm4055138
nm0000412
nm5347988
nm0005476
nm1013003
nm1591496
nm0947447
nm0012078
nm2185928
nm0959242
nm1645334
nm0001759
nm2353862
nm0491402
nm0842770
nm2934406
nm4552582
nm0428309
nm2142336
nm5897057
nm7186321
nm1405398
nm1555340
nm2611074
nm8624059
nm3841486
nm1975050
nm1659547
nm3102998
nm1555340
nm0000407
nm1822183
nm4563820
nm1504868
nm3640854
nm0497630
nm3400186
nm0390789
nm3314975
nm3729721
nm3725055
nm0740264
nm0627505
nm0607865
nm2757333
nm2925352
nm4712342
nm0479471
nm0420955
nm6341784
nm5007768
nm5157662
nm0416673
nm0570364
nm0412850
nm2228294
nm0870199
nm1557329
nm14842

nm0366389
nm0571952
nm8402992
nm4973896
nm6170168
nm9296746
nm6421259
nm0000160
nm0000117
nm0001073
nm0000274
nm0001467
nm1741002
nm5245722
nm0000706
nm3513533
nm0000130
nm0000123
nm1116918
nm0000210
nm0205626
nm9096847
nm2736476
nm0000130
nm5506858
nm0183921
nm1475594
nm9208180
nm3590810
nm0424216
nm0828177
nm0690686
nm0000104
nm0000161
nm2957490
nm1092086
nm0000491
nm0000350
nm0000849
nm9121761
nm6658398
nm5290643
nm0000906
nm0000110
nm6073955
nm4089170
nm1517976
nm0095017
nm0141697
nm1890784
nm3569284
nm0803889
nm0519043
nm0637586
nm0386752
nm0394690
nm0350453
nm7187850
nm0005517
nm0000146
nm5896355
nm0396558
nm4533533
nm2594314
nm2573928
nm0002907
nm0000173
nm0051903
nm0071304
nm1102891
nm2089814
nm13952911
nm3678893
nm2748615
nm2812026
nm6015235
nm0675730
nm0350453
nm5584344
nm2555462
nm0000182
nm0005562
nm8349062
nm0000242
nm0000154
nm0915865
nm0000115
nm0050959
nm1840504
nm3718007
nm1412974
nm2325956
nm1859932
nm10682687
nm2111303
nm0000932
nm0933940
nm4263213
nm0148418
nm000016

In [149]:
# write data to csv: movies_clean_v2.csv
df.to_csv('movies_clean_v2.csv', index=False)

In [2]:
df = pd.read_csv('movies_clean_v3.csv')
df.head()

Unnamed: 0,title,year,rated,released,runtime,genre,director,writer,actors,plot,...,budget,num_directors,num_actors,actor_rev5,actor_rev10,director_films5,actor_films5,actor_films10,director_rev10,director_films10
0,Spider-Man,2002,PG-13,2002-05-03,121,Action,Sam Raimi,"Stan Lee, Steve Ditko, David Koepp","Tobey Maguire, Kirsten Dunst, Willem Dafoe",After being bitten by a genetically-modified s...,...,139000000.0,1,3,0,0,0,0,0,0,0
1,Star Wars: Episode II - Attack of the Clones,2002,PG,2002-05-16,142,Action,George Lucas,"George Lucas, Jonathan Hales","Hayden Christensen, Natalie Portman, Ewan McGr...","Ten years after initially meeting, Anakin Skyw...",...,115000000.0,1,3,108638745,108638745,0,1,1,0,0
2,Harry Potter and the Chamber of Secrets,2002,PG,2002-11-15,161,Adventure,Chris Columbus,"J.K. Rowling, Steve Kloves","Daniel Radcliffe, Rupert Grint, Emma Watson",An ancient prophecy seems to be coming true wh...,...,100000000.0,1,3,0,0,0,0,0,0,0
3,Signs,2002,PG-13,2002-08-02,106,Drama,M. Night Shyamalan,M. Night Shyamalan,"Mel Gibson, Joaquin Phoenix, Rory Culkin",A widowed former reverend living with his chil...,...,72000000.0,1,3,78122718,78122718,0,1,1,0,0
4,My Big Fat Greek Wedding,2002,PG,2002-08-02,95,Comedy,Joel Zwick,Nia Vardalos,"Nia Vardalos, John Corbett, Michael Constantine",A young Greek woman falls in love with a non-G...,...,5000000.0,1,3,0,0,0,0,0,0,0


In [6]:
# convert directorid and actorids to list of strings

# Define a lambda function that safely evaluates the input
def safe_eval(x):
    try:
        return ast.literal_eval(x) if x and len(str(x)) > 9 else [x]
    except (SyntaxError, ValueError):
        return [x]

# Apply the modified lambda function to the DataFrame
df['directorid'] = df['directorid'].apply(safe_eval)
df['actorid'] = df['actorid'].apply(lambda x: ast.literal_eval(x) if x and len(str(x)) > 9 else [x])
# df['directorid'] = df['directorid'].apply(lambda x: ast.literal_eval(x) if x and len(str(x)) > 9 else [x])

In [5]:
# convert released column from string to datetime object
df['released'] = pd.to_datetime(df['released'], format='%Y-%m-%d')

In [7]:
# calculate total films
def calculate_total_films(row, column, years):
    date = row['released']
    print(row[column])
 
    strings = row[column]
    
    
    # Calculate the cutoff date 5 years before the current date
    cutoff_date = date - pd.DateOffset(years=years)
    
    total_films = 0
        
    for string in strings:
        # Filter rows with dates within the past 5 years and matching string
        filtered_rows = df[(df['released'] >= cutoff_date) & (df['released'] < date) & (df[column].apply(lambda x: string in x))]
        
        # Sum the revenue for the filtered rows
        films = filtered_rows['title'].count()
        
        total_films += films
    
    return total_films

In [15]:
# Apply the function to each row and store the result in a new column
df['director_films5'] = df.apply(calculate_total_films, args={'directorid', 5}, axis=1)
# df['director_films10'] = df.apply(calculate_total_films, args={'directorid', 10}, axis=1)
# df['actor_films5'] = df.apply(calculate_total_films, args={'actorid', 5}, axis=1)
# df['actor_films10'] = df.apply(calculate_total_films, args={'actorid', 10}, axis=1)

In [10]:
df['director_films10'] = df.apply(calculate_total_films, args={'directorid', 10}, axis=1)

['nm0000600']
['nm0000184']
['nm0001060']
['nm0796117']
['nm0959034']
['nm0001392']
['nm0005366']
['nm0001756']
['nm0917188', 'nm0757858']
['nm0000165']
['nm0331532']
['nm0848414']
['nm0213450', 'nm0761498']
['nm0003418']
['nm0501185']
['nm0000229']
['nm0893659']
['nm0109359']
['nm0855035']
['nm0510731']
['nm0004675']
['nm0000436']
['nm0000631']
['nm0005222']
['nm0000399']
['nm0711840']
['nm0751080']
['nm0001675']
['nm0868219']
['nm0505152']
['nm0908824']
['nm1103162']
['nm0359387']
['nm0038432', 'nm0177170']
['nm0001024']
['nm0451884']
['nm0634240']
['nm0585011']
['nm0591450']
['nm0871860']
['nm0000229']
['nm0911061']
['nm0920425']
['nm0721817']
['nm0001490']
['nm0776271']
['nm1019493']
['nm0492909']
['nm1180460', 'nm0177015']
['nm0506613']
['nm0027572']
['nm0101385']
['nm0002083']
['nm0919363', 'nm0919369']
['nm0788202']
['nm0440458']
['nm0000247']
['nm0000265']
['nm0007082']
['nm0086690']
['nm0625458']
['nm0027271']
['nm0001112']
['nm0000776']
['nm0831690']
['nm0002700']
['nm0223359

['nm0950226']
['nm1600805']
['nm0108132']
['nm0170719']
['nm0001681']
['nm0724938']
['nm0378893']
['nm1369705']
['nm0000127']
['nm0082802']
['nm0007082']
['nm0138927']
['nm0632893']
['nm0002700']
['nm0087904']
['nm0159507']
['nm0957772']
['nm0349183']
['nm0139867']
['nm0743093']
['nm1301035']
['nm0362734']
['nm0000200']
['nm0402365']
['nm0000464']
['nm0776271']
['nm0893659']
['nm0000487']
['nm0000229']
['nm0587955']
['nm0362566']
['nm0572562']
['nm0881038']
['nm0001716']
['nm0001093']
['nm0875793']
['nm0905592']
['nm0834893']
['nm0000601']
['nm0002120']
['nm0562645']
['nm1802009']
['nm0000116', 'nm0702797']
['nm0006960']
['nm0001097']
['nm0149446']
['nm0000965']
['nm0422710', 'nm0699037']
['nm0562266']
['nm0184729']
['nm0796864']
['nm0386570']
['nm0000228']
['nm0093051']
['nm0000876']
['nm0823721']
['nm0594503']
['nm1272773']
['nm0001317']
['nm0000466']
['nm0000948']
['nm0316795']
['nm0126032']
['nm0432380']
['nm0000095']
['nm0705535']
['nm0014960']
['nm0000264']
['nm0286975']
['nm1498

['nm1729171']
['nm0565336']
['nm0379179']
['nm1890845']
['nm0591994']
['nm0000490']
['nm1481493']
['nm0552140']
['nm1732981']
['nm2021984']
['nm0523094']
['nm0212137']
['nm0711149']
['nm0001878']
['nm1291105']
['nm0937748']
['nm0418982']
['nm0146341']
['nm0133899']
['nm0001710']
['nm0000142']
['nm0091076']
['nm0005363']
['nm0000217']
['nm0752328']
['nm1580724']
['nm0773603']
['nm0281598']
['nm0420982']
['nm0093051']
['nm1834176']
['nm2251947']
['nm0008953']
['nm0000165']
['nm1749112', 'nm2277869']
['nm0138706']
['nm0589168']
['nm0908223', 'nm0963216']
['nm0689343']
['nm0000881']
['nm0946734']
['nm0230032', 'nm0677037']
['nm0919363']
['nm0000116']
['nm0680846']
['nm0009190']
['nm1224299', 'nm0970447']
['nm0757858', 'nm0862211']
['nm0359387']
['nm0004303']
['nm0506613']
['nm0281945']
['nm0000386']
['nm0510912']
['nm0814085']
['nm0139867']
['nm0603628']
['nm0000709']
['nm0000165']
['nm0629334']
['nm0520488', 'nm0588087']
['nm0858525']
['nm0000233']
['nm0947087']
['nm0088955']
['nm0082526'

['nm0466349']
['nm0426059']
['nm1347153']
['nm0000916']
['nm0802248']
['nm2503633']
['nm0601337']
['nm0291205']
['nm0617523', 'nm0825407']
['nm0382956']
['nm0153078']
['nm0936482']
['nm1375358', 'nm1376383']
['nm2752098', 'nm0271402']
['nm0629334']
['nm1294036']
['nm1220140']
['nm1160962', 'nm1413364']
['nm0068587']
['nm0373282']
['nm0004410', 'nm0962729']
['nm0138620']
['nm0376006']
['nm0097079']
['nm0101047']
['nm0220600']
['nm0003160']
['nm0000231']
['nm0006960']
['nm0027572']
['nm0125803', 'nm0268380']
['nm0178997']
['nm0103187']
['nm0027271']
['nm1206844']
['nm0428600']
['nm0043742']
['nm0031976']
['nm0788202']
['nm0067457']
['nm0384825']
['nm1890845']
['nm0520749']
['nm1676223']
['nm1228976']
['nm1347153']
['nm0000318']
['nm0275698']
['nm1625338', 'nm2769225']
['nm0867127']
['nm0197636']
['nm0520485', 'nm0627920']
['nm0333804']
['nm1630273']
['nm0288144', 'nm1768412']
['nm0831557']
['nm0751102']
['nm0878756', 'nm0905154', 'nm0905152']
['nm1455688']
['nm0003418']
['nm0193838', 'nm

['nm3734458']
['nm0310673']
['nm1442514']
['nm0393799']
['nm1460159']
['nm0054744']
['nm0000128']
['nm0000095']
['nm0001331']
['nm0484907']
['nm0950426']
['nm0573732']
['nm3273000']
['nm1318596', 'nm0169806']
['nm0170043']
['nm0005139']
['nm0678857']
['nm0015295']
['nm0994553']
['nm2399862']
['nm0001469']
['nm1242054']
['nm1167933']
['nm0346550']
['nm0417054']
['nm0000490']
['nm0888743']
['nm0000876']
['nm0001880']
['nm3011011', 'nm1888091']
['nm0004056', 'nm0533691']
['nm2284484']
['nm0751577', 'nm0751648']
['nm0719208']
['nm0269463']
['nm1783265']
['nm0397174', 'nm0601781', 'nm1158544']
['nm0811583']
['nm0043742']
['nm0220600']
['nm0946734']
['nm0166256', 'nm0615780', 'nm2320658']
['nm0327944']
['nm0339030']
['nm0510912']
['nm0001741']
['nm1134029', 'nm2888554']
['nm0593610', 'nm0230666']
['nm1868917', 'nm0950775']
['nm0082450']
['nm1098493']
['nm0946734']
['nm0000142']
['nm0524190', 'nm0601859']
['nm0443505', 'nm0717678']
['nm0000386']
['nm1490123']
['nm0970447', 'nm0862911']
['nm02

['nm0026442']
['nm0003482']
['nm3147478', 'nm0754817']
['nm1256143']
['nm0175305']
['nm2788015']
['nm10201503']
['nm0950426']
['nm1557594']
['nm4175844']
['nm0780678']
['nm3686896']
['nm11475835']
['nm0133326']
['nm0001707']
['nm0673400']
['nm0002041']
['nm0655683']
['nm0788171']
['nm0899501']
['nm0002191']
['nm0751577', 'nm0751648']
['nm0269463']
['nm2155757']
['nm0118333', 'nm1601644']
['nm1349818', 'nm0281396']
['nm0009190']
['nm1218281']
['nm0005363']
['nm0680846']
['nm0615592']
['nm0440458']
['nm1443502']
['nm0500610']
['nm0821432']
['nm0213450']
['nm0719208', 'nm3646390']
['nm1224299']
['nm0000233']
['nm2497546']
['nm0426059']
['nm0000318']
['nm1461392']
['nm0796117']
['nm1002424']
['nm1139726']
['nm0003506']
['nm0593610']
['nm1032521']
['nm0862911', 'nm0970447']
['nm0257554']
['nm0002077']
['nm0001675']
['nm1969144']
['nm0336620']
['nm2477891']
['nm0000965']
['nm1347153']
['nm0281508']
['nm0006846']
['nm0004217']
['nm1334526']
['nm1783265']
['nm0191717', 'nm1392282']
['nm0090386

['nm14050113']
['nm3255797']
['nm0775056']
['nm0276062']
['nm7530291']
['nm0068587']
['nm0004716']
['nm2114842']
['nm1193346']
['nm1145983']
['nm0711745']
['nm0605137']
['nm1128037']
['nm0396074']
['nm5100377']
['nm0942504']
['nm2378914']
['nm2441699']
['nm0003620']
['nm1258686']
['nm6073824']
['nm5039000']
['nm8015244']
['nm1051221']
['nm1103496']
['nm0000343']
['nm0358327']
['nm0285379', 'nm0938645']
['nm0000264']
['nm3621304']
['nm1771279']
['nm0537923']
['nm1993322']
['nm3013234']
['nm3515746']
['nm0336695']
['nm2379696']
['nm7992231']
['nm0573732']
['nm0844333', 'nm0960867']
['nm2566836', 'nm2575525']
['nm0585011']
['nm1033668']
['nm1892252']
['nm8242386', 'nm1625338', 'nm6583447']
['nm0795290']
['nm1362893']
['nm0661791']
['nm3902889']
['nm3834799']
['nm2500541']
['nm2393385']
['nm0002739', 'nm0325603']
['nm0757975']
['nm3262433']
['nm0001392']
['nm0660528']
['nm0005222']
['nm1351912']
['nm1568265']
['nm3482943']
['nm2270180']
['nm1780037']
['nm0535340', 'nm1532509', 'nm0875453']

In [16]:
df['actor_films5'] = df.apply(calculate_total_films, args={'actorid', 5}, axis=1)
df['actor_films10'] = df.apply(calculate_total_films, args={'actorid', 10}, axis=1)

In [21]:
df.to_csv('movies_w_actors_temp.csv', index=False)

In [17]:
df.columns

Index(['title', 'year', 'rated', 'released', 'runtime', 'genre', 'director',
       'writer', 'actors', 'plot', 'language', 'country', 'awards', 'poster',
       'ratings', 'metascore', 'imdbrating', 'imdbvotes', 'imdbid', 'type',
       'dvd', 'boxoffice', 'production', 'website', 'response', 'directorid',
       'actorid', 'budget', 'num_directors', 'num_actors', 'actor_rev5',
       'actor_rev10', 'director_films5', 'actor_films5', 'actor_films10'],
      dtype='object')

In [2]:
df = pd.read_csv('movies_w_actors_temp.csv')

In [20]:
df.to_csv('movies_clean_v3.csv', index=False)

In [3]:
d5 = pd.read_csv('d5.csv')
len(d5)

3657

In [7]:
df['director_rev5'] = d5

In [10]:
df.director_rev5.describe()

count    3.657000e+03
mean     5.397493e+07
std      1.196838e+08
min      0.000000e+00
25%      0.000000e+00
50%      0.000000e+00
75%      5.701298e+07
max      2.173800e+09
Name: director_rev5, dtype: float64

In [14]:
df.query("director_rev5 > 0").shape[0]

1619

In [15]:
df.to_csv('movies_clean_v4.csv', index=False)