In [1]:
import json
import pandas as pd
import numpy as np

import re

from sqlalchemy import create_engine
import psycopg2

# from config import db_password

import time

In [2]:
#  Add the clean movie function that takes in the argument, "movie".
# 1. Add the clean movie function that takes in the argument, "movie".
alternate_list = ("Arabic, Cantonese, Chinese, French, Hangul, Hebrew, Hepburn, Japanese, Literally,  Mandarin, McCune–Reischauer, Original title, Polish,  Revised Romanization, Romanized, Russian, Simplified, Traditional, Yiddish".split(", "))
alternate_titles = {}
def clean_movie(movie):
        
    modded_movie = dict(movie) #create a non-destructive copy
    for column_key in alternate_list:
        if (column_key in movie):
            alternate_titles[column_key] = movie[column_key]
            movie.pop(column_key)
        if len(alternate_titles) >  0:
            modded_movie['alternate_titles'] = alternate_titles
    
    # nested function
    def change_column_name(old_name, new_name):
        if old_name in movie:
            movie[new_name] = movie.pop(old_name)
    # call nested function
    change_column_name('Adaptation by', 'Writer(s)')
    change_column_name('Country of origin', 'Country')
    change_column_name('Directed by', 'Director')
    change_column_name('Distributed by', 'Distributor')
    change_column_name('Edited by', 'Editor(s)')
    change_column_name('Length', 'Running time')
    change_column_name('Original release', 'Release date')
    change_column_name('Music by', 'Composer(s)')
    change_column_name('Produced by', 'Producer(s)')
    change_column_name('Producer', 'Producer(s)')
    change_column_name('Productioncompanies ', 'Production company(s)')
    change_column_name('Productioncompany ', 'Production company(s)')
    change_column_name('Released', 'Release Date')
    change_column_name('Release Date', 'Release date')
    change_column_name('Screen story by', 'Writer(s)')
    change_column_name('Screenplay by', 'Writer(s)')
    change_column_name('Story by', 'Writer(s)')
    change_column_name('Theme music composer', 'Composer(s)')
    change_column_name('Written by', 'Writer(s)')
    return modded_movie

In [30]:
# 1 Add the function that takes in three arguments;
# Wikipedia data, Kaggle metadata, and MovieLens rating data (from Kaggle)

def extract_transform_load():
    # Read in the kaggle metadata and MovieLens ratings CSV files as Pandas DataFrames.
    kaggle_metadata = pd.read_csv(f'{kaggle_file}')
    ratings = pd.read_csv(f'{ratings_file}')

    # Open and read the Wikipedia data JSON file.
    with open(wiki_file, mode='r') as file:
        wiki_movies_raw = json.load(file)
    #wiki_movies_df = pd.DataFrame(wiki_movies_raw)
    
    # 3. Write a list comprehension to filter out TV shows.
    wiki_movies = [movie for movie in wiki_movies_raw
               if 'No. of episodes' not in movie]

    # 4. Write a list comprehension to iterate through the cleaned wiki movies list
    # and call the clean_movie function on each movie.
    clean_movies = [clean_movie(movie) for movie in wiki_movies]

    # 5. Read in the cleaned movies list from Step 4 as a DataFrame.
    wiki_movies_df = pd.DataFrame(clean_movies)

    # 6. Write a try-except block to catch errors while extracting the IMDb ID using a regular expression string and
    #  dropping any imdb_id duplicates. If there is an error, capture and print the exception.
    mylist = []
    for row in wiki_movies_df['imdb_link']:
        try:
            newid = re.search(r'(tt\d{7})', row).group(0)
            print(newid)
            mylist.append(newid)
        except:
            mylist.append("")
            print(f"{row} nope")
    wiki_movies_df['imdb_id'] = mylist
    
    wiki_movies_df.drop_duplicates(subset='imdb_id', inplace=True)

    #  7. Write a list comprehension to keep the columns that don't have null values from the wiki_movies_df DataFrame.
    wiki_columns_to_keep =[column for column in wiki_movies_df.columns if wiki_movies_df[column].isnull().sum() < len(wiki_movies_df) * 0.9]
    wiki_movies_df = wiki_movies_df[wiki_columns_to_keep]

    # 8. Create a variable that will hold the non-null values from the “Box office” column.
    box_office = wiki_movies_df['Box office'].dropna()
    
    # 9. Convert the box office data created in Step 8 to string values using the lambda and join functions.
    box_office[box_office.map(lambda x: type(x) != str)]
    box_office = box_office.apply(lambda x: ' '.join(x) if type(x) == list else x)
    box_office = box_office.str.replace(r'\$.*[-—–](?![a-z])', '$', regex=True)

    # 10. Write a regular expression to match the six elements of "form_one" of the box office data.
    form_one = r'\$\s*\d+\.?\d*\s*[mb]illi?on'
    matches_form_one = box_office.str.contains(form_one, flags=re.IGNORECASE, na=False)
    
    # 11. Write a regular expression to match the three elements of "form_two" of the box office data.
    form_two = r'\$\s*\d{1,3}(?:[,\.]\d{3})+(?!\s[mb]illion)'
    matches_form_two = box_office.str.contains(form_two, flags=re.IGNORECASE, na=False)
    
    # 12. Add the parse_dollars function.
    def parse_dollars(s):
        # if s is not a string, return NaN
        if type(s) != str:
            return np.nan

        # if input is of the form $###.# million
        if re.match(r'\$\s*\d+\.?\d*\s*milli?on', s, flags=re.IGNORECASE):

            # remove dollar sign and " million"
            s = re.sub('\$|\s|[a-zA-Z]','', s)

            # convert to float and multiply by a million
            value = float(s) * 10**6

            # return value
            return value

        # if input is of the form $###.# billion
        elif re.match(r'\$\s*\d+\.?\d*\s*billi?on', s, flags=re.IGNORECASE):

            # remove dollar sign and " billion"
            s = re.sub('\$|\s|[a-zA-Z]','', s)

            # convert to float and multiply by a billion
            value = float(s) * 10**9

            # return value
            return value

        # if input is of the form $###,###,###
        elif re.match(r'\$\s*\d{1,3}(?:[,\.]\d{3})+(?!\s[mb]illion)', s, flags=re.IGNORECASE):

            # remove dollar sign and commas
            s = re.sub('\$|,','', s)

            # convert to float
            value = float(s)

            # return value
            return value

        # otherwise, return NaN
        else:
            return np.nan
    
        
    # 13. Clean the box office column in the wiki_movies_df DataFrame.
    wiki_movies_df['box_office'] = box_office.str.extract(f'({form_one}|{form_two})', flags=re.IGNORECASE)[0].apply(parse_dollars)
    wiki_movies_df.drop('Box office', axis=1, inplace=True)
    
    
    # 14. Clean the budget column in the wiki_movies_df DataFrame.
    budget = wiki_movies_df['Budget'].dropna()
    budget = budget.map(lambda x: ' '.join(x) if type(x) == list else x)
    budget = budget.str.replace(r'\$.*[-—–](?![a-z])', '$', regex=True)
    budget = budget.str.replace(r'\[\d+\]\s*', '')
    matches_form_one_budget = budget.str.contains(form_one, flags=re.IGNORECASE, na=False)
    matches_form_two_budget = budget.str.contains(form_two, flags=re.IGNORECASE, na=False)
    budget[~matches_form_one_budget & ~matches_form_two_budget]
    wiki_movies_df['budget'] = budget.str.extract(f'({form_one}|{form_two})', flags=re.IGNORECASE)[0].apply(parse_dollars)
    wiki_movies_df.drop('Budget', axis=1, inplace=True)
    
    
    # 15. Clean the release date column in the wiki_movies_df DataFrame.
    release_date = wiki_movies_df['Release date'].dropna().apply(lambda x: ' '.join(x) if type(x) == list else x)
    date_form_one = r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s[123]?\d,\s\d{4}'
    date_form_two = r'\d{4}.[01]\d.[0123]\d'
    date_form_three = r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{4}'
    date_form_four = r'\d{4}'
    release_date.str.extract(f'({date_form_one}|{date_form_two}|{date_form_three}|{date_form_four})', flags=re.IGNORECASE)
    wiki_movies_df['release_date'] = pd.to_datetime(release_date.str.extract(f'({date_form_one}|{date_form_two}|{date_form_three}|{date_form_four})')[0], infer_datetime_format=True)


    # 16. Clean the running time column in the wiki_movies_df DataFrame.
    running_time = wiki_movies_df['Running time'].dropna().apply(lambda x: ' '.join(x) if type(x) == list else x)
    running_time.str.contains(r'^\d*\s*minutes$', flags=re.IGNORECASE, na=False).sum()
    running_time[running_time.str.contains(r'^\d*\s*minutes$', flags=re.IGNORECASE, na=False) != True]
    running_time_extract = running_time.str.extract(r'(\d+)\s*ho?u?r?s?\s*(\d*)|(\d+)\s*m')
    running_time_extract = running_time_extract.apply(lambda col: pd.to_numeric(col, errors='coerce')).fillna(0)
    wiki_movies_df['running_time'] = running_time_extract.apply(lambda row: row[0]*60 + row[1] if row[2] == 0 else row[2], axis=1)
    wiki_movies_df.drop('Running time', axis=1, inplace=True)
    
     
    # 2. Clean the Kaggle metadata.
    kaggle_metadata = kaggle_metadata[kaggle_metadata['adult'] == 'False'].drop('adult',axis='columns')
    kaggle_metadata['video'] = kaggle_metadata['video'] == 'True'
    kaggle_metadata['budget'] = kaggle_metadata['budget'].astype(int)
    kaggle_metadata['id'] = pd.to_numeric(kaggle_metadata['id'], errors='raise')
    kaggle_metadata['popularity'] = pd.to_numeric(kaggle_metadata['popularity'], errors='raise')
    kaggle_metadata['release_date'] = pd.to_datetime(kaggle_metadata['release_date'])
    
    # 3. Merged the two DataFrames into the movies DataFrame.
    movies_df = pd.merge(wiki_movies_df, kaggle_metadata, on='imdb_id', suffixes=['_wiki','_kaggle'])

    
    # 4. Drop unnecessary columns from the merged DataFrame.
    movies_df.drop(columns=['title_wiki','release_date_wiki','Language'], inplace=True)

    
    # 5. Add in the function to fill in the missing Kaggle data.
    def fill_missing_kaggle_data(df, kaggle_column, wiki_column):
        df[kaggle_column] = df.apply(
            lambda row: row[wiki_column] if row[kaggle_column] == 0 else row[kaggle_column]
            , axis=1)
        df.drop(columns=wiki_column, inplace=True)


    # 6. Call the function in Step 5 with the DataFrame and columns as the arguments.
    fill_missing_kaggle_data(movies_df, 'runtime', 'running_time')
    fill_missing_kaggle_data(movies_df, 'budget_kaggle', 'budget_wiki')
    fill_missing_kaggle_data(movies_df, 'revenue', 'box_office')
    movies_df.columns
    # 7. Filter the movies DataFrame for specific columns.
    movies_df = movies_df.loc[:, ['imdb_id','id','title_kaggle','original_title','tagline','belongs_to_collection','url','imdb_link',
                       'runtime','budget_kaggle','revenue','release_date_kaggle','popularity','vote_average','vote_count',
                       'genres','original_language','overview','spoken_languages','Country',
                       'production_companies','production_countries',
                       'Starring','Cinematography','Based on'
                      ]]

    # 8. Rename the columns in the movies DataFrame.
    movies_df.rename({'id':'kaggle_id',
                  'title_kaggle':'title',
                  'url':'wikipedia_url',
                  'budget_kaggle':'budget',
                  'release_date_kaggle':'release_date',
                  'Country':'country',
                  'Distributor':'distributor',
                  'Producer(s)':'producers',
                  'Director':'director',
                  'Starring':'starring',
                  'Cinematography':'cinematography',
                  'Editor(s)':'editors',
                  'Writer(s)':'writers',
                  'Composer(s)':'composers',
                  'Based on':'based_on'
                 }, axis='columns', inplace=True)

    # 9. Transform and merge the ratings DataFrame.
    rating_counts = ratings.groupby(['movieId','rating'], as_index=False).count() \
        .rename({'userId':'count'}, axis=1) \
        .pivot(index='movieId',columns='rating', values='count')
    rating_counts.columns = ['rating_' + str(col) for col in rating_counts.columns]
    movies_with_ratings_df = pd.merge(movies_df, rating_counts, left_on='kaggle_id', right_index=True, how='left')
    
    movies_with_ratings_df[rating_counts.columns] = movies_with_ratings_df[rating_counts.columns].fillna(0)

    
    return wiki_movies_df, movies_with_ratings_df, movies_df


In [31]:
# 10. Create the path to your file directory and variables for the three files.
file_dir = './Resources'
# Wikipedia data
wiki_file = f'{file_dir}/wikipedia.movies.json'
# Kaggle metadata
kaggle_file = f'{file_dir}/movies_metadata.csv'
# MovieLens rating data.
ratings_file = f'{file_dir}/ratings.csv'

In [32]:
# 11. Set the three variables equal to the function created in D1.
wiki_file, kaggle_file, ratings_file = extract_transform_load()

  if (await self.run_code(code, result,  async_=asy)):


tt0098987
tt0098994
tt0099005
tt0099012
tt0099018
tt0099026
tt0099028
tt0101326
tt0099037
tt0099039
tt0099044
tt0099052
tt0099073
tt0099077
tt0099088
tt0099091
tt0099108
tt0099128
tt0099141
tt0099148
tt0099155
tt0099160
tt0099165
tt0099180
tt0101531
tt0099204
tt0099212
tt0103923
tt0154129
tt0096875
tt0099237
tt0099245
tt0099252
tt0099253
tt0101578
tt0099259
tt0099260
tt0099277
tt0099284
tt0099291
tt0099310
tt0099313
tt0099316
tt0099329
tt0099342
tt0099348
nan nope
tt0099364
tt0099365
tt0099368
tt0099371
tt0109575
nan nope
tt0099390
tt0099391
tt0099395
tt0099399
tt0101707
tt0099409
tt0099422
tt0099423
tt0127514
tt0168701
tt0099460
tt0099462
tt0099472
tt0099487
tt0101812
tt0099512
tt0099520
tt0099528
tt0121262
tt0099546
tt0101876
tt0099575
tt0099578
tt0099581
tt0099582
tt0099595
tt0099606
tt0099611
tt0099612
tt0099615
tt0099622
tt0099623
tt0099648
tt0099653
tt0099654
tt0099656
tt0099664
tt0099674
tt0099685
tt0099691
tt0099699
tt0099700
tt0099703
tt0099704
tt0099710
tt0099720
tt0101988
tt

tt0203119
tt0189998
tt0162650
tt0184894
tt0212235
tt0192614
tt0196216
tt0205418
tt0208092
tt0184907
tt0210299
tt0240912
tt0186566
tt0181836
tt0161216
tt0134983
tt0234853
tt0211661
tt0210358
tt0146309
tt0205461
tt0170691
tt0220099
tt0220100
tt0120913
tt0181865
tt0176244
tt0216772
tt0141926
tt0217869
tt0164212
tt0153464
tt0192731
tt0190865
tt0199129
tt0127349
tt0204626
tt0202677
tt0161081
tt0181151
tt0207201
tt0202402
tt0198021
tt0149367
tt0174336
tt0190138
tt0206420
tt0185014
tt0120903
tt0138946
tt0203230
tt0179626
nan nope
tt0233142
tt0212720
tt0242252
tt0248667
tt0250202
tt0164334
tt0213446
tt0265029
tt0243759
tt0244000
tt0252866
tt0221799
tt0280424
tt0225071
tt0255798
tt0254099
tt0218817
tt0230011
tt0255819
tt0219965
tt0230025
tt0268978
tt0159273
tt0247199
tt0260746
tt0265086
tt0265087
tt0221027
tt0166110
nan nope
tt0297034
tt0250274
tt0258470
tt0252299
tt0242193
tt0270259
tt0238112
tt0266391
tt0239395
tt0182000
tt0240402
tt0250310
tt0254199
tt0250224
tt0231402
tt0196267
tt0256524
tt

tt0854678
tt0811106
tt0469494
tt0469623
tt0453556
tt0418279
tt0467110
tt0452702
tt0473308
tt0841046
tt0499556
tt0912599
tt0760329
tt0923985
tt1034469
tt0498399
tt0439876
nan nope
tt0435528
tt0496436
tt0785077
tt0906108
tt0486946
tt0486051
tt0756729
tt0872236
tt0796375
tt0481797
tt0443706
tt0443649
tt0478087
tt0988595
tt0411061
tt0809504
tt1190617
tt0486259
tt0800308
tt0470679
tt0871426
tt0364970
tt1153690
tt0814022
tt0799934
tt0960731
tt1178640
tt1157685
tt1014775
tt0758774
tt0397892
tt0914798
tt0844286
tt0887883
tt1042877
tt0824747
tt0460745
tt0892255
tt1024715
tt0499448
tt0970411
tt1060277
tt0844671
tt0997047
tt0421715
tt0468569
tt0970416
tt0446676
tt0848281
tt0452608
tt0800240
tt1034303
tt0832266
tt0361500
tt1213644
tt0483607
tt0918927
tt0817538
tt1059786
tt0974554
tt1091617
tt0469903
tt0406759
tt1142798
tt1117385
tt0961108
tt1129423
tt0486578
tt1054588
tt0770752
tt0865556
tt0840322
tt0800039
tt0369436
tt0870111
tt0978759
tt0808279
tt1252486
tt0828393
tt0425061
tt0995039
tt1205489
t

  budget = budget.str.replace(r'\[\d+\]\s*', '')


In [33]:
# 12. Set the DataFrames from the return statement equal to the file names in Step 11. 
wiki_movies_df = wiki_file
movies_with_ratings_df = kaggle_file
movies_df = ratings_file

In [34]:
# 13. Check the wiki_movies_df DataFrame. 
wiki_movies_df.head()

Unnamed: 0,url,year,imdb_link,title,Directed by,Produced by,Screenplay by,Story by,Based on,Starring,...,Country,Language,alternate_titles,Written by,Productioncompanies,imdb_id,box_office,budget,release_date,running_time
0,https://en.wikipedia.org/wiki/The_Adventures_o...,1990.0,https://www.imdb.com/title/tt0098987/,The Adventures of Ford Fairlane,Renny Harlin,"[Steve Perry, Joel Silver]","[David Arnott, James Cappe, Daniel Waters]","[David Arnott, James Cappe]","[Characters, by Rex Weiner]","[Andrew Dice Clay, Wayne Newton, Priscilla Pre...",...,United States,English,"{'Traditional': '我們誕生在中國', 'Simplified': '我们诞生...",,,tt0098987,21400000.0,20000000.0,1990-07-11,102.0
1,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",1990.0,https://www.imdb.com/title/tt0098994/,"After Dark, My Sweet",James Foley,"[Ric Kidney, Robert Redlin]","[James Foley, Robert Redlin]",,"[the novel, After Dark, My Sweet, by, Jim Thom...","[Jason Patric, Rachel Ward, Bruce Dern, George...",...,United States,English,"{'Traditional': '我們誕生在中國', 'Simplified': '我们诞生...",,,tt0098994,2700000.0,6000000.0,1990-05-17,114.0
2,https://en.wikipedia.org/wiki/Air_America_(film),1990.0,https://www.imdb.com/title/tt0099005/,Air America,Roger Spottiswoode,Daniel Melnick,"[John Eskow, Richard Rush]",,"[Air America, by, Christopher Robbins]","[Mel Gibson, Robert Downey Jr., Nancy Travis, ...",...,United States,"[English, Lao]","{'Traditional': '我們誕生在中國', 'Simplified': '我们诞生...",,,tt0099005,57718089.0,35000000.0,1990-08-10,113.0
3,https://en.wikipedia.org/wiki/Alice_(1990_film),1990.0,https://www.imdb.com/title/tt0099012/,Alice,Woody Allen,Robert Greenhut,,,,"[Alec Baldwin, Blythe Danner, Judy Davis, Mia ...",...,United States,English,"{'Traditional': '我們誕生在中國', 'Simplified': '我们诞生...",Woody Allen,,tt0099012,7331647.0,12000000.0,1990-12-25,106.0
4,https://en.wikipedia.org/wiki/Almost_an_Angel,1990.0,https://www.imdb.com/title/tt0099018/,Almost an Angel,John Cornell,John Cornell,,,,"[Paul Hogan, Elias Koteas, Linda Kozlowski]",...,US,English,"{'Traditional': '我們誕生在中國', 'Simplified': '我们诞生...",Paul Hogan,,tt0099018,6939946.0,25000000.0,1990-12-19,95.0


In [35]:
# 14. Check the movies_with_ratings_df DataFrame.
movies_with_ratings_df.head()

Unnamed: 0,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,budget,...,rating_0.5,rating_1.0,rating_1.5,rating_2.0,rating_2.5,rating_3.0,rating_3.5,rating_4.0,rating_4.5,rating_5.0
0,tt0098987,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,49000000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,tt0098994,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,6000000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,tt0099005,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,35000000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,tt0099012,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,12000000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,tt0099018,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,25000000.0,...,3.0,0.0,3.0,2.0,5.0,26.0,37.0,46.0,16.0,11.0


In [37]:
# 15. Check the movies_df DataFrame. 
movies_df.head()

Unnamed: 0,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,budget,...,genres,original_language,overview,spoken_languages,country,production_companies,production_countries,starring,cinematography,based_on
0,tt0098987,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,49000000.0,...,"[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...",en,"Ford ""Mr. Rock n' Roll Detective"" Fairlane is ...","[{'iso_639_1': 'en', 'name': 'English'}]",United States,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...","[Andrew Dice Clay, Wayne Newton, Priscilla Pre...",Oliver Wood,"[Characters, by Rex Weiner]"
1,tt0098994,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,6000000.0,...,"[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...",en,The intriguing relationship between three desp...,"[{'iso_639_1': 'en', 'name': 'English'}]",United States,"[{'name': 'Avenue Pictures Productions', 'id':...","[{'iso_3166_1': 'US', 'name': 'United States o...","[Jason Patric, Rachel Ward, Bruce Dern, George...",Mark Plummer,"[the novel, After Dark, My Sweet, by, Jim Thom..."
2,tt0099005,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,35000000.0,...,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",en,Air America was the CIA's private airline oper...,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",United States,"[{'name': 'IndieProd Company Productions', 'id...","[{'iso_3166_1': 'US', 'name': 'United States o...","[Mel Gibson, Robert Downey Jr., Nancy Travis, ...",Roger Deakins,"[Air America, by, Christopher Robbins]"
3,tt0099012,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,12000000.0,...,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",en,"Alice Tate, mother of two, with a marriage of ...","[{'iso_639_1': 'en', 'name': 'English'}]",United States,"[{'name': 'Orion Pictures', 'id': 41}]","[{'iso_3166_1': 'US', 'name': 'United States o...","[Alec Baldwin, Blythe Danner, Judy Davis, Mia ...",Carlo Di Palma,
4,tt0099018,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,25000000.0,...,"[{'id': 14, 'name': 'Fantasy'}, {'id': 35, 'na...",en,Terry Dean is an electronics wizard and thief....,"[{'iso_639_1': 'en', 'name': 'English'}]",US,"[{'name': 'Paramount Pictures', 'id': 4}]","[{'iso_3166_1': 'US', 'name': 'United States o...","[Paul Hogan, Elias Koteas, Linda Kozlowski]",Russell Boyd,
