In [1]:
import json
import pandas as pd
import re
import numpy as np
from pprint import pprint

In [2]:
# Load the raw movie records. JSON interchange files are UTF-8, so the encoding is
# stated explicitly instead of relying on the platform's locale default, and the
# file handle is parsed directly rather than via an intermediate string.
with open('./Resources/movie_data.json', encoding='utf-8') as load_file:
    movie_json = json.load(load_file)

In [3]:
# Display the first raw record to inspect the fields available on each movie.
movie_json[0]

{'adult': False,
 'backdrop_path': '/kXfqcdQKsToO0OUXHcrrNCHDBzO.jpg',
 'belongs_to_collection': None,
 'budget': 25000000,
 'genres': [{'id': 18, 'name': 'Drama'}, {'id': 80, 'name': 'Crime'}],
 'homepage': '',
 'id': 278,
 'imdb_id': 'tt0111161',
 'original_language': 'en',
 'original_title': 'The Shawshank Redemption',
 'overview': 'Framed in the 1940s for the double murder of his wife and her lover, upstanding banker Andy Dufresne begins a new life at the Shawshank prison, where he puts his accounting skills to work for an amoral warden. During his long stretch in prison, Dufresne comes to be admired by the other inmates -- including an older prisoner named Red -- for his integrity and unquenchable sense of hope.',
 'popularity': 97.167,
 'poster_path': '/lyQBXzOQSuE59IsHyhrp0qIiPAz.jpg',
 'production_companies': [{'id': 97,
   'logo_path': '/qv3ih9pR9w2XNKZDsqDqAGuZjqc.png',
   'name': 'Castle Rock Entertainment',
   'origin_country': 'US'}],
 'production_countries': [{'iso_3166_1

In [4]:
# Collect, across all movies: every genre name, the first spoken language and the
# first production country. These flat lists are tallied in the following cells.

genres_ls = []
language_ls = []
country_ls = []

# Only "field missing / list empty / value is not a list or dict" counts as
# "unknown"; anything else (e.g. KeyboardInterrupt) should propagate.
_LOOKUP_ERRORS = (KeyError, IndexError, TypeError)

for movie in movie_json:

    # A movie contributes one entry per genre it lists (possibly none).
    try:
        for genre in movie['genres']:
            genres_ls.append(genre['name'])
    except _LOOKUP_ERRORS:
        genres_ls.append('Other')

    # Only the first listed language is used.
    try:
        language_ls.append(movie['spoken_languages'][0]['english_name'])
    except _LOOKUP_ERRORS:
        language_ls.append('Other')

    # Only the first listed production country is used.
    try:
        country_ls.append(movie['production_countries'][0]['name'])
    except _LOOKUP_ERRORS:
        country_ls.append('Other')

In [5]:
# Tally how often each genre occurs. 'label_genre' repeats the genre name, except
# that genres seen fewer than 100 times are relabelled 'Other'.
genres_df = (
    pd.DataFrame(genres_ls, columns=['genres'])
    .value_counts()
    .to_frame('count')
    .reset_index(drop=False)
    .assign(label_genre=lambda counts: counts['genres'].where(counts['count'] >= 100, 'Other'))
)
genres_df.head(25)

Unnamed: 0,genres,count,label_genre
0,Drama,2354,Drama
1,Comedy,1809,Comedy
2,Thriller,1249,Thriller
3,Action,1078,Action
4,Romance,862,Romance
5,Adventure,801,Adventure
6,Crime,744,Crime
7,Horror,667,Horror
8,Family,594,Family
9,Science Fiction,559,Science Fiction


In [6]:
# Genre names in the row order of genres_df (most frequent first). The record-building
# loop keeps the LAST name from this list that a movie matches, so a movie's key genre
# ends up being the least common of its genres.
key_genres_ls = genres_df['genres'].tolist()

key_genres_ls

['Drama',
 'Comedy',
 'Thriller',
 'Action',
 'Romance',
 'Adventure',
 'Crime',
 'Horror',
 'Family',
 'Science Fiction',
 'Fantasy',
 'Animation',
 'Mystery',
 'History',
 'War',
 'Music',
 'TV Movie',
 'Western']

In [7]:
# Count movies per (first) spoken language. 'label_language' repeats the language name,
# except that languages seen fewer than 20 times are relabelled 'Other'.
language_df = pd.DataFrame(language_ls , columns=['spoken_language'] ).value_counts().to_frame('count').reset_index(drop=False)
language_df['label_language'] = language_df['spoken_language']
language_df.loc[ language_df['count'] < 20 , 'label_language'] = 'Other'

# The lookup table must contain an 'Other' key so records that fell back to 'Other'
# still receive a label when merged. Add it only when it is not already a counted
# value: a second 'Other' row would duplicate every matching movie in a left merge.
if not language_df['spoken_language'].eq('Other').any():
    language_df = pd.concat(
        [ language_df , pd.DataFrame( [{ 'spoken_language' : 'Other' , 'count' : 0, 'label_language' : 'Other'}] ) ],
        ignore_index=True,  # keep row labels unique instead of repeating label 0
    )
language_df

Unnamed: 0,spoken_language,count,label_language
0,English,3523,English
1,French,320,French
2,Italian,216,Italian
3,Spanish,168,Spanish
4,Japanese,153,Japanese
...,...,...,...
61,Bosnian,1,Other
62,Bambara,1,Other
63,Amharic,1,Other
64,Yiddish,1,Other


In [8]:
# Count movies per (first) production country. 'label_origin' repeats the country name,
# except that countries seen fewer than 20 times are relabelled 'Other'.
origin_df = pd.DataFrame(country_ls , columns=['origin'] ).value_counts().to_frame('count').reset_index(drop=False)
origin_df['label_origin'] = origin_df['origin']
origin_df.loc[ origin_df['count'] < 20 , 'label_origin'] = 'Other'

# The lookup table must contain an 'Other' key so records that fell back to 'Other'
# still receive a label when merged. Add it only when it is not already a counted
# value: a second 'Other' row would duplicate every matching movie in a left merge.
if not origin_df['origin'].eq('Other').any():
    origin_df = pd.concat(
        [ origin_df , pd.DataFrame( [{ 'origin' : 'Other' , 'count' : 0, 'label_origin' : 'Other'}] ) ],
        ignore_index=True,  # keep row labels unique instead of repeating label 0
    )
origin_df.head(25)

Unnamed: 0,origin,count,label_origin
0,United States of America,2529,United States of America
1,France,427,France
2,United Kingdom,357,United Kingdom
3,Canada,251,Canada
4,Italy,224,Italy
5,Japan,174,Japan
6,Germany,162,Germany
7,Belgium,113,Belgium
8,Spain,82,Spain
9,Australia,76,Australia


In [9]:
# Column layout planned for the cleaned dataset, grouped by theme.
new_keys = [
    # identifiers and categorical descriptors
    'id',
    'imdb_id',
    'all_genres',
    'key_genre',
    'spoken_language',
    'origin',
    'release_year',
    'runtime',
    'awards',
    # free-text fields
    'title',
    'director',
    'writer',
    'actors',
    'overview',
    'tagline',
    'plot',
    # money and ratings
    'budget',
    'revenue',
    'rating_tmdb',
    'votes_tmdb',
    'rating_imdb',
    'votes_imdb',
    'rating_rotten_tomatoes',
    'rating_metacritic',
    'rating_intranet',
]

In [10]:
# Flatten every raw record in movie_json into one flat dict per movie; the resulting
# list (clean_json) is what the DataFrame is built from.

def _first_entry_value(entries, key, default='Other'):
    """Return ``entries[0][key]``; ``default`` if the list is missing, empty or malformed."""
    try:
        return entries[0][key]
    except (KeyError, IndexError, TypeError):
        return default


def _first_rating(ratings, source):
    """Return the 'Value' of the first rating whose 'Source' equals ``source``, else ''."""
    try:
        return next((rating['Value'] for rating in ratings if rating['Source'] == source), "")
    except (KeyError, TypeError):
        return ""


# output column -> key inside a record's 'imdb_data' payload
_IMDB_FIELDS = {
    'awards': 'Awards',
    'director': 'Director',
    'writer': 'Writer',
    'actors': 'Actors',
    'plot': 'Plot',
    'rating_imdb': 'imdbRating',
    'votes_imdb': 'imdbVotes',
}

# output column -> 'Source' label inside imdb_data['Ratings']
_RATING_SOURCES = {
    'rating_rotten_tomatoes': 'Rotten Tomatoes',
    'rating_metacritic': 'Metacritic',
    'rating_intranet': 'Internet Movie Database',
}

clean_json = []

for i, movie in enumerate(movie_json):

    record = {}

    record['id'] = movie['id']
    record['imdb_id'] = movie['imdb_id']

    # All genre names of this movie, joined into one display string. Computed fresh
    # for every movie so a record without genres can never inherit the previous
    # movie's genres (it gets '' and no key genre instead).
    genre_names = [genre['name'] for genre in movie['genres']]
    record['all_genres'] = ', '.join(genre_names)

    # key_genres_ls is ordered as genres_df is (most frequent first); keeping the LAST
    # match selects the least common of the movie's genres. Exact membership is used
    # so genre names are never interpreted as regular expressions.
    matching_genres = [name for name in key_genres_ls if name in genre_names]
    record['key_genre'] = matching_genres[-1] if matching_genres else None

    # First listed language / production country, or 'Other' when unavailable.
    record['spoken_language'] = _first_entry_value(movie.get('spoken_languages'), 'english_name')
    record['origin'] = _first_entry_value(movie.get('production_countries'), 'name')

    # The first four characters of release_date are kept as the year (still a string here).
    record['release_year'] = movie['release_date'][0:4]
    record['runtime'] = movie['runtime']

    record['title'] = movie['title']
    record['overview'] = movie['overview']
    record['tagline'] = movie['tagline']

    record['budget'] = movie['budget']
    record['revenue'] = movie['revenue']

    record['rating_tmdb'] = movie['vote_average']
    record['votes_tmdb'] = movie['vote_count']

    imdb_data = movie['imdb_data']
    if imdb_data['Response'] == 'True':
        for column, imdb_key in _IMDB_FIELDS.items():
            record[column] = imdb_data[imdb_key]

        # Ratings is a list of {'Source': ..., 'Value': ...}; a missing source yields "".
        ratings = imdb_data['Ratings']
        for column, source in _RATING_SOURCES.items():
            record[column] = _first_rating(ratings, source)

    else:
        # The IMDb lookup failed for this movie: keep the row, leave the IMDb-derived
        # columns blank and report the position of the record.
        for column in list(_IMDB_FIELDS) + list(_RATING_SOURCES):
            record[column] = ""

        print(f'imdb error {i}')

    clean_json.append(record)

    # lightweight progress report every 500 records
    if i % 500 == 0 :
        print(f'{i} / { len(movie_json) }')
    

0 / 4997
500 / 4997
imdb error 785
1000 / 4997
1500 / 4997
2000 / 4997
2500 / 4997
3000 / 4997
3500 / 4997
4000 / 4997
4500 / 4997


In [11]:
# Turn the list of flattened movie records into a table: one row per record,
# one column per key. Show the first five rows as a sanity check.
clean_df = pd.DataFrame(data=clean_json)
clean_df.head(n=5)

Unnamed: 0,id,imdb_id,all_genres,key_genre,spoken_language,origin,release_year,runtime,title,overview,...,awards,director,writer,actors,plot,rating_imdb,votes_imdb,rating_rotten_tomatoes,rating_metacritic,rating_intranet
0,278,tt0111161,"Drama, Crime",Crime,English,United States of America,1994,142,The Shawshank Redemption,Framed in the 1940s for the double murder of h...,...,Nominated for 7 Oscars. 21 wins & 42 nominatio...,Frank Darabont,"Stephen King, Frank Darabont","Tim Robbins, Morgan Freeman, Bob Gunton","Over the course of several years, two convicts...",9.3,2802199,91%,82/100,9.3/10
1,424,tt0108052,"Drama, History, War",War,German,United States of America,1993,195,Schindler's List,The true story of how businessman Oskar Schind...,...,Won 7 Oscars. 91 wins & 49 nominations total,Steven Spielberg,"Thomas Keneally, Steven Zaillian","Liam Neeson, Ralph Fiennes, Ben Kingsley","In German-occupied Poland during World War II,...",9.0,1404593,98%,95/100,9.0/10
2,569094,tt9362722,"Animation, Action, Adventure",Animation,English,United States of America,2023,140,Spider-Man: Across the Spider-Verse,"After reuniting with Gwen Stacy, Brooklyn’s fu...",...,3 wins & 1 nomination,"Joaquim Dos Santos, Kemp Powers, Justin K. Tho...","Phil Lord, Christopher Miller, Dave Callaham","Shameik Moore, Hailee Steinfeld, Brian Tyree H...","Miles Morales catapults across the Multiverse,...",8.7,252325,95%,86/100,8.7/10
3,129,tt0245429,"Animation, Family, Fantasy",Animation,Japanese,Japan,2001,125,Spirited Away,"A young girl, Chihiro, becomes trapped in a st...",...,Won 1 Oscar. 58 wins & 31 nominations total,Hayao Miyazaki,Hayao Miyazaki,"Daveigh Chase, Suzanne Pleshette, Miyu Irino","During her family's move to the suburbs, a sul...",8.6,807746,96%,96/100,8.6/10
4,496243,tt6751668,"Comedy, Thriller, Drama",Thriller,English,South Korea,2019,133,Parasite,"All unemployed, Ki-taek's family takes peculia...",...,Won 4 Oscars. 306 wins & 264 nominations total,Bong Joon Ho,"Bong Joon Ho, Han Jin-won","Song Kang-ho, Lee Sun-kyun, Cho Yeo-jeong",Greed and class discrimination threaten the ne...,8.5,891596,99%,96/100,8.5/10


In [12]:
# Column dtypes and non-null counts before any type conversion.
clean_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4997 entries, 0 to 4996
Data columns (total 25 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   id                      4997 non-null   int64  
 1   imdb_id                 4996 non-null   object 
 2   all_genres              4997 non-null   object 
 3   key_genre               4997 non-null   object 
 4   spoken_language         4997 non-null   object 
 5   origin                  4997 non-null   object 
 6   release_year            4997 non-null   object 
 7   runtime                 4997 non-null   int64  
 8   title                   4997 non-null   object 
 9   overview                4997 non-null   object 
 10  tagline                 4997 non-null   object 
 11  budget                  4997 non-null   int64  
 12  revenue                 4997 non-null   int64  
 13  rating_tmdb             4997 non-null   float64
 14  votes_tmdb              4997 non-null   

In [13]:
# clean data: convert the string-valued columns to numbers and put the three
# critic scores on a common 0-10 scale.
#
# NOTE(review): the columns of clean_df are rewritten in place, so this cell is not
# safe to run twice on the same frame (already-numeric values would be re-parsed and
# re-scaled). Re-create clean_df before re-running it.

# Year and IMDb rating arrive as strings; anything that is not a number becomes NaN.
clean_df['release_year'] = pd.to_numeric( clean_df['release_year'] , errors='coerce')
clean_df['rating_imdb'] = pd.to_numeric( clean_df['rating_imdb'] , errors='coerce')

# votes_imdb: drop every non-digit character, then parse.
clean_df['votes_imdb'] = pd.to_numeric(
    clean_df['votes_imdb'].astype(str).str.replace(r'[^0-9]', '', regex=True),
    errors='coerce',
)

# Rotten Tomatoes: '78%' -> 7.8
clean_df['rating_rotten_tomatoes'] = pd.to_numeric(
    clean_df['rating_rotten_tomatoes'].astype(str).str.replace('%', '', regex=False),
    errors='coerce',
) / 10

# Metacritic: '78/100' -> 7.8
clean_df['rating_metacritic'] = pd.to_numeric(
    clean_df['rating_metacritic'].astype(str).str.replace('/100', '', regex=False),
    errors='coerce',
) / 10

# Internet Movie Database entry of the Ratings list: '7.8/10' -> 7.8
clean_df['rating_intranet'] = pd.to_numeric(
    clean_df['rating_intranet'].astype(str).str.replace('/10', '', regex=False),
    errors='coerce',
)

# Round every rating to the nearest whole number. This deliberately discards the
# decimal part, leaving a small set of integer-valued rating levels.
rating_columns = ['rating_tmdb', 'rating_imdb', 'rating_rotten_tomatoes', 'rating_metacritic', 'rating_intranet']
for column in rating_columns:
    clean_df[column] = clean_df[column].round()

In [14]:
# Column dtypes and non-null counts after the numeric conversions.
clean_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4997 entries, 0 to 4996
Data columns (total 25 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   id                      4997 non-null   int64  
 1   imdb_id                 4996 non-null   object 
 2   all_genres              4997 non-null   object 
 3   key_genre               4997 non-null   object 
 4   spoken_language         4997 non-null   object 
 5   origin                  4997 non-null   object 
 6   release_year            4997 non-null   int64  
 7   runtime                 4997 non-null   int64  
 8   title                   4997 non-null   object 
 9   overview                4997 non-null   object 
 10  tagline                 4997 non-null   object 
 11  budget                  4997 non-null   int64  
 12  revenue                 4997 non-null   int64  
 13  rating_tmdb             4997 non-null   float64
 14  votes_tmdb              4997 non-null   

In [15]:
# Attach the reduced category labels (label_genre / label_language / label_origin)
# to every movie via left joins against the three lookup tables, then drop exact
# duplicate rows and renumber the index.
reduce_df = (
    clean_df
    .merge(genres_df[['genres', 'label_genre']], how='left', left_on='key_genre', right_on='genres')
    .drop(columns='genres')  # the join key from the lookup side is redundant with key_genre
    .merge(language_df[['spoken_language', 'label_language']], how='left', on='spoken_language')
    .merge(origin_df[['origin', 'label_origin']], how='left', on='origin')
    .drop_duplicates()
    .reset_index(drop=True)
)

reduce_df

Unnamed: 0,id,imdb_id,all_genres,key_genre,spoken_language,origin,release_year,runtime,title,overview,...,actors,plot,rating_imdb,votes_imdb,rating_rotten_tomatoes,rating_metacritic,rating_intranet,label_genre,label_language,label_origin
0,278,tt0111161,"Drama, Crime",Crime,English,United States of America,1994,142,The Shawshank Redemption,Framed in the 1940s for the double murder of h...,...,"Tim Robbins, Morgan Freeman, Bob Gunton","Over the course of several years, two convicts...",9.0,2802199.0,9.0,8.0,9.0,Crime,English,United States of America
1,424,tt0108052,"Drama, History, War",War,German,United States of America,1993,195,Schindler's List,The true story of how businessman Oskar Schind...,...,"Liam Neeson, Ralph Fiennes, Ben Kingsley","In German-occupied Poland during World War II,...",9.0,1404593.0,10.0,10.0,9.0,War,German,United States of America
2,569094,tt9362722,"Animation, Action, Adventure",Animation,English,United States of America,2023,140,Spider-Man: Across the Spider-Verse,"After reuniting with Gwen Stacy, Brooklyn’s fu...",...,"Shameik Moore, Hailee Steinfeld, Brian Tyree H...","Miles Morales catapults across the Multiverse,...",9.0,252325.0,10.0,9.0,9.0,Animation,English,United States of America
3,129,tt0245429,"Animation, Family, Fantasy",Animation,Japanese,Japan,2001,125,Spirited Away,"A young girl, Chihiro, becomes trapped in a st...",...,"Daveigh Chase, Suzanne Pleshette, Miyu Irino","During her family's move to the suburbs, a sul...",9.0,807746.0,10.0,10.0,9.0,Animation,Japanese,Japan
4,496243,tt6751668,"Comedy, Thriller, Drama",Thriller,English,South Korea,2019,133,Parasite,"All unemployed, Ki-taek's family takes peculia...",...,"Song Kang-ho, Lee Sun-kyun, Cho Yeo-jeong",Greed and class discrimination threaten the ne...,8.0,891596.0,10.0,10.0,8.0,Thriller,English,South Korea
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4992,19912,tt1144884,"Horror, Mystery",Mystery,English,United States of America,2009,82,The Final Destination,After a young man's premonition of a deadly ra...,...,"Nick Zano, Krista Allen, Andrew Fiscella",A horrifying premonition saves a young man and...,5.0,111703.0,3.0,3.0,5.0,Mystery,English,United States of America
4993,823461,tt8231668,"Comedy, Romance",Romance,English,United States of America,2021,92,Good on Paper,After years of putting her career ahead of lov...,...,"Iliza Shlesinger, Britney Young, Christopher N...","After years of putting her career first, a sta...",6.0,13475.0,5.0,5.0,6.0,Romance,English,United States of America
4994,582607,tt8743064,"Science Fiction, Drama, Adventure, Crime, Action",Science Fiction,English,United States of America,2019,86,See You Yesterday,As two teen prodigies try to master the art of...,...,"Eden Duncan-Smith, Dante Crichlow, Astro","Two Brooklyn teenage prodigies, C.J. Walker an...",5.0,11378.0,9.0,7.0,5.0,Science Fiction,English,United States of America
4995,300467,tt2251281,"Horror, Thriller",Horror,English,United States of America,2014,97,Dark Was the Night,An evil is unleashed in a small town when a lo...,...,"Kevin Durand, Lukas Haas, Bianca Kajlich",An evil is unleashed in a small town when a lo...,6.0,10805.0,5.0,4.0,6.0,Horror,English,United States of America


In [16]:
# Summary statistics of the numeric columns of the merged table.
reduce_df.describe()

Unnamed: 0,id,release_year,runtime,budget,revenue,rating_tmdb,votes_tmdb,rating_imdb,votes_imdb,rating_rotten_tomatoes,rating_metacritic,rating_intranet
count,4997.0,4997.0,4997.0,4997.0,4997.0,4997.0,4997.0,4992.0,4996.0,4179.0,3603.0,4992.0
mean,181066.5,2002.846908,105.254953,19953280.0,60006740.0,6.752852,1690.961577,6.651643,88752.65,6.582675,6.011379,6.651643
std,237313.1,18.275305,23.427602,37743780.0,149033000.0,0.6893,2931.370971,0.884562,175444.1,2.575624,1.754073,0.884562
min,2.0,1902.0,0.0,0.0,0.0,5.0,201.0,2.0,35.0,0.0,1.0,2.0
25%,10320.0,1995.0,93.0,0.0,0.0,6.0,334.0,6.0,13049.5,5.0,5.0,6.0
50%,34067.0,2009.0,103.0,4000000.0,6739141.0,7.0,631.0,7.0,30638.5,7.0,6.0,7.0
75%,341012.0,2016.0,117.0,23000000.0,50500000.0,7.0,1604.0,7.0,89953.5,9.0,7.0,7.0
max,1077280.0,2023.0,366.0,379000000.0,2264162000.0,9.0,32606.0,9.0,2802199.0,10.0,10.0,9.0


In [17]:
# Export the merged table for Tableau. The row index is written as well, so the
# CSV starts with an unnamed index column.
reduce_df.to_csv(path_or_buf='./Resources/movie_tableau.csv', index=True)

In [18]:
# Machine-learning export: the identifier columns and the raw category columns are
# removed (the label_* columns stay), then the frame is written to CSV together with
# its row index.
ml_df = reduce_df.drop(columns=['id', 'imdb_id', 'key_genre', 'spoken_language', 'origin'])

ml_df.to_csv(path_or_buf='./Resources/movie_machine_learning.csv', index=True)

In [4]:
# Read ./Resources/TMDB10000.csv (using its 'Unnamed: 0' column as the index), keep
# only the id, release_date and title columns, and write the result back to the SAME path.
# NOTE(review): the original comment called this the "oscar winning movie list", but the
# file read here is TMDB10000.csv — confirm which dataset is intended.
# NOTE(review): this overwrites the source file in place, so every other column of the
# original CSV is lost on disk after the first run — confirm that is intended, or write
# to a different file name.
movie_df = pd.read_csv("./Resources/TMDB10000.csv", index_col='Unnamed: 0')
movie_df = movie_df[['id','release_date','title']]
movie_df.to_csv("./Resources/TMDB10000.csv")

Unnamed: 0_level_0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,keywords,cast,crew
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,False,https://image.tmdb.org/t/p/original//xOA4MbyJZ...,"['Drama', 'Crime']",238,en,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",124.718,https://image.tmdb.org/t/p/original//3bhkrj58V...,1972-03-14,The Godfather,False,8.7,18148,"[{'id': 131, 'name': 'italy'}, {'id': 697, 'na...","[{'adult': False, 'gender': 2, 'id': 3084, 'kn...","[{'adult': False, 'gender': 2, 'id': 154, 'kno..."
1,False,https://image.tmdb.org/t/p/original//xOA4MbyJZ...,"['Drama', 'Crime']",278,en,The Shawshank Redemption,Framed in the 1940s for the double murder of h...,92.927,https://image.tmdb.org/t/p/original//lyQBXzOQS...,1994-09-23,The Shawshank Redemption,False,8.7,24030,"[{'id': 378, 'name': 'prison'}, {'id': 417, 'n...","[{'adult': False, 'gender': 2, 'id': 504, 'kno...","[{'adult': False, 'gender': 2, 'id': 153, 'kno..."
2,False,https://image.tmdb.org/t/p/original//xOA4MbyJZ...,"['Drama', 'Crime']",240,en,The Godfather Part II,In the continuing saga of the Corleone crime f...,61.583,https://image.tmdb.org/t/p/original//bMadFzhjy...,1974-12-20,The Godfather Part II,False,8.6,10955,"[{'id': 131, 'name': 'italy'}, {'id': 700, 'na...","[{'adult': False, 'gender': 2, 'id': 1158, 'kn...","[{'adult': False, 'gender': 2, 'id': 154, 'kno..."
3,False,https://image.tmdb.org/t/p/original//xOA4MbyJZ...,"['Drama', 'History', 'War']",424,en,Schindler's List,The true story of how businessman Oskar Schind...,48.961,https://image.tmdb.org/t/p/original//sF1U4EUQS...,1993-12-15,Schindler's List,False,8.6,14203,"[{'id': 818, 'name': 'based on novel or book'}...","[{'adult': False, 'gender': 2, 'id': 3896, 'kn...","[{'adult': False, 'gender': 2, 'id': 491, 'kno..."
4,False,https://image.tmdb.org/t/p/original//xOA4MbyJZ...,"['Comedy', 'Drama', 'Romance']",19404,hi,दिलवाले दुल्हनिया ले जायेंगे,"Raj is a rich, carefree, happy-go-lucky second...",28.639,https://image.tmdb.org/t/p/original//ktejodbcd...,1995-10-19,Dilwale Dulhania Le Jayenge,False,8.6,4157,"[{'id': 2783, 'name': ""family's daily life""}, ...","[{'adult': False, 'gender': 2, 'id': 35742, 'k...","[{'adult': False, 'gender': 1, 'id': 8311, 'kn..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,False,https://image.tmdb.org/t/p/original//xOA4MbyJZ...,"['Horror', 'Thriller']",300467,en,Dark Was the Night,An evil is unleashed in a small town when a lo...,8.134,https://image.tmdb.org/t/p/original//xSC7sWXba...,2014-10-17,Dark Was the Night,False,5.6,223,"[{'id': 447, 'name': 'post traumatic stress di...","[{'adult': False, 'gender': 2, 'id': 79072, 'k...","[{'adult': False, 'gender': 2, 'id': 59657, 'k..."
9996,False,https://image.tmdb.org/t/p/original//xOA4MbyJZ...,"['Action', 'Crime', 'Thriller']",106747,en,Machete Kills,Ex-Federale agent Machete is recruited by the ...,14.935,https://image.tmdb.org/t/p/original//3i4UgSZmA...,2013-09-12,Machete Kills,False,5.6,1717,"[{'id': 534, 'name': 'mexico'}, {'id': 833, 'n...","[{'adult': False, 'gender': 2, 'id': 11160, 'k...","[{'adult': False, 'gender': 2, 'id': 2294, 'kn..."
9997,False,https://image.tmdb.org/t/p/original//xOA4MbyJZ...,"['Adventure', 'Comedy', 'Family', 'Romance']",10603,en,George of the Jungle,"Deep in the African jungle, a baby named Georg...",16.061,https://image.tmdb.org/t/p/original//lWp8hUqE4...,1997-07-15,George of the Jungle,False,5.6,1875,"[{'id': 409, 'name': 'africa'}, {'id': 582, 'n...","[{'adult': False, 'gender': 2, 'id': 18269, 'k...","[{'adult': False, 'gender': 2, 'id': 373, 'kno..."
9998,False,https://image.tmdb.org/t/p/original//xOA4MbyJZ...,"['Drama', 'Horror', 'Thriller']",692969,no,Kadaver,"In the aftermath of a nuclear disaster, a star...",13.666,https://image.tmdb.org/t/p/original//bo94ZQ0Sl...,2020-10-22,Cadaver,False,5.6,553,"[{'id': 612, 'name': 'hotel'}, {'id': 2546, 'n...","[{'adult': False, 'gender': 1, 'id': 63758, 'k...","[{'adult': False, 'gender': 2, 'id': 20308, 'k..."
