In [1]:
#importing the required libraries

import json
import pandas as pd
import requests

## Getting the data from pages 1 to 99

In [None]:
import time

#Creating the empty lists
movie_titles = []
movie_overviews = []
movie_ids = []

# Set up retry parameters
max_retries = 5
retry_delay = 5  # seconds

# Loop through pages to get the required data
for page in range(1, 100):  # request up to 99 pages
    url = f"https://api.themoviedb.org/3/movie/top_rated?language=en-US&page={page}"
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
    }

    success = False
    for attempt in range(max_retries):
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            movie_data = response.json().get("results", [])

            # Extract titles and overviews
            temp_titles = [movie['title'] for movie in movie_data]
            temp_overviews = [movie['overview'] for movie in movie_data]
            temp_ids = [movie['id'] for movie in movie_data]

            movie_titles.extend(temp_titles)
            movie_overviews.extend(temp_overviews)
            movie_ids.extend(temp_ids)

            success = True
            break
        else:
            print(f"Error fetching data for page {page} (status code {response.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for page {page} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Create DataFrame
df = pd.DataFrame({
    'Title': movie_titles,
    'Overview': movie_overviews,
    'Id': movie_ids
})

print(df.shape)

(1980, 3)


In [None]:
df.head(20)

Unnamed: 0,Title,Overview,Id
0,The Shawshank Redemption,Imprisoned in the 1940s for the double murder ...,278
1,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",238
2,The Godfather Part II,In the continuing saga of the Corleone crime f...,240
3,Schindler's List,The true story of how businessman Oskar Schind...,424
4,12 Angry Men,The defense and the prosecution have rested an...,389
5,Dilwale Dulhania Le Jayenge,"Raj is a rich, carefree, happy-go-lucky second...",19404
6,Spirited Away,"A young girl, Chihiro, becomes trapped in a st...",129
7,The Dark Knight,Batman raises the stakes in his war on crime. ...,155
8,Parasite,"All unemployed, Ki-taek's family takes peculia...",496243
9,The Green Mile,A supernatural tale set on death row in a Sout...,497


In [None]:
# Retrieve details, credits, and keywords for each movie
# lists for details
movie_genres = []
movie_tagline = []

# lists for credits
movie_cast = []
movie_crew = []

# lists for keywords
movie_keywords = []

# running a loop through all the movie ids to get the required information (details, credits, keywords)
for movie_id in movie_ids:
    success = False
    for attempt in range(max_retries):
        details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?language=en-US"
        credits_url =  f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=en-US"
        keywords_url = f"https://api.themoviedb.org/3/movie/{movie_id}/keywords"

        headers = {
            "accept": "application/json",
            "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
        }

        response_details = requests.get(details_url, headers=headers)
        response_credits = requests.get(credits_url, headers=headers)
        response_keywords = requests.get(keywords_url, headers=headers)



        if response_details.status_code == 200 and response_credits.status_code == 200 and response_keywords.status_code == 200:

            obj_details_genre = response_details.json().get("genres",[])
            genres = [genre['name'] for genre in obj_details_genre]
            obj_details_tagline = response_details.json().get('tagline',[])
            tagline = obj_details_tagline
            movie_genres.append(genres)
            movie_tagline.append(tagline)

            obj_credits_cast = response_credits.json().get("cast",[])
            cast = [actor['name'] for actor in obj_credits_cast][:10]
            obj_credits_crew = response_credits.json().get("crew",[])
            crew = [director['name'] for director in obj_credits_crew if director['job'] == 'Director' or director['job'] == 'Producer']
            movie_cast.append(cast)
            movie_crew.append(crew)

            obj_keywords_key = response_keywords.json().get("keywords",[])
            keywords = [keyword['name'] for keyword in obj_keywords_key]
            movie_keywords.append(keywords)

            success = True
            break
        else:
            print(f"Error fetching data for movie {movie_id} (status codes: {response_details.status_code}, {response_credits.status_code}, {response_keywords.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for movie {movie_id} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Combine all information into a single DataFrame
df_metadata = pd.DataFrame({
    'Id': movie_ids,
    'Genre':movie_genres,
    'Tagline':movie_tagline,
    'Cast': movie_cast,
    'Crew': movie_crew,
    'Keywords': movie_keywords
})

print(df_metadata.shape)
print(movie_ids)
print(movie_genres)
print(movie_tagline)
print(movie_cast)
print(movie_crew)
print(movie_keywords)

(1980, 6)
[278, 238, 240, 424, 389, 19404, 129, 155, 496243, 497, 372058, 680, 122, 13, 429, 769, 12477, 346, 11216, 667257, 637, 372754, 1058694, 550, 157336, 539, 598, 510, 311, 696374, 704264, 120, 255709, 324857, 4935, 724089, 40096, 121, 620249, 568332, 1891, 14537, 423, 244786, 1160164, 761053, 378064, 807, 27205, 569094, 567, 274, 73, 128, 820067, 1139087, 92321, 914, 105, 12493, 644479, 18491, 207, 599, 3782, 101, 10494, 3082, 335, 901, 29259, 28, 77338, 1585, 975, 637920, 527641, 632632, 447362, 652837, 995133, 10376, 25237, 8587, 670, 630566, 299534, 283566, 508965, 533514, 299536, 490132, 42269, 315162, 618344, 265177, 110420, 572154, 635302, 504253, 290098, 654299, 16869, 603, 98, 361743, 37257, 694, 441130, 857, 354912, 24188, 50014, 284, 522924, 11324, 1124, 185, 11, 16672, 5156, 797, 490, 324786, 556574, 810693, 476292, 313106, 26451, 20941, 629, 537061, 20334, 620683, 610892, 10098, 77, 92060, 68718, 18148, 592350, 426, 111, 851644, 1422, 693134, 1398, 475557, 872, 422,

In [None]:
df_metadata.head()

Unnamed: 0,Id,Genre,Tagline,Cast,Crew,Keywords
0,278,"[Drama, Crime]",Fear can hold you prisoner. Hope can set you f...,"[Tim Robbins, Morgan Freeman, Bob Gunton, Will...","[Niki Marvin, Frank Darabont]","[prison, friendship, police brutality, corrupt..."
1,238,"[Drama, Crime]",An offer you can't refuse.,"[Marlon Brando, Al Pacino, James Caan, Robert ...","[Albert S. Ruddy, Francis Ford Coppola, Franci...","[based on novel or book, loss of loved one, lo..."
2,240,"[Drama, Crime]",The rise and fall of the Corleone empire.,"[Al Pacino, Robert Duvall, Diane Keaton, Rober...","[Francis Ford Coppola, Francis Ford Coppola]","[italian american, cuba, italy, gangster, prai..."
3,424,"[Drama, History, War]","Whoever saves one life, saves the world entire.","[Liam Neeson, Ben Kingsley, Ralph Fiennes, Car...","[Gerald R. Molen, Steven Spielberg, Steven Spi...","[factory, hero, based on novel or book, nazi, ..."
4,389,[Drama],Life is in their hands — Death is on their minds!,"[Martin Balsam, John Fiedler, Lee J. Cobb, E.G...","[Reginald Rose, Henry Fonda, Sidney Lumet]","[death penalty, anonymity, court case, court, ..."


In [None]:
df_metadata.shape

(1980, 6)

In [None]:
# applying merge
df_merged = df.merge(df_metadata[['Id','Genre','Tagline','Cast','Crew','Keywords']])
df_merged.head()

Unnamed: 0,Title,Overview,Id,Genre,Tagline,Cast,Crew,Keywords
0,The Shawshank Redemption,Imprisoned in the 1940s for the double murder ...,278,"[Drama, Crime]",Fear can hold you prisoner. Hope can set you f...,"[Tim Robbins, Morgan Freeman, Bob Gunton, Will...","[Niki Marvin, Frank Darabont]","[prison, friendship, police brutality, corrupt..."
1,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",238,"[Drama, Crime]",An offer you can't refuse.,"[Marlon Brando, Al Pacino, James Caan, Robert ...","[Albert S. Ruddy, Francis Ford Coppola, Franci...","[based on novel or book, loss of loved one, lo..."
2,The Godfather Part II,In the continuing saga of the Corleone crime f...,240,"[Drama, Crime]",The rise and fall of the Corleone empire.,"[Al Pacino, Robert Duvall, Diane Keaton, Rober...","[Francis Ford Coppola, Francis Ford Coppola]","[italian american, cuba, italy, gangster, prai..."
3,Schindler's List,The true story of how businessman Oskar Schind...,424,"[Drama, History, War]","Whoever saves one life, saves the world entire.","[Liam Neeson, Ben Kingsley, Ralph Fiennes, Car...","[Gerald R. Molen, Steven Spielberg, Steven Spi...","[factory, hero, based on novel or book, nazi, ..."
4,12 Angry Men,The defense and the prosecution have rested an...,389,[Drama],Life is in their hands — Death is on their minds!,"[Martin Balsam, John Fiedler, Lee J. Cobb, E.G...","[Reginald Rose, Henry Fonda, Sidney Lumet]","[death penalty, anonymity, court case, court, ..."


## Now, let's get the data for page 100 to 199

---



In [None]:
import time

#Creating the empty lists
movie_titles_200 = []
movie_overviews_200 = []
movie_ids_200 = []

# Set up retry parameters
max_retries = 5
retry_delay = 5  # seconds

# Loop through pages to get the required data
for page in range(100, 200):  # request up to 199 pages
    url = f"https://api.themoviedb.org/3/movie/top_rated?language=en-US&page={page}"
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
    }

    success = False
    for attempt in range(max_retries):
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            movie_data = response.json().get("results", [])

            # Extract titles and overviews
            temp_titles = [movie['title'] for movie in movie_data]
            temp_overviews = [movie['overview'] for movie in movie_data]
            temp_ids = [movie['id'] for movie in movie_data]

            movie_titles_200.extend(temp_titles)
            movie_overviews_200.extend(temp_overviews)
            movie_ids_200.extend(temp_ids)

            success = True
            break
        else:
            print(f"Error fetching data for page {page} (status code {response.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for page {page} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Create DataFrame
df_200 = pd.DataFrame({
    'Title': movie_titles_200,
    'Overview': movie_overviews_200,
    'Id': movie_ids_200
})

print(df_200.shape)

(2000, 3)


In [None]:
df_200.head(20)

Unnamed: 0,Title,Overview,Id
0,Twice Born,Full-throttle melodrama about an ill-starred r...,121642
1,Naruto Shippuden the Movie: Blood Prison,After his capture for attempted assassination ...,75624
2,From Here to Eternity,"In 1941 Hawaii, a private is cruelly punished ...",11426
3,Total Recall,Construction worker Douglas Quaid's obsession ...,861
4,Mia and the White Lion,A young girl from London moves to Africa with ...,498248
5,GANTZ:O,After being brutally murdered in a subway stat...,396263
6,The Fundamentals of Caring,"Having suffered a tragedy, Ben becomes a careg...",318121
7,Source Code,When decorated soldier Captain Colter Stevens ...,45612
8,Scooby-Doo! and the Cyber Chase,When Scooby and the gang get trapped in a vide...,15601
9,Crossroads,A wanna-be blues guitar virtuoso seeks a long-...,15392


In [None]:
# Retrieve details, credits, and keywords for each movie
# lists for details
movie_genres_200 = []
movie_tagline_200 = []

# lists for credits
movie_cast_200 = []
movie_crew_200 = []

# lists for keywords
movie_keywords_200 = []

# running a loop through all the movie ids to get the required information (details, credits, keywords)
for movie_id in movie_ids_200:
    success = False
    for attempt in range(max_retries):
        details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?language=en-US"
        credits_url =  f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=en-US"
        keywords_url = f"https://api.themoviedb.org/3/movie/{movie_id}/keywords"

        headers = {
            "accept": "application/json",
            "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
        }

        response_details = requests.get(details_url, headers=headers)
        response_credits = requests.get(credits_url, headers=headers)
        response_keywords = requests.get(keywords_url, headers=headers)



        if response_details.status_code == 200 and response_credits.status_code == 200 and response_keywords.status_code == 200:

            obj_details_genre = response_details.json().get("genres",[])
            genres = [genre['name'] for genre in obj_details_genre]
            obj_details_tagline = response_details.json().get('tagline',[])
            tagline = obj_details_tagline
            movie_genres_200.append(genres)
            movie_tagline_200.append(tagline)

            obj_credits_cast = response_credits.json().get("cast",[])
            cast = [actor['name'] for actor in obj_credits_cast][:10]
            obj_credits_crew = response_credits.json().get("crew",[])
            crew = [director['name'] for director in obj_credits_crew if director['job'] == 'Director' or director['job'] == 'Producer']
            movie_cast_200.append(cast)
            movie_crew_200.append(crew)

            obj_keywords_key = response_keywords.json().get("keywords",[])
            keywords = [keyword['name'] for keyword in obj_keywords_key]
            movie_keywords_200.append(keywords)

            success = True
            break
        else:
            print(f"Error fetching data for movie {movie_id} (status codes: {response_details.status_code}, {response_credits.status_code}, {response_keywords.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for movie {movie_id} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Combine all information into a single DataFrame
df_metadata_200 = pd.DataFrame({
    'Id': movie_ids_200,
    'Genre':movie_genres_200,
    'Tagline':movie_tagline_200,
    'Cast': movie_cast_200,
    'Crew': movie_crew_200,
    'Keywords': movie_keywords_200
})

print(df_metadata_200.shape)
print(movie_ids_200)
print(movie_genres_200)
print(movie_tagline_200)
print(movie_cast_200)
print(movie_crew_200)
print(movie_keywords_200)

(2000, 6)
[121642, 75624, 11426, 861, 498248, 396263, 318121, 45612, 15601, 15392, 11886, 453395, 297270, 764, 432616, 40794, 36040, 21956, 813, 407448, 265712, 49538, 557, 21032, 10794, 10112, 87, 356296, 338766, 11963, 11537, 11101, 381, 255, 104, 3, 11502, 1491, 571468, 58857, 53457, 547258, 11499, 949423, 259693, 8337, 199, 1040148, 79120, 1029575, 610120, 114150, 590, 493922, 15384, 941, 746036, 413279, 298582, 13446, 408220, 25673, 36940, 604605, 80184, 1433, 550231, 497582, 11943, 9652, 271674, 17814, 9587, 9502, 9325, 1014590, 799583, 30497, 19913, 4710, 379291, 34653, 14320, 3176, 177, 11305, 9837, 9550, 5506, 453278, 197082, 140607, 6145, 846214, 20620, 19067, 2639, 2270, 663870, 403450, 13597, 68734, 60308, 9603, 9560, 558, 300671, 57447, 26842, 14554, 10722, 837, 93289, 14551, 11450, 6619, 1071806, 459003, 286192, 53565, 323661, 156022, 99861, 23566, 809, 594328, 318781, 10440, 1367, 889699, 539681, 15152, 11197, 4107, 1802, 284689, 258230, 11663, 375315, 353577, 43266, 171

In [None]:
df_metadata_200.head()

Unnamed: 0,Id,Genre,Tagline,Cast,Crew,Keywords
0,121642,"[Drama, Romance, War]",A story of love. A story of war. A story of life.,"[Penélope Cruz, Emile Hirsch, Adnan Hasković, ...","[Sergio Castellitto, Sergio Castellitto, Rober...","[rape, civil war, based on novel or book, bosn..."
1,75624,"[Thriller, Animation, Action, Comedy, Horror, ...",,"[Junko Takeuchi, Mie Sonozaki, Masaki Terasoma...","[Masahiko Murata, Fukashi Azuma, Naoji Hounoki...",[anime]
2,11426,"[War, Romance, Drama]","The boldest book of our time… honestly, fearle...","[Burt Lancaster, Montgomery Clift, Deborah Ker...","[Buddy Adler, Fred Zinnemann]","[beach, based on novel or book, hawaii, world ..."
3,861,"[Action, Adventure, Science Fiction]","They stole his mind, now he wants it back.","[Arnold Schwarzenegger, Rachel Ticotin, Sharon...","[Paul Verhoeven, Buzz Feitshans, Ronald Shusett]","[double life, planet mars, based on novel or b..."
4,498248,"[Adventure, Family, Drama]",Friendship is the wildest adventure of all,"[Daniah De Villiers, Mélanie Laurent, Langley ...","[Gilles de Maistre, Jacques Perrin, Gilles de ...","[south africa, lion cub, football (soccer) fan..."


In [None]:
df_metadata_200.shape

(2000, 6)

In [None]:
# applying merge
df_merged_200 = df_200.merge(df_metadata_200[['Id','Genre','Tagline','Cast','Crew','Keywords']])
df_merged_200.head()

Unnamed: 0,Title,Overview,Id,Genre,Tagline,Cast,Crew,Keywords
0,Twice Born,Full-throttle melodrama about an ill-starred r...,121642,"[Drama, Romance, War]",A story of love. A story of war. A story of life.,"[Penélope Cruz, Emile Hirsch, Adnan Hasković, ...","[Sergio Castellitto, Sergio Castellitto, Rober...","[rape, civil war, based on novel or book, bosn..."
1,Naruto Shippuden the Movie: Blood Prison,After his capture for attempted assassination ...,75624,"[Thriller, Animation, Action, Comedy, Horror, ...",,"[Junko Takeuchi, Mie Sonozaki, Masaki Terasoma...","[Masahiko Murata, Fukashi Azuma, Naoji Hounoki...",[anime]
2,From Here to Eternity,"In 1941 Hawaii, a private is cruelly punished ...",11426,"[War, Romance, Drama]","The boldest book of our time… honestly, fearle...","[Burt Lancaster, Montgomery Clift, Deborah Ker...","[Buddy Adler, Fred Zinnemann]","[beach, based on novel or book, hawaii, world ..."
3,Total Recall,Construction worker Douglas Quaid's obsession ...,861,"[Action, Adventure, Science Fiction]","They stole his mind, now he wants it back.","[Arnold Schwarzenegger, Rachel Ticotin, Sharon...","[Paul Verhoeven, Buzz Feitshans, Ronald Shusett]","[double life, planet mars, based on novel or b..."
4,Mia and the White Lion,A young girl from London moves to Africa with ...,498248,"[Adventure, Family, Drama]",Friendship is the wildest adventure of all,"[Daniah De Villiers, Mélanie Laurent, Langley ...","[Gilles de Maistre, Jacques Perrin, Gilles de ...","[south africa, lion cub, football (soccer) fan..."


In [None]:
from google.colab import files
df_merged_200.to_csv('data_200.csv', index=False)
files.download('data_200.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Now, let's get the data for page 200 to 299


In [None]:
import time

#Creating the empty lists
movie_titles_300 = []
movie_overviews_300 = []
movie_ids_300 = []

# Set up retry parameters
max_retries = 5
retry_delay = 5  # seconds

# Loop through pages to get the required data
for page in range(200, 300):  # request up to 199 pages
    url = f"https://api.themoviedb.org/3/movie/top_rated?language=en-US&page={page}"
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
    }

    success = False
    for attempt in range(max_retries):
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            movie_data = response.json().get("results", [])

            # Extract titles and overviews
            temp_titles = [movie['title'] for movie in movie_data]
            temp_overviews = [movie['overview'] for movie in movie_data]
            temp_ids = [movie['id'] for movie in movie_data]

            movie_titles_300.extend(temp_titles)
            movie_overviews_300.extend(temp_overviews)
            movie_ids_300.extend(temp_ids)

            success = True
            break
        else:
            print(f"Error fetching data for page {page} (status code {response.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for page {page} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Create DataFrame
df_300 = pd.DataFrame({
    'Title': movie_titles_300,
    'Overview': movie_overviews_300,
    'Id': movie_ids_300
})

print(df_300.shape)

(2000, 3)


In [None]:
df_300.head(20)

Unnamed: 0,Title,Overview,Id
0,Riki-Oh: The Story of Ricky,"In 2001, where all correctional facilities hav...",17467
1,Rio Grande,Lt. Col. Kirby Yorke is posted on the Texas fr...,11617
2,Sister Act,A Reno singer witnesses a mob murder and the c...,2005
3,Blood: The Last Vampire,"In Japan, the vampire-hunter Saya, who is a po...",919
4,Champions,A stubborn and hotheaded minor league basketba...,933419
5,The Sisters Brothers,"Oregon, 1851. Hermann Kermit Warm, a chemist a...",440161
6,Everest,Inspired by the incredible events surrounding ...,253412
7,Che: Part One,"The Argentine, begins as Che and a band of Cub...",8881
8,The Best Years,"Italy from the '80s to the present day, throug...",606566
9,Respect,The rise of Aretha Franklin’s career from a ch...,592863


In [None]:
# Retrieve details, credits, and keywords for each movie
# lists for details
movie_genres_300 = []
movie_tagline_300 = []

# lists for credits
movie_cast_300 = []
movie_crew_300 = []

# lists for keywords
movie_keywords_300 = []

# running a loop through all the movie ids to get the required information (details, credits, keywords)
for movie_id in movie_ids_300:
    success = False
    for attempt in range(max_retries):
        details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?language=en-US"
        credits_url =  f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=en-US"
        keywords_url = f"https://api.themoviedb.org/3/movie/{movie_id}/keywords"

        headers = {
            "accept": "application/json",
            "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
        }

        response_details = requests.get(details_url, headers=headers)
        response_credits = requests.get(credits_url, headers=headers)
        response_keywords = requests.get(keywords_url, headers=headers)



        if response_details.status_code == 200 and response_credits.status_code == 200 and response_keywords.status_code == 200:

            obj_details_genre = response_details.json().get("genres",[])
            genres = [genre['name'] for genre in obj_details_genre]
            obj_details_tagline = response_details.json().get('tagline',[])
            tagline = obj_details_tagline
            movie_genres_300.append(genres)
            movie_tagline_300.append(tagline)

            obj_credits_cast = response_credits.json().get("cast",[])
            cast = [actor['name'] for actor in obj_credits_cast][:10]
            obj_credits_crew = response_credits.json().get("crew",[])
            crew = [director['name'] for director in obj_credits_crew if director['job'] == 'Director' or director['job'] == 'Producer']
            movie_cast_300.append(cast)
            movie_crew_300.append(crew)

            obj_keywords_key = response_keywords.json().get("keywords",[])
            keywords = [keyword['name'] for keyword in obj_keywords_key]
            movie_keywords_300.append(keywords)

            success = True
            break
        else:
            print(f"Error fetching data for movie {movie_id} (status codes: {response_details.status_code}, {response_credits.status_code}, {response_keywords.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for movie {movie_id} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Combine all information into a single DataFrame
df_metadata_300 = pd.DataFrame({
    'Id': movie_ids_300,
    'Genre':movie_genres_300,
    'Tagline':movie_tagline_300,
    'Cast': movie_cast_300,
    'Crew': movie_crew_300,
    'Keywords': movie_keywords_300
})

print(df_metadata_300.shape)
print(movie_ids_300)
print(movie_genres_300)
print(movie_tagline_300)
print(movie_cast_300)
print(movie_crew_300)
print(movie_keywords_300)

(2000, 6)
[17467, 11617, 2005, 919, 933419, 440161, 253412, 8881, 606566, 592863, 299537, 171337, 16366, 277, 884605, 41110, 39013, 9563, 6961, 4523, 596161, 82992, 11774, 9684, 9314, 1903, 912, 933131, 614933, 253331, 116979, 76726, 1947, 261102, 84175, 9972, 620725, 207686, 86838, 11004, 9319, 424781, 370663, 157841, 150689, 10115, 419635, 76535, 72190, 57737, 26171, 9270, 626412, 460059, 61667, 7443, 514921, 225574, 11708, 482981, 41233, 33542, 444090, 438747, 256917, 131631, 37735, 12281, 9387, 597156, 41216, 13342, 8976, 401847, 654895, 283704, 431, 619778, 14609, 525686, 525661, 339405, 10222, 9655, 567797, 181808, 9737, 744276, 580489, 57243, 13648, 619730, 449443, 338768, 262551, 33908, 16962, 11054, 3509, 37768, 25941, 49367, 11843, 9443, 1858, 755566, 587807, 508664, 170522, 107985, 20115, 776797, 733156, 318917, 158011, 56415, 188166, 13682, 11144, 10855, 9647, 5994, 5038, 332979, 228161, 30596, 691, 454, 475132, 437342, 252178, 22051, 19997, 8366, 1946, 466, 636706, 369885,

In [None]:
# applying merge
df_merged_300 = df_300.merge(df_metadata_300[['Id','Genre','Tagline','Cast','Crew','Keywords']])
df_merged_300.head()

Unnamed: 0,Title,Overview,Id,Genre,Tagline,Cast,Crew,Keywords
0,Riki-Oh: The Story of Ricky,"In 2001, where all correctional facilities hav...",17467,"[Action, Crime, Science Fiction]",,"[Louis Fan Siu-Wong, Gloria Yip Wan-Yee, Yukar...","[Chan Dung Chow, Lam Ngai Kai]","[martial arts, based on manga, held captive, d..."
1,Rio Grande,Lt. Col. Kirby Yorke is posted on the Texas fr...,11617,[Western],John Ford's Most Powerful Drama. The Breathtak...,"[John Wayne, Maureen O'Hara, Ben Johnson, Clau...","[John Ford, Merian C. Cooper, John Ford]","[texas, settler, apache nation, usa–mexico bor..."
2,Sister Act,A Reno singer witnesses a mob murder and the c...,2005,"[Music, Comedy]",No booze. No sex. No drugs. No way.,"[Whoopi Goldberg, Maggie Smith, Kathy Najimy, ...","[Emile Ardolino, Teri Schwartz]","[concert, nun, church choir, witness protectio..."
3,Blood: The Last Vampire,"In Japan, the vampire-hunter Saya, who is a po...",919,"[Animation, Fantasy, Horror]",,"[Youki Kudoh, Saemi Nakamura, Joe Romersa, Reb...","[Yukio Nagasaki, Hiroyuki Kitakubo]","[vampire, military base, jeep, vampire hunter ..."
4,Champions,A stubborn and hotheaded minor league basketba...,933419,"[Comedy, Drama]",Every dream team starts somewhere.,"[Woody Harrelson, Kaitlin Olson, Cheech Marin,...","[Bobby Farrelly, Paul Brooks, Scott Niemeyer, ...","[basketball, special olympics, basketball team..."


In [None]:
from google.colab import files
df_merged_300.to_csv('data_300.csv', index=False)
files.download('data_300.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Now, let's get the data for page 300 to 399


In [None]:
import time

#Creating the empty lists
movie_titles_400 = []
movie_overviews_400 = []
movie_ids_400 = []

# Set up retry parameters
max_retries = 5
retry_delay = 5  # seconds

# Loop through pages to get the required data
for page in range(300, 400):  # request up to 199 pages
    url = f"https://api.themoviedb.org/3/movie/top_rated?language=en-US&page={page}"
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
    }

    success = False
    for attempt in range(max_retries):
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            movie_data = response.json().get("results", [])

            # Extract titles and overviews
            temp_titles = [movie['title'] for movie in movie_data]
            temp_overviews = [movie['overview'] for movie in movie_data]
            temp_ids = [movie['id'] for movie in movie_data]

            movie_titles_400.extend(temp_titles)
            movie_overviews_400.extend(temp_overviews)
            movie_ids_400.extend(temp_ids)

            success = True
            break
        else:
            print(f"Error fetching data for page {page} (status code {response.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for page {page} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Create DataFrame
df_400 = pd.DataFrame({
    'Title': movie_titles_400,
    'Overview': movie_overviews_400,
    'Id': movie_ids_400
})

print(df_400.shape)

(2000, 3)


In [None]:
df_400.head()

Unnamed: 0,Title,Overview,Id
0,Tale of Tales,The Queen of Selvascura risks everything to be...,314405
1,EVA,"In 2041, humans live side-by-side with robots ...",77561
2,Ong Bak 2,Moments from death a young man is rescued by a...,16353
3,Ladder 49,"Under the watchful eye of his mentor, Captain ...",11128
4,Vantage Point,The attempted assassination of the American pr...,7461


In [None]:
# Retrieve details, credits, and keywords for each movie
# lists for details
movie_genres_400 = []
movie_tagline_400 = []

# lists for credits
movie_cast_400 = []
movie_crew_400 = []

# lists for keywords
movie_keywords_400 = []

# running a loop through all the movie ids to get the required information (details, credits, keywords)
for movie_id in movie_ids_400:
    success = False
    for attempt in range(max_retries):
        details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?language=en-US"
        credits_url =  f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=en-US"
        keywords_url = f"https://api.themoviedb.org/3/movie/{movie_id}/keywords"

        headers = {
            "accept": "application/json",
            "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
        }

        response_details = requests.get(details_url, headers=headers)
        response_credits = requests.get(credits_url, headers=headers)
        response_keywords = requests.get(keywords_url, headers=headers)



        if response_details.status_code == 200 and response_credits.status_code == 200 and response_keywords.status_code == 200:

            obj_details_genre = response_details.json().get("genres",[])
            genres = [genre['name'] for genre in obj_details_genre]
            obj_details_tagline = response_details.json().get('tagline',[])
            tagline = obj_details_tagline
            movie_genres_400.append(genres)
            movie_tagline_400.append(tagline)

            obj_credits_cast = response_credits.json().get("cast",[])
            cast = [actor['name'] for actor in obj_credits_cast][:10]
            obj_credits_crew = response_credits.json().get("crew",[])
            crew = [director['name'] for director in obj_credits_crew if director['job'] == 'Director' or director['job'] == 'Producer']
            movie_cast_400.append(cast)
            movie_crew_400.append(crew)

            obj_keywords_key = response_keywords.json().get("keywords",[])
            keywords = [keyword['name'] for keyword in obj_keywords_key]
            movie_keywords_400.append(keywords)

            success = True
            break
        else:
            print(f"Error fetching data for movie {movie_id} (status codes: {response_details.status_code}, {response_credits.status_code}, {response_keywords.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for movie {movie_id} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Combine all information into a single DataFrame
df_metadata_400 = pd.DataFrame({
    'Id': movie_ids_400,
    'Genre':movie_genres_400,
    'Tagline':movie_tagline_400,
    'Cast': movie_cast_400,
    'Crew': movie_crew_400,
    'Keywords': movie_keywords_400
})

print(df_metadata_400.shape)
print(movie_ids_400)
print(movie_genres_400)
print(movie_tagline_400)
print(movie_cast_400)
print(movie_crew_400)
print(movie_keywords_400)

(2000, 6)
[314405, 77561, 16353, 11128, 7461, 460668, 300667, 10337, 10074, 2275, 670203, 41602, 6488, 27429, 11460, 6687, 681, 294795, 294272, 271736, 271736, 5551, 709, 74018, 43549, 16187, 13022, 11913, 593910, 587996, 528491, 62046, 29437, 10047, 10057, 525041, 523607, 15139, 920081, 733317, 649409, 152795, 13968, 10715, 8054, 60420, 433501, 222461, 38365, 11068, 9759, 532639, 531219, 290751, 209403, 50780, 11680, 6691, 447277, 10017, 189, 293646, 44269, 8491, 515248, 513574, 435615, 356305, 68735, 11081, 10192, 405775, 77949, 21610, 18269, 1734, 315, 387893, 12435, 8007, 57800, 250574, 294652, 9317, 6279, 1687, 674944, 118293, 16248, 13150, 4824, 608, 838330, 799379, 766475, 295699, 13154, 145197, 71688, 20481, 13154, 11008, 617, 802217, 41733, 13523, 11891, 2075, 775996, 286987, 300168, 66195, 13225, 11120, 524369, 262500, 82682, 34588, 13373, 9718, 8077, 746333, 399035, 338970, 592336, 477033, 438348, 112205, 81342, 628900, 3980, 2119, 1647, 714, 299, 157360, 11427, 4247, 1268, 

In [None]:
# applying merge
df_merged_400 = df_400.merge(df_metadata_400[['Id','Genre','Tagline','Cast','Crew','Keywords']])
df_merged_400.head()

Unnamed: 0,Title,Overview,Id,Genre,Tagline,Cast,Crew,Keywords
0,Tale of Tales,The Queen of Selvascura risks everything to be...,314405,"[Romance, Fantasy, Horror]",Desire. Envy. Obsession.,"[Salma Hayek Pinault, Vincent Cassel, Toby Jon...","[Matteo Garrone, Matteo Garrone, Jean Labadie,...","[witch, queen, castle, king, twins, ogre, flea..."
1,EVA,"In 2041, humans live side-by-side with robots ...",77561,"[Science Fiction, Drama]",You cannot program what you feel,"[Daniel Brühl, Marta Etura, Alberto Ammann, Cl...",[Kike Maíllo],"[android, robot, robotics, future earth, human..."
2,Ong Bak 2,Moments from death a young man is rescued by a...,16353,"[Adventure, Action, Thriller]",Warrior. Conqueror. Legend.,"[Tony Jaa, Sarunyu Wongkrachang, Sorapong Chat...","[Panna Rittikrai, Tony Jaa, Prachya Pinkaew, P...","[martial arts, thailand, sequel, muay thai, re..."
3,Ladder 49,"Under the watchful eye of his mentor, Captain ...",11128,"[Drama, Action, Thriller]",Their greatest challenge lies in rescuing one ...,"[Joaquin Phoenix, John Travolta, Jacinda Barre...","[Jay Russell, Casey Silver]","[rescue, hero, fire, disaster, firefighting, l..."
4,Vantage Point,The attempted assassination of the American pr...,7461,"[Drama, Action, Thriller, Crime]",8 strangers. 8 points of view. 1 truth.,"[Dennis Quaid, Matthew Fox, Forest Whitaker, S...","[Pete Travis, Neal H. Moritz, Mark Farrington]","[spain, usa president, bomb, media, war on ter..."


In [None]:
from google.colab import files
df_merged_400.to_csv('data_400.csv', index=False)
files.download('data_400.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Now, let's get the data for page 400 to 499

In [2]:
import time

#Creating the empty lists
movie_titles_500 = []
movie_overviews_500 = []
movie_ids_500 = []

# Set up retry parameters
max_retries = 5
retry_delay = 5  # seconds

# Loop through pages to get the required data
for page in range(400, 500):  # request up to 199 pages
    url = f"https://api.themoviedb.org/3/movie/top_rated?language=en-US&page={page}"
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
    }

    success = False
    for attempt in range(max_retries):
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            movie_data = response.json().get("results", [])

            # Extract titles and overviews
            temp_titles = [movie['title'] for movie in movie_data]
            temp_overviews = [movie['overview'] for movie in movie_data]
            temp_ids = [movie['id'] for movie in movie_data]

            movie_titles_500.extend(temp_titles)
            movie_overviews_500.extend(temp_overviews)
            movie_ids_500.extend(temp_ids)

            success = True
            break
        else:
            print(f"Error fetching data for page {page} (status code {response.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for page {page} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Create DataFrame
df_500 = pd.DataFrame({
    'Title': movie_titles_500,
    'Overview': movie_overviews_500,
    'Id': movie_ids_500
})

print(df_500.shape)

(1487, 3)


In [3]:
df_500.head()

Unnamed: 0,Title,Overview,Id
0,Antigang,As a rowdy team of detectives clashes with the...,348060
1,Tesla,The story of the Promethean struggles of Nikol...,517412
2,Shimmer Lake,Shot in reverse day-by-day through a week—a lo...,457962
3,The Haunting in Connecticut 2: Ghosts of Georgia,A young family moves into a historic home in G...,150202
4,Largo Winch II,Propelled to the head of the W Group after the...,57331


In [4]:
# Retrieve details, credits, and keywords for each movie
# lists for details
movie_genres_500 = []
movie_tagline_500 = []

# lists for credits
movie_cast_500 = []
movie_crew_500 = []

# lists for keywords
movie_keywords_500 = []

# running a loop through all the movie ids to get the required information (details, credits, keywords)
for movie_id in movie_ids_500:
    success = False
    for attempt in range(max_retries):
        details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?language=en-US"
        credits_url =  f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=en-US"
        keywords_url = f"https://api.themoviedb.org/3/movie/{movie_id}/keywords"

        headers = {
            "accept": "application/json",
            "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiIzOGIyYjAyNWQ2NjkwMGQ3NWRjYWY0OTgzMTI5MDk1MyIsIm5iZiI6MTcxOTQ4MzY3OC4zMTY4NDEsInN1YiI6IjYyZGQ0NmQxZWE4NGM3MDA2NzRhYTU1ZiIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.AptVqT7Qo28zKdKZYdwSusYN3m1LFgab6cPSe2_zuZo"
        }

        response_details = requests.get(details_url, headers=headers)
        response_credits = requests.get(credits_url, headers=headers)
        response_keywords = requests.get(keywords_url, headers=headers)



        if response_details.status_code == 200 and response_credits.status_code == 200 and response_keywords.status_code == 200:

            obj_details_genre = response_details.json().get("genres",[])
            genres = [genre['name'] for genre in obj_details_genre]
            obj_details_tagline = response_details.json().get('tagline',[])
            tagline = obj_details_tagline
            movie_genres_500.append(genres)
            movie_tagline_500.append(tagline)

            obj_credits_cast = response_credits.json().get("cast",[])
            cast = [actor['name'] for actor in obj_credits_cast][:10]
            obj_credits_crew = response_credits.json().get("crew",[])
            crew = [director['name'] for director in obj_credits_crew if director['job'] == 'Director' or director['job'] == 'Producer']
            movie_cast_500.append(cast)
            movie_crew_500.append(crew)

            obj_keywords_key = response_keywords.json().get("keywords",[])
            keywords = [keyword['name'] for keyword in obj_keywords_key]
            movie_keywords_500.append(keywords)

            success = True
            break
        else:
            print(f"Error fetching data for movie {movie_id} (status codes: {response_details.status_code}, {response_credits.status_code}, {response_keywords.status_code}), retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    if not success:
        print(f"Failed to fetch data for movie {movie_id} after {max_retries} attempts, skipping.")
        continue

    time.sleep(1)  # to avoid hitting rate limits

# Combine all information into a single DataFrame
df_metadata_500 = pd.DataFrame({
    'Id': movie_ids_500,
    'Genre':movie_genres_500,
    'Tagline':movie_tagline_500,
    'Cast': movie_cast_500,
    'Crew': movie_crew_500,
    'Keywords': movie_keywords_500
})

print(df_metadata_500.shape)
print(movie_ids_500)
print(movie_genres_500)
print(movie_tagline_500)
print(movie_cast_500)
print(movie_crew_500)
print(movie_keywords_500)

(1487, 6)
[348060, 517412, 457962, 150202, 57331, 14138, 679989, 440708, 12596, 6439, 133698, 109431, 87567, 59965, 15379, 12277, 513409, 147441, 284289, 238215, 38961, 54540, 8398, 157823, 306, 35626, 23172, 18457, 315319, 77987, 52077, 437626, 36648, 16866, 9957, 9728, 793723, 9988, 7978, 27573, 509874, 334538, 258193, 38541, 19286, 10061, 335791, 6723, 521935, 503, 487702, 418078, 256474, 10131, 56906, 262543, 249070, 69778, 28941, 28597, 2022, 1961, 114478, 44833, 9312, 429202, 383785, 11851, 492621, 10317, 9491, 780154, 461992, 291264, 14223, 11870, 101173, 20606, 13555, 9570, 152259, 72207, 44982, 11517, 2144, 14161, 270010, 244761, 13690, 11074, 768334, 575322, 433627, 342878, 9980, 214030, 81005, 49524, 4442, 93828, 84105, 77953, 12096, 11630, 10596, 1691, 22894, 12138, 9396, 3512, 192134, 71679, 10768, 5, 192149, 174751, 17127, 14536, 70706, 37414, 12569, 11519, 4327, 661791, 423087, 190469, 12139, 826749, 353979, 240916, 14211, 9824, 9754, 675327, 502416, 323676, 107811, 3480

In [5]:
# applying merge
df_merged_500 = df_500.merge(df_metadata_500[['Id','Genre','Tagline','Cast','Crew','Keywords']])
df_merged_500.head()

Unnamed: 0,Title,Overview,Id,Genre,Tagline,Cast,Crew,Keywords
0,Antigang,As a rowdy team of detectives clashes with the...,348060,"[Action, Drama]",Good cops. Bad Attitude.,"[Jean Reno, Alban Lenoir, Caterina Murino, Oum...","[Benjamin Rocher, Raphaël Rocher, Allan Niblo,...","[drug dealer, police, special forces, die hard..."
1,Tesla,The story of the Promethean struggles of Nikol...,517412,"[History, Drama]",My achievements and conquests will be evaluate...,"[Ethan Hawke, Eve Hewson, Jim Gaffigan, Kyle M...","[Michael Almereyda, Uri Singer, Lati Grobman, ...","[inventor, biography, based on true story, nik..."
2,Shimmer Lake,Shot in reverse day-by-day through a week—a lo...,457962,"[Thriller, Crime, Drama, Mystery]",,"[Rainn Wilson, Benjamin Walker, John Michael H...","[Oren Uziel, Adam Saunders, Britton Rizzio]",[]
3,The Haunting in Connecticut 2: Ghosts of Georgia,A young family moves into a historic home in G...,150202,"[Horror, Drama, Thriller]",Based on a true story,"[Chad Michael Murray, Katee Sackhoff, Abigail ...",[Tom Elkins],"[georgia, sequel, new home, ghost, 1990s]"
4,Largo Winch II,Propelled to the head of the W Group after the...,57331,"[Action, Adventure, Thriller]",The Burma Conspiracy,"[Tomer Sisley, Sharon Stone, Ulrich Tukur, Oli...","[Jérôme Salle, Nathalie Gastaldo]","[based on comic, sequel, flashback, jungle, ba..."


In [6]:
from google.colab import files
df_merged_500.to_csv('data_500.csv', index=False)
files.download('data_500.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>