In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time

In [None]:
def clean_title(title):
    ## Remove footnotes and unnecessary parentheses
    return re.sub(r'\[.*?\]', '', title).strip()

In [None]:
url = "https://en.wikipedia.org/wiki/Academy_Award_for_Best_Picture"
headers = {
    'User-Agent': 'MyAwardScraper'
}
response = requests.get(url, headers=headers)
html_content = response.text

In [None]:
response.status_code

In [None]:
soup = BeautifulSoup(response.text, 'html.parser')

## Initialize a list to hold the data
best_picture_data = []
table = soup.find('table', {'class': 'wikitable'})
rows = soup.select("table.wikitable tbody tr")

for row in rows:

    # Identify winner by checking background color
    style = row.get("style", "").strip().lower()
    is_winner = "background:#faeb86" in style

    winner_text = "Yes" if is_winner else "No"

    year_cell = row.find("th")
    if year_cell:
        ## Replace <br> with a space, keep links/text
        for br in year_cell.find_all("br"):
            br.replace_with("/")
        current_year = year_cell.get_text(separator=" ", strip=True)
    ## No else needed; current_year remains unchanged if there's no year_cell

    ## If no <th> in this row, use last seen year
    year = current_year if current_year else ""

    ## Extract film and studio
    td_cells = row.find_all("td")
    film = td_cells[0].get_text(" ", strip=True) if len(td_cells) > 0 else ""
    
    ## Only append if film is not empty
    if film.strip():
        best_picture_data.append([year, film, winner_text])

## Create a DataFrame from the collected data
best_picture_df = pd.DataFrame(best_picture_data, columns=['Year', 'Film Title', 'Winner'])

## Optional: Extract just the year if needed (this is needed only if your year has extra characters)
best_picture_df['Year'] = best_picture_df['Year'].str.extract('(\d{4})')[0]  

## Save the DataFrame to a CSV file
best_picture_df.to_csv('best_picture.csv', index=False)
best_picture_df.head(10)

In [None]:
import json

with open('./keys.json') as fi:
    credenticials = json.load(fi)
api_key = credenticials['api_key']


In [None]:
endpoint = f'https://api.themoviedb.org/3/discover/movie'
params = {
    'api_key' : api_key
          
    }
tmdb_response = requests.get(endpoint,params=params)
tmdb_data  = tmdb_response.json()
time.sleep(0.25)


In [None]:
movie_data = pd.json_normalize(tmdb_data["results"])[["id", 'genre_ids', "original_title", "release_date","vote_count","vote_average"]]

In [9]:
movie_data.head(10)

Unnamed: 0,id,genre_ids,original_title,release_date,vote_count,vote_average
0,1062722,"[18, 27, 14]",Frankenstein,2025-10-17,1450,7.88
1,1054867,"[28, 53, 80]",One Battle After Another,2025-09-23,1414,7.6
2,1248226,"[28, 35, 10751]",Playdate,2025-11-05,177,6.5
3,1116465,"[28, 12, 14]",传说,2024-07-05,63,6.325
4,1084222,"[28, 12, 27]",Operation Blood Hunt,2024-12-12,12,5.5
5,1242898,"[28, 878, 12]",Predator: Badlands,2025-11-05,342,7.4
6,1128650,"[28, 18, 80, 53]",誤判,2024-12-08,105,7.476
7,1296504,"[28, 53, 80]",Stand Your Ground,2025-05-09,29,6.138
8,1429738,"[80, 28, 18]",ตี๋ใหญ่ ฤกษ์ดาวโจร,2025-11-13,9,7.722
9,755898,"[878, 53]",War of the Worlds,2025-07-29,717,4.335


In [10]:
movie_data_1 = pd.DataFrame()
pd.set_option('display.max_colwidth', None)
for release_year in range(2015,2024):
    yearly_movies = pd.DataFrame()
    for page in range(1,6):
        params = {
        'page' :page,
        'api_key' : api_key,
        'sort_by' : 'vote_count.desc',
        'release_date.gte' : f"{release_year}-01-01",
        'release_date.lte' : f"{release_year}-12-31"
        #'primary_release_year' : release_year
        }
        api_response = requests.get(endpoint, params=params)

        data = api_response.json()
        data
        
        page_data= pd.json_normalize(data['results'])
        yearly_movies = pd.concat([page_data,yearly_movies], ignore_index=True)
        
        yearly_movies["release_year"] = pd.to_datetime(yearly_movies["release_date"], errors="coerce").dt.year
        yearly_movies["release_year"] = release_year
  

In [11]:
yearly_movies

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,release_year
0,False,/kBSSbN1sOiJtXjAGVZXxHJR9Kox.jpg,"[28, 18]",361743,en,Top Gun: Maverick,"After more than thirty years of service as one of the Navy’s top aviators, and dodging the advancement in rank that would ground him, Pete “Maverick” Mitchell finds himself training a detachment of TOP GUN graduates for a specialized mission the likes of which no living pilot has ever seen.",15.4904,/62HCnUTziyWcpDaBO2i1DX17ljH.jpg,2022-05-21,Top Gun: Maverick,False,8.165,10335,2023
1,False,/5MnP0h7RcUCeX7gpxMYoMScmfq7.jpg,"[14, 28, 12, 16, 35, 10751]",82702,en,How to Train Your Dragon 2,"Five years have passed since Hiccup and Toothless united the dragons and Vikings of Berk. Now, they spend their time charting unmapped territories. During one of their adventures, the pair discover a secret cave that houses hundreds of wild dragons -- and a mysterious dragon rider. Hiccup and Toothless find themselves at the center of a battle to protect Berk from a power-hungry warrior.",9.8985,/d13Uj86LdbDLrfDoHR5aDOFYyJC.jpg,2014-06-05,How to Train Your Dragon 2,False,7.676,10111,2023
2,False,/9n2tJBplPbgR2ca05hS5CKXwP2c.jpg,"[10751, 35, 12, 16, 14]",502356,en,The Super Mario Bros. Movie,"While working underground to fix a water main, Brooklyn plumbers—and brothers—Mario and Luigi are transported down a mysterious pipe and wander into a magical new world. But when the brothers are separated, Mario embarks on an epic quest to find Luigi.",26.5219,/qNBAXBIQlnOThrVvA6mA2B5ggV6.jpg,2023-04-05,The Super Mario Bros. Movie,False,7.603,10065,2023
3,False,/16lk65YfrDFIr6evkWRjSeOOSws.jpg,"[14, 16, 10751]",9479,en,The Nightmare Before Christmas,"Tired of scaring humans every October 31 with the same old bag of tricks, Jack Skellington, the spindly king of Halloween Town, kidnaps Santa Claus and plans to deliver shrunken heads and other ghoulish gifts to children on Christmas morning. But as Christmas approaches, Jack's rag-doll girlfriend, Sally, tries to foil his misguided plans.",13.9297,/oQffRNjK8e19rF7xVYEN8ew0j7b.jpg,1993-10-09,The Nightmare Before Christmas,False,7.800,10004,2023
4,False,/dTn1HGhCuq26IoBq9qKsiYNFxfq.jpg,"[14, 28, 12]",453395,en,Doctor Strange in the Multiverse of Madness,"Doctor Strange, with the help of mystical allies both old and new, traverses the mind-bending and dangerous alternate realities of the Multiverse to confront a mysterious new adversary.",11.9835,/ddJcSKbcp4rKZTmuyWaMhuwcfMz.jpg,2022-05-04,Doctor Strange in the Multiverse of Madness,False,7.240,9872,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,False,/jdHsptJbtalEuVhCV5i7kSC3g0x.jpg,"[18, 80]",238,en,The Godfather,"Spanning the years 1945 to 1955, a chronicle of the fictional Italian-American Corleone crime family. When organized crime family patriarch, Vito Corleone barely survives an attempt on his life, his youngest son, Michael steps in to take care of the would-be killers, launching a campaign of bloody revenge.",22.0513,/3bhkrj58Vtu7enYsRolD1fZdja1.jpg,1972-03-14,The Godfather,False,8.700,22064,2023
96,False,/ew5FcYiRhTYNJAkxoVPMNlCOdVn.jpg,"[18, 80, 28]",272,en,Batman Begins,"Driven by tragedy, billionaire Bruce Wayne dedicates his life to uncovering and defeating the corruption that plagues his home, Gotham City. Unable to work within the system, he instead creates a new identity, a symbol of fear for the criminal underworld - The Batman.",11.6893,/sPX89Td70IDDjVr85jdSBb4rWGr.jpg,2005-06-10,Batman Begins,False,7.700,21968,2023
97,False,/2w4xG178RpB4MDAIfTkqAuSJzec.jpg,"[12, 28, 878]",11,en,Star Wars,Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.,14.9483,/6FfCtAuVAW8XJjZ7eWeLibRLWTw.jpg,1977-05-25,Star Wars,False,8.204,21647,2023
98,False,/8f9dnOtpArDrOMEylpSN9Sc6fuz.jpg,"[12, 14]",674,en,Harry Potter and the Goblet of Fire,"When Harry Potter's name emerges from the Goblet of Fire, he becomes a competitor in a grueling battle for glory among three wizarding schools—the Triwizard Tournament. But since Harry never submitted his name for the Tournament, who did? Now Harry must confront a deadly dragon, fierce water demons and an enchanted maze only to find himself in the cruel grasp of He Who Must Not Be Named.",22.1742,/fECBtHlr0RB3foNHDiCBXeg9Bv9.jpg,2005-11-16,Harry Potter and the Goblet of Fire,False,7.799,21585,2023


In [12]:
movie_data_1= pd.concat([yearly_movies,movie_data_1], ignore_index=True)
movie_data_1_df = movie_data_1

In [13]:
movie_data_1_df.head(10)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,release_year
0,False,/kBSSbN1sOiJtXjAGVZXxHJR9Kox.jpg,"[28, 18]",361743,en,Top Gun: Maverick,"After more than thirty years of service as one of the Navy’s top aviators, and dodging the advancement in rank that would ground him, Pete “Maverick” Mitchell finds himself training a detachment of TOP GUN graduates for a specialized mission the likes of which no living pilot has ever seen.",15.4904,/62HCnUTziyWcpDaBO2i1DX17ljH.jpg,2022-05-21,Top Gun: Maverick,False,8.165,10335,2023
1,False,/5MnP0h7RcUCeX7gpxMYoMScmfq7.jpg,"[14, 28, 12, 16, 35, 10751]",82702,en,How to Train Your Dragon 2,"Five years have passed since Hiccup and Toothless united the dragons and Vikings of Berk. Now, they spend their time charting unmapped territories. During one of their adventures, the pair discover a secret cave that houses hundreds of wild dragons -- and a mysterious dragon rider. Hiccup and Toothless find themselves at the center of a battle to protect Berk from a power-hungry warrior.",9.8985,/d13Uj86LdbDLrfDoHR5aDOFYyJC.jpg,2014-06-05,How to Train Your Dragon 2,False,7.676,10111,2023
2,False,/9n2tJBplPbgR2ca05hS5CKXwP2c.jpg,"[10751, 35, 12, 16, 14]",502356,en,The Super Mario Bros. Movie,"While working underground to fix a water main, Brooklyn plumbers—and brothers—Mario and Luigi are transported down a mysterious pipe and wander into a magical new world. But when the brothers are separated, Mario embarks on an epic quest to find Luigi.",26.5219,/qNBAXBIQlnOThrVvA6mA2B5ggV6.jpg,2023-04-05,The Super Mario Bros. Movie,False,7.603,10065,2023
3,False,/16lk65YfrDFIr6evkWRjSeOOSws.jpg,"[14, 16, 10751]",9479,en,The Nightmare Before Christmas,"Tired of scaring humans every October 31 with the same old bag of tricks, Jack Skellington, the spindly king of Halloween Town, kidnaps Santa Claus and plans to deliver shrunken heads and other ghoulish gifts to children on Christmas morning. But as Christmas approaches, Jack's rag-doll girlfriend, Sally, tries to foil his misguided plans.",13.9297,/oQffRNjK8e19rF7xVYEN8ew0j7b.jpg,1993-10-09,The Nightmare Before Christmas,False,7.8,10004,2023
4,False,/dTn1HGhCuq26IoBq9qKsiYNFxfq.jpg,"[14, 28, 12]",453395,en,Doctor Strange in the Multiverse of Madness,"Doctor Strange, with the help of mystical allies both old and new, traverses the mind-bending and dangerous alternate realities of the Multiverse to confront a mysterious new adversary.",11.9835,/ddJcSKbcp4rKZTmuyWaMhuwcfMz.jpg,2022-05-04,Doctor Strange in the Multiverse of Madness,False,7.24,9872,2023
5,False,/f133nWlU6yUkZqqwcCXCbTEJWHr.jpg,"[12, 28]",87,en,Indiana Jones and the Temple of Doom,"After arriving in India, Indiana Jones is asked by a desperate village to find a mystical stone. He agrees – and stumbles upon a secret cult plotting a terrible plan in the catacombs of an ancient palace.",6.6914,/gpdVNUaa4LhRMLfJOPj1AZdhAZ3.jpg,1984-05-23,Indiana Jones and the Temple of Doom,False,7.302,9782,2023
6,False,/1XqIhsqnAozznGhxlGdI0GPcCro.jpg,"[18, 10752]",423,en,The Pianist,"The true story of pianist Władysław Szpilman's experiences in Warsaw during the Nazi occupation. When the Jews of the city find themselves forced into a ghetto, Szpilman finds work playing in a café; and when his family is deported in 1942, he stays behind, works for a while as a laborer, and eventually goes into hiding in the ruins of the war-torn city.",5.4416,/2hFvxCCWrTmCYwfy7yum0GKRi3Y.jpg,2002-09-17,The Pianist,False,8.38,9767,2023
7,False,/hfWsEZRTpqSfzofIhjjy7VL4o61.jpg,"[9648, 28, 878]",13183,en,Watchmen,"In a gritty and alternate 1985, the glory days of costumed vigilantes have been brought to a close by a government crackdown. But after one of the masked veterans is brutally murdered, an investigation into the killer is initiated. The reunited heroes set out to prevent their own destruction, but in doing so they uncover a sinister plot that puts all of humanity in grave danger.",6.27,/aVURelN3pM56lFM7Dgfs5TixcIf.jpg,2009-03-04,Watchmen,False,7.346,9648,2023
8,False,/dF2c20sRDgySZLaLS2OUerL07sz.jpg,"[12, 14, 18, 10749]",18239,en,The Twilight Saga: New Moon,"Forks, Washington resident Bella Swan is reeling from the departure of her vampire love, Edward Cullen, and finds comfort in her friendship with Jacob Black, a werewolf. But before she knows it, she's thrust into a centuries-old conflict, and her desire to be with Edward at any cost leads her to take greater and greater risks.",6.7435,/k2qTooPlHffgNABNWxeJdGMglPK.jpg,2009-11-18,The Twilight Saga: New Moon,False,5.994,9540,2023
9,False,/7py8kUCYaOdFn1TfVS87BDBySOz.jpg,"[35, 12, 878]",550988,en,Free Guy,"A bank teller discovers he is actually a background player in an open-world video game, and decides to become the hero of his own story. Now, in a world where there are no limits, he is determined to be the guy who saves his world his way before it's too late.",11.5858,/6PFJrMvoQwBxQITLYHj09VeJ37q.jpg,2021-08-11,Free Guy,False,7.463,9379,2023


In [14]:
movie_data_1_df.to_csv('movies_2015_2024.csv', index=False)

PermissionError: [Errno 13] Permission denied: 'movies_2015_2024.csv'

In [None]:
endpoint = f'https://api.themoviedb.org/3/discover/movie'
params = {
    'api_key' : api_key,
          
    }
tmdb_response = requests.get(endpoint,params)
tmdb_data  = tmdb_response.json()
tmdb_df = pd.json_normalize(tmdb_data)
##all_movie_data = pd.concat([tamdb_data,tmdb_df], ignore_index=True)
tmdb_df.head(10)
time.sleep(0.25)
##all_movie_data=all_movie_data[['title','vote_average','vote_count','budget','revenue','imdb_id']]
tmdb_df.head(10)
    

In [None]:
data


In [None]:
endpoint = f'https://api.themoviedb.org/3/discover/movie'
params = {
        'api_key' : api_key,
              
        }
api_response = requests.get(endpoint, params=params)
tmdb_response = requests.get(tmdb_endpoint,params)
    tmdb_data  = tmdb_response.json()
    tmdb_df = pd.json_normalize(tmdb_data)
    all_movie_data = pd.concat([all_movie_data,tmdb_df], ignore_index=True)
    time.sleep(0.25)
all_movie_data=all_movie_data[['title','vote_average','vote_count','budget','revenue','imdb_id']]
print(all_movie_data)
movie_data = pd.DataFrame()
pd.set_option('display.max_colwidth', None)
for release_year in range(2015,2024):
    yearly_movies = pd.DataFrame()
    for page in range(1,6):
        params = {
        'page' :page,
        'api_key' : api_key,
        'sort_by' : 'vote_count.desc',
        'release_date.gte' : f"{release_year}-01-01",
        'release_date.lte' : f"{release_year}-12-31"
        #'primary_release_year' : release_year
        }
        api_response = requests.get(endpoint, params=params)

        data = api_response.json()
        data
        
        page_data= pd.json_normalize(data['results'])
        yearly_movies = pd.concat([page_data,yearly_movies], ignore_index=True)
        
        ##yearly_movies["release_year"] = pd.to_datetime(yearly_movies["release_date"], errors="coerce").dt.year
        yearly_movies["release_year"] = release_year
    movie_data= pd.concat([yearly_movies,movie_data], ignore_index=True)
        
        ##print(movie_data)
#movie_data=movie_data[['title','genre_ids','vote_average','vote_count','release_year']]
#movie_data.columns
all_movie_data = pd.DataFrame()
for movie_id in movie_data['id']:
    
    
    tmdb_endpoint = f'https://api.themoviedb.org/3/movie/{movie_id}'
    params = {
        'api_key' : api_key,
              
        }

In [None]:
movie_data = pd.DataFrame()
pd.set_option('display.max_colwidth', None)
for release_year in range(2015,2024):
    yearly_movies = pd.DataFrame()
    for page in range(1,6):
        params = {
        'page' :page,
        'api_key' : api_key,
        'sort_by' : 'vote_count.desc',
        'release_date.gte' : f"{release_year}-01-01",
        'release_date.lte' : f"{release_year}-12-31"
        #'primary_release_year' : release_year
        }
        api_response = requests.get(endpoint, params=params)

        data = api_response.json()
        data
        
        page_data= pd.json_normalize(data['results'])
        yearly_movies = pd.concat([page_data,yearly_movies], ignore_index=True)
        
        ##yearly_movies["release_year"] = pd.to_datetime(yearly_movies["release_date"], errors="coerce").dt.year
        yearly_movies["release_year"] = release_year
    movie_data= pd.concat([yearly_movies,movie_data], ignore_index=True)
        
        ##print(movie_data)
#movie_data=movie_data[['title','genre_ids','vote_average','vote_count','release_year']]
#movie_data.columns
all_movie_data = pd.DataFrame()
for movie_id in movie_data['id']:
    
    
    tmdb_endpoint = f'https://api.themoviedb.org/3/movie/{movie_id}'
    params = {
        'api_key' : api_key,
              
        }

    tmdb_response = requests.get(tmdb_endpoint,params)
    tmdb_data  = tmdb_response.json()
    tmdb_df = pd.json_normalize(tmdb_data)
    all_movie_data = pd.concat([all_movie_data,tmdb_df], ignore_index=True)
    time.sleep(0.25)
all_movie_data=all_movie_data[['title','vote_average','vote_count','budget','revenue','imdb_id']]
print(all_movie_data)