# Project 2: Oscar winners vs box office success 

## Import relevant libraries:

In [246]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

## Scrape the table on Box Office Mojo to get each year and the name of its top-grossing movie:

In [247]:
# Download the Box Office Mojo yearly summary page and parse it.
boxoffice_url = "https://www.boxofficemojo.com/year/"
# A timeout keeps the cell from hanging forever if the site does not answer.
boxoffice_response = requests.get(boxoffice_url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
boxoffice_response.raise_for_status()
boxoffice_html = boxoffice_response.text
# Name the parser explicitly (the same one the per-movie scraping cell uses) so
# the result does not depend on which optional parsers are installed.
boxoffice_soup = BeautifulSoup(boxoffice_html, "html.parser")

In [248]:
# NOTE: the selector "table, tr" matches every <table> AND every <tr>, so this
# is a list of mixed elements (hence the length displayed), not just tables.
boxoffice_table = boxoffice_soup.select("table, tr")
len(boxoffice_table)

50

In [249]:
# Keep only the first matched element. NOTE(review): presumably this is the
# <table> itself, since it precedes its own rows in the document — confirm.
# The name is rebound from a list to a single element, so this cell is not
# safe to re-run on its own.
boxoffice_table = boxoffice_table[0]

In [250]:
# Every <tr> inside the selected element; the first one appears to be the
# header row (it has <th> cells and yields no <td> text further down).
rows = boxoffice_table.select("tr")

In [251]:
# Header cells of the first row. Neither name is referenced again in the
# visible cells: the DataFrame header is later rebuilt directly from
# boxoffice_table.select("th").
first_row = rows[0]
first_row_cells = first_row.select("th")

In [252]:
# One list of cell texts per table row; a row without <td> cells yields [].
boxoffice_entries = []
for row in rows:
    cell_texts = [cell.text for cell in row.select("td")]
    boxoffice_entries.append(cell_texts)

In [253]:
# Display the scraped rows to eyeball them before building the DataFrame.
boxoffice_entries 

[[],
 ['2024', '$4,258,814,831', '-', '376', '$11,326,635', 'Inside Out 2'],
 ['2023', '$8,908,297,471', '+20.9%', '590', '$15,098,809', 'Barbie'],
 ['2022',
  '$7,369,521,886',
  '+64.4%',
  '499',
  '$14,768,580',
  'Top Gun: Maverick'],
 ['2021',
  '$4,483,010,556',
  '+112.1%',
  '441',
  '$10,165,556',
  'Spider-Man: No Way Home'],
 ['2020',
  '$2,113,846,800',
  '-81.4%',
  '456',
  '$4,635,628',
  'Bad Boys for Life'],
 ['2019',
  '$11,363,360,759',
  '-4.4%',
  '910',
  '$12,487,209',
  'Avengers: Endgame'],
 ['2018', '$11,892,160,011', '+7.4%', '993', '$11,975,991', 'Black Panther'],
 ['2017',
  '$11,075,387,520',
  '-2.6%',
  '854',
  '$12,968,837',
  'Star Wars: Episode VIII - The Last Jedi'],
 ['2016', '$11,375,225,455', '+2%', '855', '$13,304,357', 'Finding Dory'],
 ['2015', '$11,148,780,747', '+7.5%', '845', '$13,193,823', 'Jurassic World'],
 ['2014',
  '$10,368,861,849',
  '-5.4%',
  '849',
  '$12,213,029',
  'Guardians of the Galaxy'],
 ['2013', '$10,955,524,800', '+1%'

In [254]:
# Header labels come from the <th> cells, data rows from the <td> cells.
header_labels = [th.text for th in boxoffice_table.select("th")]
table_rows = [[cell.text for cell in row.select("td")] for row in rows]
boxoffice_df_mojo = pd.DataFrame(table_rows, columns=header_labels)

In [255]:
# Display the raw table; row 0 is empty because the header row has no <td> cells.
boxoffice_df_mojo

Unnamed: 0,Year,Total Gross,%± LY,Releases,Average,#1 Release\n
0,,,,,,
1,2024.0,"$4,258,814,831",-,376.0,"$11,326,635",Inside Out 2
2,2023.0,"$8,908,297,471",+20.9%,590.0,"$15,098,809",Barbie
3,2022.0,"$7,369,521,886",+64.4%,499.0,"$14,768,580",Top Gun: Maverick
4,2021.0,"$4,483,010,556",+112.1%,441.0,"$10,165,556",Spider-Man: No Way Home
5,2020.0,"$2,113,846,800",-81.4%,456.0,"$4,635,628",Bad Boys for Life
6,2019.0,"$11,363,360,759",-4.4%,910.0,"$12,487,209",Avengers: Endgame
7,2018.0,"$11,892,160,011",+7.4%,993.0,"$11,975,991",Black Panther
8,2017.0,"$11,075,387,520",-2.6%,854.0,"$12,968,837",Star Wars: Episode VIII - The Last Jedi
9,2016.0,"$11,375,225,455",+2%,855.0,"$13,304,357",Finding Dory


In [256]:
# Skip the empty first row, keep only the year and the #1 release, and give
# the release column a readable name (the scraped header ends in a newline).
unused_columns = ['Total Gross', '%± LY', 'Releases', 'Average']
boxoffice_df_mojo = (
    boxoffice_df_mojo[1:]
    .drop(columns=unused_columns)
    .rename(columns={'#1 Release\n': 'Top Grossing Movie (TGM)'})
)
boxoffice_df_mojo

Unnamed: 0,Year,Top Grossing Movie (TGM)
1,2024,Inside Out 2
2,2023,Barbie
3,2022,Top Gun: Maverick
4,2021,Spider-Man: No Way Home
5,2020,Bad Boys for Life
6,2019,Avengers: Endgame
7,2018,Black Panther
8,2017,Star Wars: Episode VIII - The Last Jedi
9,2016,Finding Dory
10,2015,Jurassic World


In [257]:
# Years were scraped as text; cast them to integers so they can be used as a
# merge key against the integer years of the Oscar table.
boxoffice_df_mojo = boxoffice_df_mojo.astype({'Year': int})

## Scrape the domestic gross of each movie listed in the DataFrame:

#### (The total gross in the table I scraped above is the total gross of ALL the movies that came out that year so I need to go into each individual link to find the total gross for each movie)

In [258]:
# Grab every <td> carrying the "mojo-cell-wide" class; per the displayed
# output these are the "#1 Release" cells, each wrapping a link to the
# movie's own release page.
td_tags = boxoffice_soup.find_all("td", class_="mojo-cell-wide")
td_tags

[<td class="a-text-left mojo-field-type-release mojo-cell-wide"><a class="a-link-normal" href="/release/rl3638199041/?ref_=bo_yl_table_1">Inside Out 2</a></td>,
 <td class="a-text-left mojo-field-type-release mojo-cell-wide"><a class="a-link-normal" href="/release/rl1077904129/?ref_=bo_yl_table_2">Barbie</a></td>,
 <td class="a-text-left mojo-field-type-release mojo-cell-wide"><a class="a-link-normal" href="/release/rl2500036097/?ref_=bo_yl_table_3">Top Gun: Maverick</a></td>,
 <td class="a-text-left mojo-field-type-release mojo-cell-wide"><a class="a-link-normal" href="/release/rl2869659137/?ref_=bo_yl_table_4">Spider-Man: No Way Home</a></td>,
 <td class="a-text-left mojo-field-type-release mojo-cell-wide"><a class="a-link-normal" href="/release/rl1182631425/?ref_=bo_yl_table_5">Bad Boys for Life</a></td>,
 <td class="a-text-left mojo-field-type-release mojo-cell-wide"><a class="a-link-normal" href="/release/rl3059975681/?ref_=bo_yl_table_6">Avengers: Endgame</a></td>,
 <td class="a-

In [259]:
# Collect the relative URL of each movie page from the anchor in every cell.
links = []
for td in td_tags:
    link = td.find("a", class_="a-link-normal").get("href")
    links.append(link)

links

['/release/rl3638199041/?ref_=bo_yl_table_1',
 '/release/rl1077904129/?ref_=bo_yl_table_2',
 '/release/rl2500036097/?ref_=bo_yl_table_3',
 '/release/rl2869659137/?ref_=bo_yl_table_4',
 '/release/rl1182631425/?ref_=bo_yl_table_5',
 '/release/rl3059975681/?ref_=bo_yl_table_6',
 '/release/rl2992866817/?ref_=bo_yl_table_7',
 '/release/rl2708702721/?ref_=bo_yl_table_8',
 '/release/rl3764946433/?ref_=bo_yl_table_9',
 '/release/rl2371716609/?ref_=bo_yl_table_10',
 '/release/rl3177416193/?ref_=bo_yl_table_11',
 '/release/rl1532659201/?ref_=bo_yl_table_12',
 '/release/rl709199361/?ref_=bo_yl_table_13',
 '/release/rl1265337857/?ref_=bo_yl_table_14',
 '/release/rl876971521/?ref_=bo_yl_table_15',
 '/release/rl3027731969/?ref_=bo_yl_table_16',
 '/release/rl3729098241/?ref_=bo_yl_table_17',
 '/release/rl913540609/?ref_=bo_yl_table_18',
 '/release/rl4083713537/?ref_=bo_yl_table_19',
 '/release/rl2943583745/?ref_=bo_yl_table_20',
 '/release/rl24217089/?ref_=bo_yl_table_21',
 '/release/rl2723382785/?re

In [260]:
# Turn the site-relative paths into absolute URLs by prefixing the host.
BOXOFFICE_HOST = "https://www.boxofficemojo.com"
updated_links = [BOXOFFICE_HOST + relative_path for relative_path in links]
updated_links

['https://www.boxofficemojo.com/release/rl3638199041/?ref_=bo_yl_table_1',
 'https://www.boxofficemojo.com/release/rl1077904129/?ref_=bo_yl_table_2',
 'https://www.boxofficemojo.com/release/rl2500036097/?ref_=bo_yl_table_3',
 'https://www.boxofficemojo.com/release/rl2869659137/?ref_=bo_yl_table_4',
 'https://www.boxofficemojo.com/release/rl1182631425/?ref_=bo_yl_table_5',
 'https://www.boxofficemojo.com/release/rl3059975681/?ref_=bo_yl_table_6',
 'https://www.boxofficemojo.com/release/rl2992866817/?ref_=bo_yl_table_7',
 'https://www.boxofficemojo.com/release/rl2708702721/?ref_=bo_yl_table_8',
 'https://www.boxofficemojo.com/release/rl3764946433/?ref_=bo_yl_table_9',
 'https://www.boxofficemojo.com/release/rl2371716609/?ref_=bo_yl_table_10',
 'https://www.boxofficemojo.com/release/rl3177416193/?ref_=bo_yl_table_11',
 'https://www.boxofficemojo.com/release/rl1532659201/?ref_=bo_yl_table_12',
 'https://www.boxofficemojo.com/release/rl709199361/?ref_=bo_yl_table_13',
 'https://www.boxoffic

In [261]:
# Visit each movie's release page and pull its domestic gross figure.
# Exactly one entry is recorded per URL (None when the figure is missing) so
# the list stays aligned, row for row, with the DataFrame it is assigned to.
domestic_gross_boxoffice = []
for url in updated_links:
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    # select_one returns the first match, or None when nothing matches.
    money = soup.select_one("span.a-text-bold span.money")
    if money is None:
        # Report the URL fetched in THIS iteration (not a leftover variable
        # from an earlier cell).
        print(f"Could not find 'span.money' in {url}")
        domestic_gross_boxoffice.append(None)
    else:
        domestic_gross_boxoffice.append(money.get_text())

domestic_gross_boxoffice

['$596,375,604',
 '$636,238,421',
 '$718,732,821',
 '$804,793,477',
 '$206,305,244',
 '$858,373,000',
 '$700,059,566',
 '$620,181,382',
 '$486,295,561',
 '$652,270,625',
 '$333,176,600',
 '$409,013,994',
 '$623,357,910',
 '$381,011,219',
 '$749,766,139',
 '$402,111,870',
 '$533,345,358',
 '$336,530,303',
 '$423,315,812',
 '$380,270,577',
 '$441,426,807',
 '$339,714,978',
 '$403,706,375',
 '$317,575,550',
 '$260,745,620',
 '$431,088,295',
 '$600,683,057',
 '$250,690,539',
 '$306,169,268',
 '$184,031,112',
 '$312,855,561',
 '$357,067,947',
 '$162,831,698',
 '$204,843,345',
 '$217,631,306',
 '$251,188,924',
 '$156,452,370',
 '$153,665,036',
 '$176,781,728',
 '$211,850,472',
 '$229,242,989',
 '$252,583,617',
 '$359,197,037',
 '$108,185,706',
 '$209,398,025',
 '$134,218,018',
 '$159,978,870',
 '$307,263,857']

### Add that scraped data (domestic gross of each film) to the existing DataFrame:

In [262]:
# Attach the scraped grosses positionally: the list must hold exactly one
# value per DataFrame row, in the same (newest-first) order.
boxoffice_df_mojo["Total Domestic Gross of TGM"] = domestic_gross_boxoffice
boxoffice_df_mojo

Unnamed: 0,Year,Top Grossing Movie (TGM),Total Domestic Gross of TGM
1,2024,Inside Out 2,"$596,375,604"
2,2023,Barbie,"$636,238,421"
3,2022,Top Gun: Maverick,"$718,732,821"
4,2021,Spider-Man: No Way Home,"$804,793,477"
5,2020,Bad Boys for Life,"$206,305,244"
6,2019,Avengers: Endgame,"$858,373,000"
7,2018,Black Panther,"$700,059,566"
8,2017,Star Wars: Episode VIII - The Last Jedi,"$620,181,382"
9,2016,Finding Dory,"$486,295,561"
10,2015,Jurassic World,"$652,270,625"


### Scrape the names of all the Best Picture Winners at the Oscars (1929 - 2024):

In [263]:
# Download Deadline's gallery of Best Picture winners; displaying the response
# shows the HTTP status so a failed request is visible before parsing.
deadline_ai = requests.get("https://deadline.com/gallery/oscars-best-picture-winners/mcdoppe-uv025/")
deadline_ai

<Response [200]>

In [264]:
# Parse the downloaded gallery page. The HTML goes into its own name instead
# of overwriting `deadline_ai`, so this cell can be re-run without the second
# run calling `.text` on a string.
deadline_html = deadline_ai.text
# Explicit parser (the same one the per-movie scraping cell uses) so the
# result does not depend on which optional parsers happen to be installed.
deadline_soup = BeautifulSoup(deadline_html, "html.parser")

In [265]:
# Each gallery entry is an <article class="pmc-fallback-list-item ...">; per
# the displayed output its <h2> holds the movie title and the year.
deadline_movies = deadline_soup.find_all('article', class_='pmc-fallback-list-item')
deadline_movies

[<article class="pmc-fallback-list-item lrv-u-margin-lr-auto lrv-u-max-width-500">
 <h2>‘Oppenheimer’ — 2024</h2>
 <figure class="lrv-u-margin-t-050">
 <img alt="" class="attachment-medium size-medium" data-lazy-sizes="(min-width: 87.5rem) 1000px, (min-width: 78.75rem) 681px, (min-width: 48rem) 450px, (max-width: 48rem) 250px" data-lazy-src="https://deadline.com/wp-content/uploads/2024/02/Downey-Murphy-Oppenheimer-02.jpg?w=300" data-lazy-srcset="https://deadline.com/wp-content/uploads/2024/02/Downey-Murphy-Oppenheimer-02.jpg 2958w, https://deadline.com/wp-content/uploads/2024/02/Downey-Murphy-Oppenheimer-02.jpg?resize=150,104 150w, https://deadline.com/wp-content/uploads/2024/02/Downey-Murphy-Oppenheimer-02.jpg?resize=300,208 300w, https://deadline.com/wp-content/uploads/2024/02/Downey-Murphy-Oppenheimer-02.jpg?resize=1024,709 1024w, https://deadline.com/wp-content/uploads/2024/02/Downey-Murphy-Oppenheimer-02.jpg?resize=1536,1064 1536w, https://deadline.com/wp-content/uploads/2024/02/D

In [266]:
# Flatten the <h2> heading text of every gallery article into one list.
oscars_list = [
    heading.text
    for article in deadline_movies
    for heading in article.select('h2')
]

oscars_list

['‘Oppenheimer’ — 2024',
 '‘Everything Everywhere All at Once’ — 2023',
 '2022 – CODA',
 '2021 – Nomadland',
 '2020 – Parasite',
 '2019 – Green Book',
 '2018 – The Shape of Water',
 '2017 – Moonlight',
 '2016 – Spotlight',
 '2015 – Birdman or (The Unexpected Virtue of Ignorance)',
 '2014 – 12 Years a Slave',
 '2013 – Argo',
 '2012 – The Artist',
 '2011 – The King’s Speech',
 '2010 – The Hurt Locker',
 '2009 – Slumdog Millionaire',
 '2008 – No Country For Old Men',
 '2007 – The Departed',
 '2006 – Crash',
 '2005 – Million Dollar Baby',
 '2004 – Lord Of The Rings: Return Of The King',
 '2003 – Chicago',
 '2002 – A Beautiful Mind',
 '2001 – Gladiator',
 '2000 – American Beauty',
 '1999 – Shakespeare In Love',
 '1998 – Titanic',
 '1997 – The English Patient',
 '1996 – Braveheart',
 '1995 – Forrest Gump',
 '1994 – Schindler’s List',
 '1993 – Unforgiven',
 '1992 – The Silence Of The Lambs',
 '1991 – Dances With Wolves',
 '1990 – Driving Miss Daisy',
 '1989 – Rain Man',
 '1988 – The Last Empe

### Clean the list that was scraped so formatting is consistent:

In [267]:
def clean(oscar_winners):
    """Replace the dash separators of a scraped heading with spaces.

    A space-padded em dash (' — ') collapses to a single space; every en dash
    ('–') becomes one space, leaving any spaces around it in place.

    Args:
        oscar_winners: One heading string, e.g. '2022 – CODA'.

    Returns:
        The heading with both kinds of separator replaced.
    """
    # Order matters only in that the padded em dash is handled first.
    for separator in (' — ', '–'):
        oscar_winners = oscar_winners.replace(separator, ' ')
    return oscar_winners

In [268]:
# Run every scraped heading through clean() so the separators are uniform.
cleaned_oscars_list = list(map(clean, oscars_list))
cleaned_oscars_list

['‘Oppenheimer’ 2024',
 '‘Everything Everywhere All at Once’ 2023',
 '2022   CODA',
 '2021   Nomadland',
 '2020   Parasite',
 '2019   Green Book',
 '2018   The Shape of Water',
 '2017   Moonlight',
 '2016   Spotlight',
 '2015   Birdman or (The Unexpected Virtue of Ignorance)',
 '2014   12 Years a Slave',
 '2013   Argo',
 '2012   The Artist',
 '2011   The King’s Speech',
 '2010   The Hurt Locker',
 '2009   Slumdog Millionaire',
 '2008   No Country For Old Men',
 '2007   The Departed',
 '2006   Crash',
 '2005   Million Dollar Baby',
 '2004   Lord Of The Rings: Return Of The King',
 '2003   Chicago',
 '2002   A Beautiful Mind',
 '2001   Gladiator',
 '2000   American Beauty',
 '1999   Shakespeare In Love',
 '1998   Titanic',
 '1997   The English Patient',
 '1996   Braveheart',
 '1995   Forrest Gump',
 '1994   Schindler’s List',
 '1993   Unforgiven',
 '1992   The Silence Of The Lambs',
 '1991   Dances With Wolves',
 '1990   Driving Miss Daisy',
 '1989   Rain Man',
 '1988   The Last Emperor 

In [269]:
# Split "<year> <title>" headings into (year, title) tuples. Headings that do
# not START with digits (the title-first entries) do not match and are skipped.
year_then_title = re.compile(r'(\d+)\s+(.*)')
cleaned_regex_oscars_list = []

for entry in cleaned_oscars_list:
    parsed = year_then_title.match(entry)
    if parsed is None:
        continue
    year_text, title_text = parsed.groups()
    cleaned_regex_oscars_list.append((int(year_text), title_text.strip()))

cleaned_regex_oscars_list

[(2022, 'CODA'),
 (2021, 'Nomadland'),
 (2020, 'Parasite'),
 (2019, 'Green Book'),
 (2018, 'The Shape of Water'),
 (2017, 'Moonlight'),
 (2016, 'Spotlight'),
 (2015, 'Birdman or (The Unexpected Virtue of Ignorance)'),
 (2014, '12 Years a Slave'),
 (2013, 'Argo'),
 (2012, 'The Artist'),
 (2011, 'The King’s Speech'),
 (2010, 'The Hurt Locker'),
 (2009, 'Slumdog Millionaire'),
 (2008, 'No Country For Old Men'),
 (2007, 'The Departed'),
 (2006, 'Crash'),
 (2005, 'Million Dollar Baby'),
 (2004, 'Lord Of The Rings: Return Of The King'),
 (2003, 'Chicago'),
 (2002, 'A Beautiful Mind'),
 (2001, 'Gladiator'),
 (2000, 'American Beauty'),
 (1999, 'Shakespeare In Love'),
 (1998, 'Titanic'),
 (1997, 'The English Patient'),
 (1996, 'Braveheart'),
 (1995, 'Forrest Gump'),
 (1994, 'Schindler’s List'),
 (1993, 'Unforgiven'),
 (1992, 'The Silence Of The Lambs'),
 (1991, 'Dances With Wolves'),
 (1990, 'Driving Miss Daisy'),
 (1989, 'Rain Man'),
 (1988, 'The Last Emperor 1987'),
 (1987, 'Platoon'),
 (1986

In [270]:
# The two newest headings are written title-first ('‘Oppenheimer’ — 2024'),
# so the year-first regex skips them; they are added back here by hand.
recent_winners = [(2024, 'Oppenheimer'), (2023, 'Everything Everywhere All at Once')]

In [271]:
# Hand-entered newest winners first, followed by the regex-parsed ones, so the
# combined list stays in newest-to-oldest order.
oscar_winners_list = recent_winners + cleaned_regex_oscars_list 
oscar_winners_list

[(2024, 'Oppenheimer'),
 (2023, 'Everything Everywhere All at Once'),
 (2022, 'CODA'),
 (2021, 'Nomadland'),
 (2020, 'Parasite'),
 (2019, 'Green Book'),
 (2018, 'The Shape of Water'),
 (2017, 'Moonlight'),
 (2016, 'Spotlight'),
 (2015, 'Birdman or (The Unexpected Virtue of Ignorance)'),
 (2014, '12 Years a Slave'),
 (2013, 'Argo'),
 (2012, 'The Artist'),
 (2011, 'The King’s Speech'),
 (2010, 'The Hurt Locker'),
 (2009, 'Slumdog Millionaire'),
 (2008, 'No Country For Old Men'),
 (2007, 'The Departed'),
 (2006, 'Crash'),
 (2005, 'Million Dollar Baby'),
 (2004, 'Lord Of The Rings: Return Of The King'),
 (2003, 'Chicago'),
 (2002, 'A Beautiful Mind'),
 (2001, 'Gladiator'),
 (2000, 'American Beauty'),
 (1999, 'Shakespeare In Love'),
 (1998, 'Titanic'),
 (1997, 'The English Patient'),
 (1996, 'Braveheart'),
 (1995, 'Forrest Gump'),
 (1994, 'Schindler’s List'),
 (1993, 'Unforgiven'),
 (1992, 'The Silence Of The Lambs'),
 (1991, 'Dances With Wolves'),
 (1990, 'Driving Miss Daisy'),
 (1989, 'Ra

## Put the clean list into a DataFrame:

In [272]:
# Column labels for the (year, title) tuples; 'Year' is also the merge key used
# later against the box office table.
column_names = ['Year', 'Oscar Winner']

In [273]:
# Build the winners table and keep its first 48 rows — the same count as the
# list of grosses that is attached to it positionally further down.
oscars_list_df = pd.DataFrame(oscar_winners_list, columns=column_names).head(48)

In [274]:
# Domestic gross of each Best Picture winner, one single-entry dict per movie,
# listed newest first. NOTE(review): these look hand-entered (no scraping code
# for them is visible) — confirm the source.
# Only the VALUES are used later, positionally, so the order must match the
# rows of oscars_list_df; the keys are just labels and some are spelled
# differently from the scraped titles.
domestic_gross_winners = [ 
    {"Oppenheimer": "$329,862,540"},
    {"Everything Everywhere All at Once": "$77,191,785"},
    {"CODA": "$2,601,649"},
    {"Nomadland": "$3,700,000"},
    {"Parasite": "$53,369,749"},
    {"Green Book": "$85,080,171"},
    {"The Shape of Water": "$63,859,435"},
    {"Moonlight": "$27,854,932"},
    {"Spotlight": "$45,055,776"},
    {"Birdman or (The Unexpected Virtue of Ignorance)": "$42,340,598"},
    {"12 Years a Slave": "$56,671,993"},
    {"Argo": "$136,025,503"},
    {"The Artist": "$44,671,682"},
    {"The King's Speech": "$138,797,449"},
    {"The Hurt Locker": "$17,017,811"},
    {"Slumdog Millionaire": "$141,319,928"},
    {"No Country for Old Men": "$74,283,625"},
    {"The Departed": "$132,399,394"},
    {"Crash": "$54,580,300"},
    {"Million Dollar Baby": "$100,492,203"},
    {"The Lord of the Rings: The Return of the King": "$377,027,325"},
    {"Chicago": "$170,687,518"},
    {"A Beautiful Mind": "$170,742,341"},
    {"Gladiator": "$187,705,427"},
    {"American Beauty": "$130,096,601"},
    {"Shakespeare in Love": "$100,317,794"},
    {"Titanic": "$600,683,057"},
    {"The English Patient": "$78,676,425"},
    {"Braveheart": "$75,609,945"},
    {"Forrest Gump": "$330,455,270"},
    {"Schindler's List": "$96,898,818"},
    {"Unforgiven": "$101,167,799"},
    {"The Silence of the Lambs": "$130,742,922"},
    {"Dances with Wolves": "$184,208,848"},
    {"Driving Miss Daisy": "$106,593,296"},
    {"Rain Man": "$172,825,435"},
    {"The Last Emperor 1987": "$43,984,230"},
    {"Platoon": "$138,530,565"},
    {"Out of Africa": "$87,071,205"},
    {"Amadeus": "$51,973,029"},
    {"Terms of Endearment": "$108,423,489"},
    {"Gandhi": "$52,767,889"},
    {"Chariots of Fire": "$58,972,904"},
    {"Ordinary People": "$54,766,923"},
    {"Kramer vs. Kramer": "$106,260,000"},
    {"The Deer Hunter": "$48,979,328"},
    {"Annie Hall": "$38,251,425"},
    {"Rocky": "$117,235,147"}
]

In [275]:
# Pull the first (only) value out of each single-entry dict, keeping list order.
dom_gross = []
for gross_by_title in domestic_gross_winners:
    dom_gross.append(list(gross_by_title.values())[0])

In [276]:
# Attach the grosses positionally: entry i of dom_gross goes to row i, so the
# list length and order must match the rows of oscars_list_df.
oscars_list_df["Total Domestic Gross of Oscar Winners"] = dom_gross
oscars_list_df

Unnamed: 0,Year,Oscar Winner,Total Domestic Gross of Oscar Winners
0,2024,Oppenheimer,"$329,862,540"
1,2023,Everything Everywhere All at Once,"$77,191,785"
2,2022,CODA,"$2,601,649"
3,2021,Nomadland,"$3,700,000"
4,2020,Parasite,"$53,369,749"
5,2019,Green Book,"$85,080,171"
6,2018,The Shape of Water,"$63,859,435"
7,2017,Moonlight,"$27,854,932"
8,2016,Spotlight,"$45,055,776"
9,2015,Birdman or (The Unexpected Virtue of Ignorance),"$42,340,598"


## Merge the two datasets together:

In [280]:
# Pair each year's top-grossing movie with the Oscar winner listed for the
# same year (inner join on 'Year'), then keep the first 41 merged rows.
# NOTE(review): the reason for the 41-row cut-off is not stated — confirm.
boxoffice_oscars_df = (
    boxoffice_df_mojo
    .merge(oscars_list_df, on='Year', how='inner')
    .head(41)
)
boxoffice_oscars_df

Unnamed: 0,Year,Top Grossing Movie (TGM),Total Domestic Gross of TGM,Oscar Winner,Total Domestic Gross of Oscar Winners
0,2024,Inside Out 2,"$596,375,604",Oppenheimer,"$329,862,540"
1,2023,Barbie,"$636,238,421",Everything Everywhere All at Once,"$77,191,785"
2,2022,Top Gun: Maverick,"$718,732,821",CODA,"$2,601,649"
3,2021,Spider-Man: No Way Home,"$804,793,477",Nomadland,"$3,700,000"
4,2020,Bad Boys for Life,"$206,305,244",Parasite,"$53,369,749"
5,2019,Avengers: Endgame,"$858,373,000",Green Book,"$85,080,171"
6,2018,Black Panther,"$700,059,566",The Shape of Water,"$63,859,435"
7,2017,Star Wars: Episode VIII - The Last Jedi,"$620,181,382",Moonlight,"$27,854,932"
8,2016,Finding Dory,"$486,295,561",Spotlight,"$45,055,776"
9,2015,Jurassic World,"$652,270,625",Birdman or (The Unexpected Virtue of Ignorance),"$42,340,598"


In [278]:
# Flag rows whose top-grossing title appears ANYWHERE in the Oscar-winner
# column (membership across all years, exact string match), then show them.
tgm_titles = boxoffice_oscars_df['Top Grossing Movie (TGM)']
oscar_titles = boxoffice_oscars_df['Oscar Winner']
boxoffice_oscars_df['Did the TGM win Best Picture?'] = tgm_titles.isin(oscar_titles)
boxoffice_oscars_df[boxoffice_oscars_df['Did the TGM win Best Picture?']]

Unnamed: 0,Year,Top Grossing Movie (TGM),Total Domestic Gross of TGM,Oscar Winner,Total Domestic Gross of Oscar Winners,Did the TGM win Best Picture?
26,1998,Titanic,"$600,683,057",Titanic,"$600,683,057",True


#### Therefore, Titanic is the only number one top grossing movie that won the Oscar for Best Picture 

In [279]:
# Export the merged table to CSV without the index column.
# NOTE(review): the file name suggests it feeds a Flourish chart — confirm.
boxoffice_oscars_df.to_csv('FLOURISH_Boxoffice_Oscars.csv', index=False)

## To work out averages, converting domestic gross into integers is necessary

In [297]:
# Strip the "$" sign and thousands separators, then store real integers so the
# column matches its "Int" name.
# regex=False makes both replacements literal on every pandas version; "$" is
# a regex metacharacter and the default of `regex` changed in pandas 2.0.
boxoffice_oscars_df['Oscar Winners DomGross Int'] = (
    boxoffice_oscars_df['Total Domestic Gross of Oscar Winners']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)

In [300]:
# Average over rows 1-10 by position, i.e. the ten rows after the first one.
boxoffice_oscars_df['Oscar Winners DomGross Int'].astype(int).iloc[1:11].mean()

45772608.8

In [302]:
# Strip the "$" sign and thousands separators, then store real integers so the
# column matches its "Int" name.
# regex=False makes both replacements literal on every pandas version; "$" is
# a regex metacharacter and the default of `regex` changed in pandas 2.0.
boxoffice_oscars_df['TGM DomGross Int'] = (
    boxoffice_oscars_df['Total Domestic Gross of TGM']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)

In [303]:
# Average over rows 1-10 by position, i.e. the ten rows after the first one.
boxoffice_oscars_df['TGM DomGross Int'].astype(int).iloc[1:11].mean()

601642669.7

In [308]:
# Derive the ratio from the data instead of pasting in the two averages, so the
# figure cannot go stale when the scraped numbers change.
# Both averages cover rows 1-10 by position (the ten rows after the first).
tgm_average = boxoffice_oscars_df['TGM DomGross Int'].astype(int).iloc[1:11].mean()
oscar_average = boxoffice_oscars_df['Oscar Winners DomGross Int'].astype(int).iloc[1:11].mean()
tgm_average / oscar_average

13.144163845430635

**On average, the top grossing movies at the box office made over 13x more revenue than the Oscar winners for Best Picture from 2014-2023.**

Oscar winners were averaging 45,772,608 USD at the box office, while the top-grossing movies were averaging a whopping 601,642,669 USD.