In [1]:
# Dependencies
import json
import os
import time
from pprint import pprint
from urllib.parse import quote_plus

import pandas as pd
import requests
from sqlalchemy import create_engine

from config import api_key

## Scrape Hulu Original TV shows from Wikipedia

In [2]:
# Wikipedia page listing Hulu's original programming; its HTML tables are scraped below.
hulu_url = 'https://en.wikipedia.org/wiki/List_of_Hulu_original_programming'

In [3]:
# Let pandas' `read_html` fetch the page and parse the HTML tables it finds;
# the result is a list of DataFrames that later cells index by position.
hulu_tables = pd.read_html(io=hulu_url)
hulu_tables

[                        Title                                 Genre  \
 0              The Confession                                 Drama   
 1               East Los High                            Teen drama   
 2                    The Path                                 Drama   
 3                    Freakish                                Horror   
 4                      Chance  Crime drama / Psychological thriller   
 5                    Shut Eye                                 Drama   
 6                    11.22.63                                 Drama   
 7               Dimension 404           Science fiction / anthology   
 8         The Handmaid's Tale                       Dystopian drama   
 9           Marvel's Runaways                Superhero / Teen drama   
 10          The Looming Tower                                 Drama   
 11                Castle Rock        Psychological horror anthology   
 12              Into the Dark                      Horror antho

## Extract Tables into DataFrames and Drop unreleased shows

In [4]:
# First scraped table: bind it to df1 and give its six columns short snake_case names.
df1 = hulu_tables[0]
df1.columns = 'title genre premiere seasons length status'.split()
df1.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
19,Little Fires Everywhere,Drama,"March 18, 2020",8 episodes,53–60 min.,Miniseries
20,Mrs. America,Drama,"April 15, 2020[a]",9 episodes,43–48 min.,Miniseries
21,Awaiting release,Awaiting release,Awaiting release,Awaiting release,Awaiting release,Awaiting release
22,Monsterland[12],Horror anthology,"October 2, 2020[13]","1 season, 8 episodes",TBA,Pending
23,Marvel's Helstrom[14],Superhero / Horror,"October 16, 2020[15]","1 season, 10 episodes",TBA,Pending


In [5]:
# Keep only the released shows. The scraped table contains a separator row whose
# cells all read 'Awaiting release'; that row and everything after it describe
# shows that have not premiered yet.
# Locating the separator (instead of slicing off a fixed number of rows) keeps
# the cell correct when the page gains or loses entries, and makes re-running
# the cell a no-op rather than trimming released shows.
unreleased_marker = df1['title'].eq('Awaiting release')
# cumsum() stays 0 for every row that precedes the first separator row.
df1 = df1[unreleased_marker.cumsum() == 0]
df1.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
16,Looking for Alaska,Teen drama,"October 18, 2019",8 episodes,48–57 min.,Miniseries
17,Reprisal,Drama,"December 6, 2019","1 seasons, 10 episodes",41–57 min.,Ended[11]
18,Devs,Drama,"March 5, 2020[a]",8 episodes,43–57 min.,Miniseries
19,Little Fires Everywhere,Drama,"March 18, 2020",8 episodes,53–60 min.,Miniseries
20,Mrs. America,Drama,"April 15, 2020[a]",9 episodes,43–48 min.,Miniseries


In [6]:
# Second scraped table: bind it to df2 and give its six columns short snake_case names.
df2 = hulu_tables[1]
df2.columns = 'title genre premiere seasons length status'.split()
df2.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
14,High Fidelity,Comedy-drama,"February 14, 2020","1 season, 10 episodes",26–34 min.,Ended[25]
15,The Great,Historical comedy,"May 15, 2020","1 season, 10 episodes",50–55 min.,Renewed[26]
16,"Love, Victor",Romantic comedy,"June 17, 2020","1 season, 10 episodes",25–31 min.,Renewed[13]
17,Awaiting release,Awaiting release,Awaiting release,Awaiting release,Awaiting release,Awaiting release
18,Woke[27],Comedy,"September 9, 2020[28]","1 season, 8 episodes",TBA,Pending


In [7]:
# Keep only the released shows. The scraped table contains a separator row whose
# cells all read 'Awaiting release'; that row and everything after it describe
# shows that have not premiered yet.
# Locating the separator (instead of slicing off a fixed number of rows) keeps
# the cell correct when the page changes and makes re-running the cell a no-op.
unreleased_marker = df2['title'].eq('Awaiting release')
# cumsum() stays 0 for every row that precedes the first separator row.
df2 = df2[unreleased_marker.cumsum() == 0]
df2.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
12,Four Weddings and a Funeral,Romantic comedy anthology series,"July 31, 2019",10 episodes,44–50 min.,Miniseries
13,Dollface,Comedy,"November 15, 2019","1 season, 10 episodes",22–32 min.,Renewed[10]
14,High Fidelity,Comedy-drama,"February 14, 2020","1 season, 10 episodes",26–34 min.,Ended[25]
15,The Great,Historical comedy,"May 15, 2020","1 season, 10 episodes",50–55 min.,Renewed[26]
16,"Love, Victor",Romantic comedy,"June 17, 2020","1 season, 10 episodes",25–31 min.,Renewed[13]


In [8]:
# Third scraped table: bind it to df3 and give its six columns short snake_case names.
df3 = hulu_tables[2]
df3.columns = 'title genre premiere seasons length status'.split()
df3.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
0,The Awesomes,Superhero comedy,"August 1, 2013","3 seasons, 30 episodes",22 min.,Ended[29]
1,Solar Opposites,Science fiction / Sitcom,"May 8, 2020","1 season, 8 episodes",22 min.,Renewed for seasons 2 and 3[30]
2,Crossing Swords,Medieval fantasy comedy,"June 12, 2020","1 season, 10 episodes",29 min.,Renewed[30]


In [9]:
# Fourth scraped table: bind it to df4 and give its six columns short snake_case names.
df4 = hulu_tables[3]
df4.columns = 'title genre premiere seasons length status'.split()
df4.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
0,The Doozers,Adventure,"April 25, 2014","2 seasons, 62 episodes",12–22 min.,Ended
1,The Bravest Knight,Fantasy,"June 21, 2019","1 season, 13 episodes",12 min.,Pending
2,Upcoming,Upcoming,Upcoming,Upcoming,Upcoming,Upcoming
3,Animaniacs[31],Comedy,"November 20, 2020[13]","1 season, 13 episodes",30 min.,Renewed


In [10]:
# Keep only the released shows. In this table the separator row reads 'Upcoming'
# in every cell; that row and everything after it describe shows that have not
# premiered yet.
# Locating the separator (instead of slicing off a fixed number of rows) keeps
# the cell correct when the page changes and makes re-running the cell a no-op.
unreleased_marker = df4['title'].eq('Upcoming')
# cumsum() stays 0 for every row that precedes the first separator row.
df4 = df4[unreleased_marker.cumsum() == 0]
df4.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
0,The Doozers,Adventure,"April 25, 2014","2 seasons, 62 episodes",12–22 min.,Ended
1,The Bravest Knight,Fantasy,"June 21, 2019","1 season, 13 episodes",12 min.,Pending


In [11]:
# Fifth scraped table: bind it to df5 and give its six columns short snake_case names.
df5 = hulu_tables[4]
df5.columns = 'title genre premiere seasons length status'.split()
df5.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
3,RocketJump: The Show,Making-of,"December 2, 2015","1 season, 8 episodes",21–27 min.,Ended
4,Triumph's Election Watch 2016,Political comedy,"February 8, 2016","1 season, 6 episodes",30–85 min.,Ended
5,Vice Investigates,Docuseries,"November 1, 2019","1 season, 10 episodes",59 min.,Season 1 ongoing
6,Defining Moments with OZY,Docuseries,"March 4, 2020",TBA,TBA,Season 1 ongoing
7,Hillary,Docuseries,"March 6, 2020",4 episodes,59 min.,Miniseries


In [12]:
# Sixth scraped table: bind it to df6 and give its six columns short snake_case names.
df6 = hulu_tables[5]
df6.columns = 'title genre premiere seasons length status'.split()
df6.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
2,Spoilers with Kevin Smith,Film criticism,"June 4, 2012","1 season, 10 episodes",20–26 min.,Ended[35]
3,"I Love You, America with Sarah Silverman",Politics,"October 12, 2017","2 seasons, 21 episodes",26–33 min.,Ended[36]
4,Taste the Nation with Padma Lakshmi,Cooking show,"June 18, 2020","1 season, 10 episodes",29 min.,Renewed[13]
5,Awaiting release,Awaiting release,Awaiting release,Awaiting release,Awaiting release,Awaiting release
6,Eater's Guide To The World[13],Reality,"November 11, 2020","1 season, 7 episodes",TBA,Pending


In [13]:
# Keep only the released shows. The scraped table contains a separator row whose
# cells all read 'Awaiting release'; that row and everything after it describe
# shows that have not premiered yet.
# Locating the separator (instead of slicing off a fixed number of rows) keeps
# the cell correct when the page changes and makes re-running the cell a no-op.
unreleased_marker = df6['title'].eq('Awaiting release')
# cumsum() stays 0 for every row that precedes the first separator row.
df6 = df6[unreleased_marker.cumsum() == 0]
df6.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
0,If I Can Dream,Reality,"March 2, 2010","1 season, 32 episodes",25 min.,Ended
1,The Morning After,Pop culture,"January 17, 2011","3 seasons, 800 episodes",5 min.,Ended[34]
2,Spoilers with Kevin Smith,Film criticism,"June 4, 2012","1 season, 10 episodes",20–26 min.,Ended[35]
3,"I Love You, America with Sarah Silverman",Politics,"October 12, 2017","2 seasons, 21 episodes",26–33 min.,Ended[36]
4,Taste the Nation with Padma Lakshmi,Cooking show,"June 18, 2020","1 season, 10 episodes",29 min.,Renewed[13]


## Merge All the tables into One

In [14]:
# Stack df1 and df2 row-wise to start the combined frame.
# `pd.concat` replaces `DataFrame.append`, which was deprecated in pandas 1.4 and
# removed in 2.0; like `append`, it keeps each frame's own row labels.
hulu_df = pd.concat([df1, df2])
hulu_df.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
12,Four Weddings and a Funeral,Romantic comedy anthology series,"July 31, 2019",10 episodes,44–50 min.,Miniseries
13,Dollface,Comedy,"November 15, 2019","1 season, 10 episodes",22–32 min.,Renewed[10]
14,High Fidelity,Comedy-drama,"February 14, 2020","1 season, 10 episodes",26–34 min.,Ended[25]
15,The Great,Historical comedy,"May 15, 2020","1 season, 10 episodes",50–55 min.,Renewed[26]
16,"Love, Victor",Romantic comedy,"June 17, 2020","1 season, 10 episodes",25–31 min.,Renewed[13]


In [15]:
# Add the rows of df3 to the combined frame.
# `pd.concat` replaces `DataFrame.append`, which was deprecated in pandas 1.4 and
# removed in 2.0; like `append`, it keeps each frame's own row labels.
hulu_df = pd.concat([hulu_df, df3])
hulu_df.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
15,The Great,Historical comedy,"May 15, 2020","1 season, 10 episodes",50–55 min.,Renewed[26]
16,"Love, Victor",Romantic comedy,"June 17, 2020","1 season, 10 episodes",25–31 min.,Renewed[13]
0,The Awesomes,Superhero comedy,"August 1, 2013","3 seasons, 30 episodes",22 min.,Ended[29]
1,Solar Opposites,Science fiction / Sitcom,"May 8, 2020","1 season, 8 episodes",22 min.,Renewed for seasons 2 and 3[30]
2,Crossing Swords,Medieval fantasy comedy,"June 12, 2020","1 season, 10 episodes",29 min.,Renewed[30]


In [16]:
# Add the rows of df4 to the combined frame.
# `pd.concat` replaces `DataFrame.append`, which was deprecated in pandas 1.4 and
# removed in 2.0; like `append`, it keeps each frame's own row labels.
hulu_df = pd.concat([hulu_df, df4])
hulu_df.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
0,The Awesomes,Superhero comedy,"August 1, 2013","3 seasons, 30 episodes",22 min.,Ended[29]
1,Solar Opposites,Science fiction / Sitcom,"May 8, 2020","1 season, 8 episodes",22 min.,Renewed for seasons 2 and 3[30]
2,Crossing Swords,Medieval fantasy comedy,"June 12, 2020","1 season, 10 episodes",29 min.,Renewed[30]
0,The Doozers,Adventure,"April 25, 2014","2 seasons, 62 episodes",12–22 min.,Ended
1,The Bravest Knight,Fantasy,"June 21, 2019","1 season, 13 episodes",12 min.,Pending


In [17]:
# Add the rows of df5 to the combined frame.
# `pd.concat` replaces `DataFrame.append`, which was deprecated in pandas 1.4 and
# removed in 2.0; like `append`, it keeps each frame's own row labels.
hulu_df = pd.concat([hulu_df, df5])
hulu_df.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
3,RocketJump: The Show,Making-of,"December 2, 2015","1 season, 8 episodes",21–27 min.,Ended
4,Triumph's Election Watch 2016,Political comedy,"February 8, 2016","1 season, 6 episodes",30–85 min.,Ended
5,Vice Investigates,Docuseries,"November 1, 2019","1 season, 10 episodes",59 min.,Season 1 ongoing
6,Defining Moments with OZY,Docuseries,"March 4, 2020",TBA,TBA,Season 1 ongoing
7,Hillary,Docuseries,"March 6, 2020",4 episodes,59 min.,Miniseries


In [18]:
# Add the rows of df6 to the combined frame.
# `pd.concat` replaces `DataFrame.append`, which was deprecated in pandas 1.4 and
# removed in 2.0; like `append`, it keeps each frame's own row labels.
hulu_df = pd.concat([hulu_df, df6])
hulu_df.tail()

Unnamed: 0,title,genre,premiere,seasons,length,status
0,If I Can Dream,Reality,"March 2, 2010","1 season, 32 episodes",25 min.,Ended
1,The Morning After,Pop culture,"January 17, 2011","3 seasons, 800 episodes",5 min.,Ended[34]
2,Spoilers with Kevin Smith,Film criticism,"June 4, 2012","1 season, 10 episodes",20–26 min.,Ended[35]
3,"I Love You, America with Sarah Silverman",Politics,"October 12, 2017","2 seasons, 21 episodes",26–33 min.,Ended[36]
4,Taste the Nation with Padma Lakshmi,Cooking show,"June 18, 2020","1 season, 10 episodes",29 min.,Renewed[13]


## Transform Hulu DataFrame

In [19]:
# Renumber the merged rows from zero (discarding the per-table row labels) and
# remove the columns that are not needed downstream.
unused_columns = ['seasons', 'premiere', 'length', 'status']
hulu_df = hulu_df.reset_index(drop=True).drop(columns=unused_columns)

In [20]:
# Display the merged frame, now reduced to the `title` and `genre` columns.
hulu_df

Unnamed: 0,title,genre
0,The Confession,Drama
1,East Los High,Teen drama
2,The Path,Drama
3,Freakish,Horror
4,Chance,Crime drama / Psychological thriller
5,Shut Eye,Drama
6,11.22.63,Drama
7,Dimension 404,Science fiction / anthology
8,The Handmaid's Tale,Dystopian drama
9,Marvel's Runaways,Superhero / Teen drama


In [21]:
# Pull the show titles out of the frame as a plain Python list
titles = hulu_df['title'].tolist()

# Report how many titles there are, then the titles themselves
print(len(titles), titles, sep='\n')

56
['The Confession', 'East Los High', 'The Path', 'Freakish', 'Chance', 'Shut Eye', '11.22.63', 'Dimension 404', "The Handmaid's Tale", "Marvel's Runaways", 'The Looming Tower', 'Castle Rock', 'Into the Dark', 'Light as a Feather', 'The Act', 'Wu-Tang: An American Saga', 'Looking for Alaska', 'Reprisal', 'Devs', 'Little Fires Everywhere', 'Mrs. America', 'Battleground', 'Quick Draw', 'Deadbeat', 'The Hotwives', 'The Neighbors', 'Difficult People', 'Casual', 'Future Man', 'All Night', 'PEN15', 'Shrill', 'Ramy', 'Four Weddings and a Funeral', 'Dollface', 'High Fidelity', 'The Great', 'Love, Victor', 'The Awesomes', 'Solar Opposites', 'Crossing Swords', 'The Doozers', 'The Bravest Knight', 'A Day in the Life', 'Up to Speed', 'Behind the Mask', 'RocketJump: The Show', "Triumph's Election Watch 2016", 'Vice Investigates', 'Defining Moments with OZY', 'Hillary', 'If I Can Dream', 'The Morning After', 'Spoilers with Kevin Smith', 'I Love You, America with Sarah Silverman', 'Taste the Nation 

## Extract Hulu series data from OMDB API

In [22]:
# Accumulators that receive the values returned by the API calls
ratings, writers, years = [], [], []

# OMDb query prefix restricted to series; the show title is appended after `t=`
url = f'http://www.omdbapi.com/?apikey={api_key}&type=series&t='

In [23]:
# Look up every scraped title on OMDb and collect its IMDb rating, writer and
# year in the parallel lists `ratings`, `writers` and `years`.
# Titles that yield no usable rating are reported and also recorded in
# `missing_titles` so they can be identified after the loop.
REQUEST_TIMEOUT = 10  # seconds to wait for OMDb before giving up on a title

# Empty the result lists first so re-running this cell cannot append a second
# copy of every value and push the lists out of step with `titles`.
ratings.clear()
writers.clear()
years.clear()
missing_titles = []

for title in titles:
    try:
        # Percent-encode the title: characters such as '&', '#' or '+' would
        # otherwise be read as query-string syntax and change the request.
        response = requests.get(url + quote_plus(title), timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        series_data = response.json()
    except (requests.RequestException, ValueError) as error:
        # Only network/HTTP/JSON failures are handled here; anything else
        # (e.g. KeyboardInterrupt) propagates instead of being swallowed.
        # Print just the exception type: the message may embed the request URL,
        # which contains the API key.
        print(f'request for {title} failed: {type(error).__name__}')
        missing_titles.append(title)
        continue

    rating = series_data.get('imdbRating', 'N/A')
    if rating == 'N/A':
        # Covers both an explicit 'N/A' rating and a reply without the field.
        print(f'could not find {title} rating')
        missing_titles.append(title)
    else:
        # Append to all three lists together so they always stay the same
        # length, even when the reply lacks 'Writer' or 'Year'.
        ratings.append(rating)
        writers.append(series_data.get('Writer', 'N/A'))
        years.append(series_data.get('Year', 'N/A'))

    # Pause after every answered request to avoid hammering the API.
    time.sleep(1)

could not find Marvel's Runaways rating
could not find The Doozers rating
could not find I Love You, America with Sarah Silverman rating
could not find Taste the Nation with Padma Lakshmi rating


In [24]:
# Print the size of each result list; they must match to line up row by row
for collected in (ratings, writers, years):
    print(len(collected))

52
52
52


## Transform Hulu DataFrame Again

In [25]:
# Remove, in place, the shows for which the OMDb lookup returned no data.
# NOTE(review): this list is maintained by hand and must mirror the titles the
# lookup loop reported as "could not find"; re-check it whenever the scrape or
# the API results change.
titles_without_data = [
    "Marvel's Runaways",
    "The Doozers",
    "I Love You, America with Sarah Silverman",
    "Taste the Nation with Padma Lakshmi",
]
rows_without_data = hulu_df.index[hulu_df['title'].isin(titles_without_data)]
hulu_df.drop(index=rows_without_data, inplace=True)


In [26]:
# Renumber the rows from zero in place; the previous labels are kept as an `index` column.
hulu_df.reset_index(drop=False, inplace=True)

In [27]:
# Discard the `index` column holding the old row labels (removed in place).
del hulu_df['index']

In [28]:
# Display the frame after the titles without OMDb data were removed.
hulu_df

Unnamed: 0,title,genre
0,The Confession,Drama
1,East Los High,Teen drama
2,The Path,Drama
3,Freakish,Horror
4,Chance,Crime drama / Psychological thriller
5,Shut Eye,Drama
6,11.22.63,Drama
7,Dimension 404,Science fiction / anthology
8,The Handmaid's Tale,Dystopian drama
9,The Looming Tower,Drama


In [29]:
# Re-extract titles and genres from the filtered frame as plain Python lists
titles, genres = (hulu_df[column].tolist() for column in ('title', 'genre'))

In [30]:
# Assemble the final dataset from the parallel lists; `network` is a single
# string that pandas repeats for every row.
clean_hulu_df = pd.DataFrame(dict(
    title=titles,
    genre=genres,
    year=years,
    imdb_rating=ratings,
    writer=writers,
    network='Hulu',
))

In [31]:
# Display the assembled Hulu dataset before it is loaded into the database.
clean_hulu_df

Unnamed: 0,title,genre,year,imdb_rating,writer,network
0,The Confession,Drama,2011,7.7,,Hulu
1,East Los High,Teen drama,2013–,7.0,,Hulu
2,The Path,Drama,2016–2018,7.3,Jessica Goldberg,Hulu
3,Freakish,Horror,2016–2018,6.3,,Hulu
4,Chance,Crime drama / Psychological thriller,2016–2017,7.7,"Alexandra Cunningham, Kem Nunn",Hulu
5,Shut Eye,Drama,2016–2017,7.4,Leslie Bohem,Hulu
6,11.22.63,Drama,2016,8.2,,Hulu
7,Dimension 404,Science fiction / anthology,2017–,6.6,"Will Campos, Desmond Dolly, Daniel Johnson, Da...",Hulu
8,The Handmaid's Tale,Dystopian drama,2017–,8.5,Bruce Miller,Hulu
9,The Looming Tower,Drama,2018,8.0,,Hulu


## Load DataFrame into database

In [32]:
# Create the SQLAlchemy engine for the Postgres `shows_db` database.
# The connection URL can be overridden through the SHOWS_DB_URL environment
# variable so real credentials never have to be written into the notebook; the
# fallback is the original local development URL.
DEFAULT_DB_URL = 'postgresql://postgres:postgres@localhost:5432/shows_db'
engine = create_engine(os.environ.get('SHOWS_DB_URL', DEFAULT_DB_URL))

# Export the dataframe to the `hulu` table, writing the row index as a column.
# NOTE(review): `if_exists='append'` adds the rows again on every run of this
# cell — confirm that duplicates are acceptable or clear the table first.
clean_hulu_df.to_sql('hulu', con=engine, if_exists='append', index=True)