In [20]:
import os
from bs4 import BeautifulSoup
import requests
import pandas as pd

from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import HoverTool,WheelZoomTool ,BoxZoomTool, ResetTool
from bokeh.models import DatetimeTickFormatter

In [21]:
output_notebook()

## Title

Get series in from IMDB.
Get info for entire show.
Store - pandas df -> csv/tab
Plot to bokeh.
Make interactive
Expand to other series and movies. Manually getting film & show IDs easiest.

In [13]:
import sys
sys.version
sys.version_info

sys.version_info(major=3, minor=6, micro=4, releaselevel='final', serial=0)

In [6]:
def tv_show_data_gatherer(imdb_id, number_of_series):
    """Given in the IMDB unique url ID and number of series of a show, returns a pandas DF with show info.
    Returns: airdate, rating, number of votes, series, ep, title of episode, description.
    """
    
    all_series_data = pd.DataFrame()
    
    for series in range(0, number_of_series):
        
        series += 1
        print(f"Getting season {series}.")
       
        # https://www.imdb.com/title/tt0112178/episodes?season=1&ref_=tt_eps_sn_1

        req = requests.get(f"https://www.imdb.com/title/{imdb_id}/episodes?season={series}&ref_=tt_eps_sn_1")
        
        req.status_code
        req.encoding
        req.text[0:500]
        soup = BeautifulSoup(req.text, 'html.parser')

        imdb_episodes = [soup.find_all("div", class_="list_item odd"), soup.find_all("div", class_="list_item even")]
        
        episodes = pd.DataFrame()
        
        odd_even = 0

        for x in imdb_episodes:         

            airdate = pd.Series()
            rating = pd.Series()
            votes = pd.Series()
            title = pd.Series()
            description = pd.Series()
            season = pd.Series()
            ep = pd.Series()
            
            episode = -1 + odd_even           

            for i in x:
                
                episode += 2 
                               
                #print(i.find("div", class_="airdate").text)
                airdate = airdate.append([pd.Series((i.find("div", class_="airdate").text)[0:-5].strip("\n"))])
                #print(i.find("span", class_="ipl-rating-star__rating").text)
                rating = rating.append([pd.Series(i.find("span", class_="ipl-rating-star__rating").text)])
                #print(i.find("span", class_="ipl-rating-star__total-votes").text)
                votes = votes.append([pd.Series((i.find("span", class_="ipl-rating-star__total-votes").text)[1:-1])])
                #print(i.find("strong",).text)
                season = season.append([pd.Series(series)])
                ep = ep.append([pd.Series(episode)])                
                title = title.append([pd.Series((i.find("strong",).text).strip())])
                #print(i.find("div", class_="item_description").text)
                description = description.append([pd.Series(((i.find("div", class_="item_description").text).strip("\n").strip()))])
                

            episodes = pd.concat([airdate, rating.astype("float"), votes, season, ep,
                                  title, description], axis=1) #astype("int64")
            episodes[0] = pd.to_datetime(episodes[0])
            episodes[2] = episodes[2].str.replace(',', '').astype(int)
            episodes.sort_values([0])# .dtypes
            episodes.reset_index()
            
            odd_even += 1
            


                #episodes.iloc[0,5]

            all_series_data = all_series_data.append(episodes)
            
            
        all_series_data = all_series_data.sort_values(by=[0])
        
    all_series_data.columns = ["Aired", "Rating", "Votes", "Season", "Episode", "Title", "Description"]
    all_series_data = all_series_data.reset_index()
    all_series_data = all_series_data.drop(["index"], axis=1)
    all_series_data = all_series_data.set_index("Aired")

        
        #all_series_data.rename(columns={0: "Aired", 1: 'Rating', 2: 'Votes', 3: 'Season', 4: 'Episode', 
         #                               5: 'Title', 6: 'Description'}, inplace=True)
        
    #print(all_series_data.head())
        
        # rename cols
        # ====================================
        
        # print(all_series_data[0])
        
    return(all_series_data)


In [13]:
# Dict of TV shows to gather data for. 
shows = {
    "StarTrek_Voyager": ["tt0112178", 7], #7
    "StarTrek_DS9": ["tt0106145", 7], #7
    "StarTrek_TNG": ["tt0092455", 7], #7
    "StarTrek_Enterprise": ["tt0244365", 4], #4
    
    
}

for k, v in shows.items():    
    try:
        os.mkdir("season_output")
        print(k, v[0], v[1])

        all_data = tv_show_data_gatherer(v[0], v[1])

        all_data.to_csv(f"{k}", sep=',')
        os.chdir("..")
    except:
        os.chdir("season_output")
        print(k, v[0], v[1])

        all_data = tv_show_data_gatherer(v[0], v[1])

        all_data.to_csv(f"{k}", sep=',')
        os.chdir("..")
        # endgame has wrong date, edit. 

StarTrek_Voyager tt0112178 7
Getting season 1.
Getting season 2.
Getting season 3.
Getting season 4.
Getting season 5.
Getting season 6.
Getting season 7.
            Rating  Votes  Season  Episode           Title  \
Aired                                                        
1996-08-26     7.4   2759       1        1       Caretaker   
1996-09-29     7.2   1176       1        2        Parallax   
1996-10-06     7.2   1154       1        3  Time and Again   
1996-10-13     7.1   1096       1        4           Phage   
1996-10-20     6.5   1062       1        5       The Cloud   

                                                  Description  
Aired                                                          
1996-08-26  While pursuing the trail of Maquis rebels, a n...  
1996-09-29  Tensions rise between the merged starfleet and...  
1996-10-06  The Voyager crew discovers a planet which rece...  
1996-10-13  Searching to replenish their dilithium supplie...  
1996-10-20  Voyager become

In [23]:
 os.chdir("..") # for testing to move back up dir. 

In [28]:
# Make graphs

os.chdir("season_output")

files = os.listdir(".")

colours = ["red", "blue", "green", "purple"]


hover = HoverTool(
        tooltips=[
           # ("Aired", "@Aired"),
            ("Rating", "@Rating"), # change rounding
            ("Votes", "@Votes"),
            ("Season", "@Season"),
            ("Episode", "@Episode"),
            ("Title", "@Title"),
            ("Description", "@Description"),

        ]
    )

p = figure(plot_width=1000, plot_height=700, title="Star Trek",  x_axis_type="datetime", tools=[hover])
p.xaxis.formatter=DatetimeTickFormatter(years=["%d %B %Y"])

col_num = 0

for i in files:
    
    

    df_i = pd.read_csv(f"{i}", sep=",", index_col="Aired")
    print(df_i.head())

    df_i.index = pd.to_datetime(df_i.index)

    seasonss = df_i["Season"].unique() 
    

    for x in seasonss:
        
        # print(df_i.loc[df_i['Season'] == i])

        source = ColumnDataSource(df_i.loc[df_i['Season'] == x]) # df.loc[df['column_name'] == some_value]

        p.circle('Aired', 'Rating', size=10, source=source, fill_color=colours[col_num], line_color=colours[col_num])
        #p.line('Aired', 'Rating', source=source)
    col_num += 1
    
p.add_tools(WheelZoomTool(), BoxZoomTool(), ResetTool())

    
show(p)

os.chdir("..")
 

            Rating  Votes  Season  Episode           Title  \
Aired                                                        
1996-08-26     7.4   2759       1        1       Caretaker   
1996-09-29     7.2   1176       1        2        Parallax   
1996-10-06     7.2   1154       1        3  Time and Again   
1996-10-13     7.1   1096       1        4           Phage   
1996-10-20     6.5   1062       1        5       The Cloud   

                                                  Description  
Aired                                                          
1996-08-26  While pursuing the trail of Maquis rebels, a n...  
1996-09-29  Tensions rise between the merged starfleet and...  
1996-10-06  The Voyager crew discovers a planet which rece...  
1996-10-13  Searching to replenish their dilithium supplie...  
1996-10-20  Voyager becomes trapped in a strange nebula wh...  
            Rating  Votes  Season  Episode                Title  \
Aired                                             