# Setup

In [9]:
import os
import dotenv
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [103]:
import pandas as pd
from itertools import chain

In [11]:
import tmdbsimple as tmdb
tmdb.API_KEY = os.environ.get('TMDB_API_KEY')

In [14]:
search = tmdb.Search()
response = search.tv(query='Star Trek')

In [22]:
for s in search.results:
    print(s['name'], s['id'])

Star Trek 253
Star Trek: Voyager 1855
Star Trek: Lower Decks 85948
Star Trek: Strange New Worlds 103516
Star Trek: Enterprise 314
Star Trek: Discovery 67198
Star Trek: Picard 85949
Star Trek: The Next Generation 655
Star Trek: Deep Space Nine 580
Star Trek: Prodigy 106393
Star Trek: Short Treks 82491
Star Trek: The Animated Series 1992
Star Trek: Phase II 4653
Star Trek Continues 116656
Star Trek: Of Gods and Men 43119
The Center Seat: 55 Years of Star Trek 137895
Star Trek: Hidden Frontier 1081
Star Trek: Odyssey 4080
Pacific 201: A Star Trek Fan Film 208977
Star Trek: The Helena Chronicles 15950


In [28]:
tmdb_series_dict_list = []

for s in search.results:
    tmdb_series_dict_list.append(
        {
            'name': s['name'],
            'id': s['id']
        }
    )
tmdb_series_df = pd.DataFrame(tmdb_series_dict_list)

In [29]:
tmdb_series_df

Unnamed: 0,name,id
0,Star Trek,253
1,Star Trek: Voyager,1855
2,Star Trek: Lower Decks,85948
3,Star Trek: Strange New Worlds,103516
4,Star Trek: Enterprise,314
5,Star Trek: Discovery,67198
6,Star Trek: Picard,85949
7,Star Trek: The Next Generation,655
8,Star Trek: Deep Space Nine,580
9,Star Trek: Prodigy,106393


In [23]:
series_names = ["The Original Series", "The Animated Series", "The Next Generation", "Deep Space Nine",
        "Voyager", "Enterprise", "Discovery", "Picard ", "Lower Decks"]

In [30]:
series_list = []

for series in series_names:
    series_list.append('Star Trek: ' + series)

# The Original Series is just called "Star Trek"
series_list[0] = 'Star Trek'

In [33]:
series_list

['Star Trek',
 'Star Trek: The Animated Series',
 'Star Trek: The Next Generation',
 'Star Trek: Deep Space Nine',
 'Star Trek: Voyager',
 'Star Trek: Enterprise',
 'Star Trek: Discovery',
 'Star Trek: Picard ',
 'Star Trek: Lower Decks']

In [35]:
# Filter TMDB DataFrame with series list
filtered_tmdb_df = tmdb_series_df[tmdb_series_df['name'].isin(series_list)]

In [36]:
filtered_tmdb_df

Unnamed: 0,name,id
0,Star Trek,253
1,Star Trek: Voyager,1855
2,Star Trek: Lower Decks,85948
4,Star Trek: Enterprise,314
5,Star Trek: Discovery,67198
7,Star Trek: The Next Generation,655
8,Star Trek: Deep Space Nine,580
11,Star Trek: The Animated Series,1992


In [97]:
# for each series id get number of seasons & episode count per season
def get_season_infos(id):
    identity = tmdb.TV(id).info()
    #print(identity['name'])
    #print(len(identity['seasons']))
    seasons_list = []
    for season in identity['seasons']:
        #print(id, season['season_number'], season['episode_count'])
        seasons_list.append({
            'id': id,
            'season_number': season['season_number'],
            'episode_count': season['episode_count']
        })
    return seasons_list

In [106]:
tmdb_season_infos_nested_dict_list = []

for index, row in filtered_tmdb_df.iterrows():
    #print(row['name'])
    #print(row['id'])
    season_infos = get_season_infos(row['id'])
    tmdb_season_infos_nested_dict_list.append(season_infos)

# Unnest nested list of dictionaries
tmdb_season_infos = list(chain.from_iterable(tmdb_season_infos_nested_dict_list))
# create dataframe from list of dictionaries
tmdb_seasons_df = pd.DataFrame(tmdb_season_infos)
    

In [119]:
# Season 0 indicates series specials
tmdb_regular_seasons_df = tmdb_seasons_df[tmdb_seasons_df['season_number'] != 0]

In [129]:
tmdb_episodes_infos_nested_dict_list = []

for index, row in tmdb_regular_seasons_df.iterrows():
    for num in range(1, row['episode_count'] + 1):
        episode = tmdb.TV_Episodes(row['id'], row['season_number'], num).info()
    
        tmdb_episodes_infos_nested_dict_list.append({
            'id': row['id'],
            'episode_id': episode['id'],
            'season_number': episode['season_number'],
            'episode_number': episode['episode_number'],
            'air_date': episode['air_date'],
            'name': episode['name'],
            'overview': episode['overview'],
            'vote_average': episode['vote_average'],
            'vote_count': episode['vote_count']
        })

# create dataframe from list of dictionaries
tmdb_episodes_df = pd.DataFrame(tmdb_episodes_infos_nested_dict_list)

In [132]:
# join series names to dataframe
tmdb_df = pd.merge(filtered_tmdb_df, tmdb_episodes_df, on='id')

In [135]:
os.getcwd()

'/Users/reginagalambos/Google Drive_mediasittich/learning_data_science/projects/text_mining_star_trek/notebooks'

In [136]:
# Export to st_metadata.csv
tmdb_df.to_csv(r'../data/raw/st_tmdb.csv', index = False)

In [133]:
tmdb_df

Unnamed: 0,name_x,id,episode_id,season_number,episode_number,air_date,name_y,overview,vote_average,vote_count
0,Star Trek,253,13955,1,1,1966-09-08,The Man Trap,Kirk and his crew are at deadly risk from an a...,6.940,50
1,Star Trek,253,13957,1,2,1966-09-15,Charlie X,Captain Kirk must learn the limits to the powe...,6.829,41
2,Star Trek,253,13956,1,3,1966-09-22,Where No Man Has Gone Before,"While exploring the edge of the galaxy, the En...",6.974,39
3,Star Trek,253,13954,1,4,1966-09-29,The Naked Time,A strange alien substance causes the crew to a...,7.179,39
4,Star Trek,253,13962,1,5,1966-10-06,The Enemy Within,A transporter malfunction causes Captain Kirk ...,7.400,36
...,...,...,...,...,...,...,...,...,...,...
799,Star Trek: The Animated Series,1992,136184,2,2,1974-09-14,Bem,The Enterprise has as an observer on their cur...,5.000,2
800,Star Trek: The Animated Series,1992,136186,2,3,1974-09-21,The Practical Joker,The Enterprise explores a mysterious cloud in ...,6.000,1
801,Star Trek: The Animated Series,1992,136183,2,4,1974-09-28,Albatross,Dr. McCoy is arrested and held responsible for...,6.000,1
802,Star Trek: The Animated Series,1992,136188,2,5,1974-10-05,How Sharper Than a Serpent's Tooth,The Enterprise is tracing the course of a prob...,6.000,1
