In [43]:
import pandas as pd
import csv
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

In [2]:
# Reading TV Time CSV Data
tvshows_df = pd.read_csv("Data/tvtimeshows.csv")
tvshows_df.head()

Unnamed: 0,id,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons
0,288128,X-Ray & Vav,230,10,9,2,Rooster Teeth,4.2,https://dg31sz3gwrwan.cloudfront.net/poster/28...,"[{'number': 1, 'nb_episodes': 4}, {'number': 2..."
1,313803,Sister's Slam Dunk,1298,100,80,2,KBS TV2,4.25,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 33}, {'number': ..."
2,325462,Reverse,270,15,60,1,Tokyo Broadcasting System,4.63,https://dg31sz3gwrwan.cloudfront.net/poster/32...,"[{'number': 1, 'nb_episodes': 10}]"
3,325198,Frame Arms Girl,900,24,25,1,Tokyo MX,3.54,https://dg31sz3gwrwan.cloudfront.net/poster/32...,"[{'number': 1, 'nb_episodes': 12}]"
4,349743,Mr Inbetween,7123,118,25,2,FX,4.42,https://dg31sz3gwrwan.cloudfront.net/poster/34...,"[{'number': 1, 'nb_episodes': 6}, {'number': 2..."


In [3]:
tvshows_df["network"].value_counts().head(20)

YouTube                      715
Netflix                      656
Tokyo MX                     540
ABC (US)                     495
TV Tokyo                     451
NBC                          429
BBC One                      405
CBS                          393
Fuji TV                      336
FOX                          288
Rede Globo                   284
AT-X                         242
MBC                          236
Tokyo Broadcasting System    234
Channel 4                    228
BBC Two                      220
TVN                          192
SBS (KR)                     185
HBO                          179
KBS TV2                      177
Name: network, dtype: int64

In [4]:
# Reading All Episodes CSV Data
episode_df = pd.read_csv("Data/all_episodes.csv")
episode_df.head()

Unnamed: 0,show_id,episode_id,time,episode,title,description,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad
0,315103,5697528,2016-10-05T03:00,S01E01,Idiots with Numbers!,Idiots with Numbers! (S01E01) is the first epi...,9.48,12307.0,175.0,388.0,136.0,1.0,16.0,3.0
1,315103,5774664,2016-10-12T03:00,S01E02,The Inmates Are Stupid! The Guards Are Kind of...,The Inmates Are Stupid! The Guards Are Kind of...,9.52,11423.0,156.0,349.0,82.0,0.0,5.0,3.0
2,315103,5774665,2016-10-19T03:00,S01E03,Another Idiot Has Come!!,Another Idiot Has Come!! (S01E03) is the third...,9.36,11067.0,145.0,339.0,63.0,0.0,7.0,1.0
3,315103,5774666,2016-10-26T03:00,S01E04,Happy New Year! The New Year's Tournament Is W...,Happy New Year! The New Year's Tournament Is W...,8.36,10552.0,134.0,270.0,109.0,0.0,3.0,3.0
4,315103,5774667,2016-11-02T03:00,S01E05,A Fraud and a Hero,A Fraud and a Hero (S01E05) is the fifth episo...,8.94,10340.0,106.0,183.0,223.0,1.0,2.0,2.0


In [5]:
pd.set_option('display.max_columns', None)

In [6]:
# Merging both dataframe on Show Id
merged_df = pd.merge(episode_df, tvshows_df, how="inner", left_on="show_id", right_on="id")
merged_df.head()

Unnamed: 0,show_id,episode_id,time,episode,title,description,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,id,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons
0,315103,5697528,2016-10-05T03:00,S01E01,Idiots with Numbers!,Idiots with Numbers! (S01E01) is the first epi...,9.48,12307.0,175.0,388.0,136.0,1.0,16.0,3.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."
1,315103,5774664,2016-10-12T03:00,S01E02,The Inmates Are Stupid! The Guards Are Kind of...,The Inmates Are Stupid! The Guards Are Kind of...,9.52,11423.0,156.0,349.0,82.0,0.0,5.0,3.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."
2,315103,5774665,2016-10-19T03:00,S01E03,Another Idiot Has Come!!,Another Idiot Has Come!! (S01E03) is the third...,9.36,11067.0,145.0,339.0,63.0,0.0,7.0,1.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."
3,315103,5774666,2016-10-26T03:00,S01E04,Happy New Year! The New Year's Tournament Is W...,Happy New Year! The New Year's Tournament Is W...,8.36,10552.0,134.0,270.0,109.0,0.0,3.0,3.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."
4,315103,5774667,2016-11-02T03:00,S01E05,A Fraud and a Hero,A Fraud and a Hero (S01E05) is the fifth episo...,8.94,10340.0,106.0,183.0,223.0,1.0,2.0,2.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."


In [7]:
# Reading only American Network Names
us_network = pd.read_csv("Data/network_names.csv")
us_network

Unnamed: 0,USA Networks,Count of title
0,A&E,1017
1,ABC,461
2,ABC (US),7843
3,ABC Family,512
4,Adult Swim,634
5,Amazon,998
6,AMC,310
7,Animal Planet,488
8,Apple TV+,82
9,BBC,99


In [72]:
# Inner Merge on Merged Dataframe and USA Network Data
us_data = pd.merge(merged_df, us_network, how="inner", left_on="network", right_on="USA Networks")
us_data

Unnamed: 0,show_id,episode_id,time,episode,title,description,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,id,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons,USA Networks,Count of title
0,79029,375789,2005-11-07T21:30,S01E01,Much Ado About Nothing,Much Ado About Nothing (S01E01) is the first e...,,74.0,5.0,3.0,5.0,0.0,0.0,0.0,79029,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",BBC One,2754
1,79029,375790,2005-11-14T21:30,S01E02,Macbeth,Macbeth (S01E02) is the second episode of seas...,,94.0,14.0,0.0,5.0,0.0,2.0,0.0,79029,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",BBC One,2754
2,79029,375791,2005-11-21T21:30,S01E03,The Taming of the Shrew,The Taming of the Shrew (S01E03) is the third ...,,76.0,6.0,1.0,2.0,0.0,2.0,1.0,79029,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",BBC One,2754
3,79029,375792,2005-11-28T21:30,S01E04,A Midsummer Night's Dream,A Midsummer Night's Dream (S01E04) is the four...,,66.0,4.0,0.0,0.0,0.0,4.0,0.0,79029,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",BBC One,2754
4,251999,4173236,2011-02-21T01:00,S01E01,The Mammy,The Mammy (S01E01) is the first episode of sea...,,58.0,1.0,0.0,0.0,0.0,0.0,0.0,251999,Mrs. Brown's Boys,193,9,180,1,BBC One,3.0,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]",BBC One,2754
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81534,252486,4183419,T21:00,S01E08,,S01E08 is the eighth episode of season one of ...,,76.0,0.0,0.0,0.0,0.0,0.0,0.0,252486,Dual Survival,458,1,1,2,Discovery Channel,0.0,https://dg31sz3gwrwan.cloudfront.net/poster/25...,"[{'number': 1, 'nb_episodes': 10}, {'number': ...",Discovery Channel,62
81535,252486,4183421,T21:00,S01E09,,"S01E09 is the ninth episode of season one of ""...",,76.0,0.0,0.0,0.0,0.0,0.0,0.0,252486,Dual Survival,458,1,1,2,Discovery Channel,0.0,https://dg31sz3gwrwan.cloudfront.net/poster/25...,"[{'number': 1, 'nb_episodes': 10}, {'number': ...",Discovery Channel,62
81536,252486,4183422,T21:00,S01E10,,"S01E10 is the tenth episode of season one of ""...",,76.0,0.0,0.0,0.0,0.0,0.0,0.0,252486,Dual Survival,458,1,1,2,Discovery Channel,0.0,https://dg31sz3gwrwan.cloudfront.net/poster/25...,"[{'number': 1, 'nb_episodes': 10}, {'number': ...",Discovery Channel,62
81537,248584,4098218,2011-01-27T20:00,S01E01,Volume 1,Volume 1 (S01E01) is the first episode of sea...,,38.0,3.0,0.0,0.0,0.0,0.0,0.0,248584,Time Paladin Sakura,167,6,25,1,Discovery Channel,3.0,https://dg31sz3gwrwan.cloudfront.net/poster/24...,"[{'number': 1, 'nb_episodes': 2}]",Discovery Channel,62


In [9]:
us_data["network"].value_counts()

CBS                    9075
NBC                    8324
ABC (US)               7843
Netflix                5384
FOX                    4771
Cartoon Network        4721
Nickelodeon            3982
Disney Channel         2790
PBS                    2661
BBC One                2658
HBO                    1768
MTV                    1625
Syfy                   1343
BBC Two                1338
Discovery              1298
TLC                    1259
Disney XD              1180
History                1133
The CW                 1111
Comedy Central         1080
WOWOW                  1031
A&E                    1017
Amazon                  998
USA Network             849
Bravo                   836
Lifetime                765
National Geographic     719
VH1                     711
E!                      707
Adult Swim              634
TNT (US)                611
Hulu                    579
Food Network            544
Toon Disney             522
WWE Network             516
ABC Family          

In [22]:
# Dropping columns that are not required
clean_us_data = us_data.drop(columns=['id', 'USA Networks', 'Count of title', 'description'])
clean_us_data

Unnamed: 0,show_id,episode_id,time,episode,title,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons
0,79029,375789,2005-11-07T21:30,S01E01,Much Ado About Nothing,,74.0,5.0,3.0,5.0,0.0,0.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]"
1,79029,375790,2005-11-14T21:30,S01E02,Macbeth,,94.0,14.0,0.0,5.0,0.0,2.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]"
2,79029,375791,2005-11-21T21:30,S01E03,The Taming of the Shrew,,76.0,6.0,1.0,2.0,0.0,2.0,1.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]"
3,79029,375792,2005-11-28T21:30,S01E04,A Midsummer Night's Dream,,66.0,4.0,0.0,0.0,0.0,4.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]"
4,251999,4173236,2011-02-21T01:00,S01E01,The Mammy,,58.0,1.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.0,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81534,252486,4183419,T21:00,S01E08,,,76.0,0.0,0.0,0.0,0.0,0.0,0.0,Dual Survival,458,1,1,2,Discovery Channel,0.0,https://dg31sz3gwrwan.cloudfront.net/poster/25...,"[{'number': 1, 'nb_episodes': 10}, {'number': ..."
81535,252486,4183421,T21:00,S01E09,,,76.0,0.0,0.0,0.0,0.0,0.0,0.0,Dual Survival,458,1,1,2,Discovery Channel,0.0,https://dg31sz3gwrwan.cloudfront.net/poster/25...,"[{'number': 1, 'nb_episodes': 10}, {'number': ..."
81536,252486,4183422,T21:00,S01E10,,,76.0,0.0,0.0,0.0,0.0,0.0,0.0,Dual Survival,458,1,1,2,Discovery Channel,0.0,https://dg31sz3gwrwan.cloudfront.net/poster/25...,"[{'number': 1, 'nb_episodes': 10}, {'number': ..."
81537,248584,4098218,2011-01-27T20:00,S01E01,Volume 1,,38.0,3.0,0.0,0.0,0.0,0.0,0.0,Time Paladin Sakura,167,6,25,1,Discovery Channel,3.0,https://dg31sz3gwrwan.cloudfront.net/poster/24...,"[{'number': 1, 'nb_episodes': 2}]"


In [11]:
clean_us_data.dtypes

show_id                int64
episode_id             int64
time                  object
episode               object
title                 object
rating               float64
times_watched        float64
mood-good            float64
mood-fun             float64
mood-wow             float64
mood-sad             float64
mood-so-so           float64
mood-bad             float64
name                  object
followers              int64
nb_rates               int64
runtime                int64
number_of_seasons      int64
network               object
mean_rate            float64
poster_image          object
seasons               object
dtype: object

In [12]:
# Converting Object datatype of Time column to String
clean_us_data['time'] = clean_us_data['time'].astype('string')

In [13]:
# Parsing timeslot from the time data using string split function
clean_us_data['timeslot'] = clean_us_data['time'].str.split(pat='T').str[1]

In [14]:
# Parsing year from the time data using string split function
year = clean_us_data['time'].str.split(pat='T').str[0]
clean_us_data['year'] = year.str.split(pat='-').str[0]

In [15]:
clean_us_data['year'].value_counts()

        5331
2018    4830
2019    4569
2017    4338
2015    4096
        ... 
1922       8
1921       8
1939       6
2021       2
1923       1
Name: year, Length: 102, dtype: int64

In [16]:
clean_us_data.head(20)

Unnamed: 0,show_id,episode_id,time,episode,title,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons,timeslot,year
0,79029,375789,2005-11-07T21:30,S01E01,Much Ado About Nothing,,74.0,5.0,3.0,5.0,0.0,0.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",21:30,2005.0
1,79029,375790,2005-11-14T21:30,S01E02,Macbeth,,94.0,14.0,0.0,5.0,0.0,2.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",21:30,2005.0
2,79029,375791,2005-11-21T21:30,S01E03,The Taming of the Shrew,,76.0,6.0,1.0,2.0,0.0,2.0,1.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",21:30,2005.0
3,79029,375792,2005-11-28T21:30,S01E04,A Midsummer Night's Dream,,66.0,4.0,0.0,0.0,0.0,4.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",21:30,2005.0
4,251999,4173236,2011-02-21T01:00,S01E01,The Mammy,,58.0,1.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.0,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]",01:00,2011.0
5,251999,4173237,2011-02-28T01:00,S01E02,Mammy's Secret,,50.0,0.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.0,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]",01:00,2011.0
6,251999,4173238,2011-03-07T01:00,S01E03,Mammy's Merchandise,,48.0,0.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.0,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]",01:00,2011.0
7,251999,4173239,2011-03-14T01:00,S01E04,Mammy Rides Again,,48.0,0.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.0,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]",01:00,2011.0
8,251999,4173240,2011-03-21T01:00,S01E05,Mammy of the Groom,,48.0,0.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.0,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]",01:00,2011.0
9,251999,4173241,2011-03-28T01:00,S01E06,Mammy's Miracle,,48.0,0.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.0,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]",01:00,2011.0


In [30]:
clean_us_data.loc[clean_us_data['name'] == "SAINT SEIYA: Knights of the Zodiac"]

Unnamed: 0,show_id,episode_id,time,episode,title,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons


In [18]:
clean_us_data.loc[clean_us_data['runtime'] == 0].count()

show_id              776
episode_id           776
time                 776
episode              776
title                775
rating                15
times_watched        723
mood-good            776
mood-fun             776
mood-wow             776
mood-sad             776
mood-so-so           776
mood-bad             776
name                 776
followers            776
nb_rates             776
runtime              776
number_of_seasons    776
network              776
mean_rate            776
poster_image         776
seasons              776
timeslot             776
year                 776
dtype: int64

In [79]:
clean_us_data[clean_us_data['name'].str.contains('Awards', regex=False)]

Unnamed: 0,show_id,episode_id,time,episode,title,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons
252,235941,4657880,2006-02-19T21:00,S01E59,The 59th Orange British Academy Film Awards,,147.0,2.0,0.0,0.0,0.0,0.0,0.0,British Academy Film Awards,307,3,120,1,BBC One,3.61,https://dg31sz3gwrwan.cloudfront.net/poster/23...,"[{'number': 1, 'nb_episodes': 14}]"
253,235941,4657878,2007-02-11T21:00,S01E60,The 60th Orange British Academy Film Awards,,147.0,1.0,0.0,0.0,0.0,0.0,0.0,British Academy Film Awards,307,3,120,1,BBC One,3.61,https://dg31sz3gwrwan.cloudfront.net/poster/23...,"[{'number': 1, 'nb_episodes': 14}]"
254,235941,4657877,2008-02-10T21:00,S01E61,The 61st Orange British Academy Film Awards,,146.0,1.0,0.0,0.0,0.0,0.0,0.0,British Academy Film Awards,307,3,120,1,BBC One,3.61,https://dg31sz3gwrwan.cloudfront.net/poster/23...,"[{'number': 1, 'nb_episodes': 14}]"
255,235941,4657876,2009-02-08T21:00,S01E62,The 62nd Orange British Academy Film Awards,,147.0,1.0,0.0,0.0,0.0,0.0,0.0,British Academy Film Awards,307,3,120,1,BBC One,3.61,https://dg31sz3gwrwan.cloudfront.net/poster/23...,"[{'number': 1, 'nb_episodes': 14}]"
256,235941,4657874,2010-02-21T21:00,S01E63,The 63rd Orange British Academy Film Awards,,146.0,1.0,0.0,0.0,0.0,0.0,0.0,British Academy Film Awards,307,3,120,1,BBC One,3.61,https://dg31sz3gwrwan.cloudfront.net/poster/23...,"[{'number': 1, 'nb_episodes': 14}]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79914,289657,5079034,2014-11-30T22:00,S01E27,2014 Soul Train Music Awards,,58.0,0.0,0.0,1.0,0.0,0.0,0.0,Soul Train Music Awards,227,5,180,1,BET,4.01,https://dg31sz3gwrwan.cloudfront.net/poster/28...,"[{'number': 1, 'nb_episodes': 31}]"
79915,289657,5079035,2015-11-29T22:00,S01E28,2015 Soul Train Music Awards,,60.0,1.0,1.0,1.0,0.0,0.0,0.0,Soul Train Music Awards,227,5,180,1,BET,4.01,https://dg31sz3gwrwan.cloudfront.net/poster/28...,"[{'number': 1, 'nb_episodes': 31}]"
79916,289657,5079029,2016-11-27T22:00,S01E29,2016 Soul Train Music Awards,,59.0,4.0,0.0,0.0,0.0,0.0,0.0,Soul Train Music Awards,227,5,180,1,BET,4.01,https://dg31sz3gwrwan.cloudfront.net/poster/28...,"[{'number': 1, 'nb_episodes': 31}]"
79917,289657,6426761,2017-11-26T22:00,S01E30,2017 Soul Train Music Awards,,58.0,4.0,0.0,1.0,0.0,1.0,0.0,Soul Train Music Awards,227,5,180,1,BET,4.01,https://dg31sz3gwrwan.cloudfront.net/poster/28...,"[{'number': 1, 'nb_episodes': 31}]"


### Supervised Machine Learning - Data Pre Processing

In [34]:
ml_df = clean_us_data[["show_id","times_watched", 
                       "mood-good", "mood-fun", "mood-wow", "mood-sad", "mood-so-so", "mood-bad", 
                       "name", "followers", "runtime", "network"]]
ml_df

Unnamed: 0,show_id,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,runtime,network
0,79029,74.0,5.0,3.0,5.0,0.0,0.0,0.0,ShakespeaRe-Told,242,120,BBC One
1,79029,94.0,14.0,0.0,5.0,0.0,2.0,0.0,ShakespeaRe-Told,242,120,BBC One
2,79029,76.0,6.0,1.0,2.0,0.0,2.0,1.0,ShakespeaRe-Told,242,120,BBC One
3,79029,66.0,4.0,0.0,0.0,0.0,4.0,0.0,ShakespeaRe-Told,242,120,BBC One
4,251999,58.0,1.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,180,BBC One
...,...,...,...,...,...,...,...,...,...,...,...,...
81534,252486,76.0,0.0,0.0,0.0,0.0,0.0,0.0,Dual Survival,458,1,Discovery Channel
81535,252486,76.0,0.0,0.0,0.0,0.0,0.0,0.0,Dual Survival,458,1,Discovery Channel
81536,252486,76.0,0.0,0.0,0.0,0.0,0.0,0.0,Dual Survival,458,1,Discovery Channel
81537,248584,38.0,3.0,0.0,0.0,0.0,0.0,0.0,Time Paladin Sakura,167,25,Discovery Channel


In [39]:
ml_grouped = ml_df.groupby(by=["show_id"]).agg({'times_watched':'mean',"mood-good":'mean', "mood-fun":'mean', 
                                                   "mood-wow":'mean', "mood-sad":'mean', "mood-so-so":'mean', 
                                                   "mood-bad":'mean', "name":'first', "followers":'mean', 
                                                   "runtime":'mean', "network": 'first'})
ml_grouped

Unnamed: 0_level_0,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,runtime,network
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
70328,6253.193548,0.951613,0.080645,0.419355,0.016129,0.032258,0.225806,The Young and the Restless,16258,45,CBS
70329,239380.363636,700.454545,1254.000000,118.545455,3.272727,11.272727,4.636364,My Wife and Kids,296084,25,ABC (US)
70334,24.487544,0.017794,0.000000,0.000000,0.000000,0.000000,0.000000,The Tonight Show Starring Johnny Carson,177,45,NBC
70336,84.091324,0.000000,0.004566,0.000000,0.004566,0.000000,0.000000,The Tonight Show with Jay Leno,383,45,NBC
70360,107.666667,0.833333,0.833333,1.166667,0.000000,0.000000,0.000000,The Day Today,163,30,BBC Two
...,...,...,...,...,...,...,...,...,...,...,...
370467,841.000000,1.750000,0.000000,5.250000,0.000000,0.750000,0.000000,Unnatural Selection,1652,65,Netflix
370471,15.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,Steven Universe Future,480,10,Cartoon Network
370530,106.333333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,Destination Fear (2019),191,60,Travel Channel
370863,39.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,Matchmaker Mysteries,190,85,Hallmark Channel


In [77]:
X = ml_grouped.drop(columns=["name","network", "times_watched", "followers", "runtime"])
X

Unnamed: 0_level_0,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
70328,0.951613,0.080645,0.419355,0.016129,0.032258,0.225806
70329,700.454545,1254.000000,118.545455,3.272727,11.272727,4.636364
70334,0.017794,0.000000,0.000000,0.000000,0.000000,0.000000
70336,0.000000,0.004566,0.000000,0.004566,0.000000,0.000000
70360,0.833333,0.833333,1.166667,0.000000,0.000000,0.000000
...,...,...,...,...,...,...
370467,1.750000,0.000000,5.250000,0.000000,0.750000,0.000000
370471,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
370530,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
370863,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [78]:
X.loc[(X!=0).any(axis=1)]

Unnamed: 0_level_0,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
70328,0.951613,0.080645,0.419355,0.016129,0.032258,0.225806
70329,700.454545,1254.000000,118.545455,3.272727,11.272727,4.636364
70334,0.017794,0.000000,0.000000,0.000000,0.000000,0.000000
70336,0.000000,0.004566,0.000000,0.004566,0.000000,0.000000
70360,0.833333,0.833333,1.166667,0.000000,0.000000,0.000000
...,...,...,...,...,...,...
370139,22.125000,0.125000,8.000000,0.000000,2.375000,1.750000
370275,1.500000,0.000000,0.000000,0.000000,0.000000,0.000000
370277,3.166667,0.000000,0.333333,0.000000,0.666667,0.166667
370467,1.750000,0.000000,5.250000,0.000000,0.750000,0.000000


In [42]:
y = ml_grouped[["network"]]
y

Unnamed: 0_level_0,network
show_id,Unnamed: 1_level_1
70328,CBS
70329,ABC (US)
70334,NBC
70336,NBC
70360,BBC Two
...,...
370467,Netflix
370471,Cartoon Network
370530,Travel Channel
370863,Hallmark Channel


In [55]:
y_label = LabelEncoder().fit_transform(y['network'])
y_label

array([17,  2, 37, ..., 48, 32, 39])

In [56]:
X_train, X_test, y_train, y_test = train_test_split(X, y_label, random_state=1)

In [57]:
X_train

Unnamed: 0_level_0,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,runtime
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
127351,104.777778,22.111111,19.888889,1.000000,3.000000,5.000000,45
76814,9.047619,0.428571,2.142857,0.047619,0.000000,0.000000,25
274453,5.666667,0.666667,1.250000,0.000000,0.000000,0.000000,45
232471,4.333333,0.000000,0.000000,0.000000,0.333333,0.000000,60
357525,1.333333,0.000000,1.000000,0.000000,0.333333,0.333333,85
...,...,...,...,...,...,...,...
77164,0.075000,0.000000,0.000000,0.000000,0.000000,0.000000,60
352207,35.900000,46.300000,3.200000,0.000000,1.900000,0.500000,30
308984,130.333333,0.666667,110.333333,1.000000,5.500000,0.166667,60
72167,426.304348,3.521739,70.304348,42.782609,11.173913,2.130435,45


In [58]:
X_test

Unnamed: 0_level_0,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,runtime
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
78087,0.090909,0.000000,0.000000,0.000000,0.000000,0.000000,45
112451,112.769231,0.384615,75.307692,2.153846,1.000000,0.076923,45
248936,40.454545,1.863636,8.772727,0.545455,1.772727,1.500000,45
361034,436.000000,13.666667,196.833333,0.333333,8.000000,0.833333,30
354089,9.142857,0.000000,2.428571,0.000000,0.285714,0.000000,45
...,...,...,...,...,...,...,...
348922,2143.400000,2559.100000,533.800000,3.300000,132.400000,44.500000,30
85038,5.833333,4.333333,1.416667,0.000000,0.000000,0.000000,25
249025,1.192308,0.269231,0.038462,0.038462,0.038462,0.038462,30
71373,8.857143,0.190476,1.190476,0.428571,0.666667,0.095238,45


In [59]:
y_train

array([36, 23, 30, ..., 12, 17, 39])

In [60]:
y_test

array([17, 21, 37, ..., 48, 17, 26])

In [61]:
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_train_scaled

array([[-0.12989588, -0.07058308, -0.0883369 , ..., -0.16517713,
         0.05868887,  0.15371575],
       [-0.19654974, -0.14993099, -0.09597012, ..., -0.21287158,
        -0.15350162, -0.7397711 ],
       [-0.19890379, -0.14905967, -0.09635417, ..., -0.21287158,
        -0.15350162,  0.15371575],
       ...,
       [-0.11210237, -0.14905967, -0.04943342, ..., -0.12543176,
        -0.1464286 ,  0.82383088],
       [ 0.0939728 , -0.13861145, -0.06665136, ..., -0.03522706,
        -0.06309002,  0.15371575],
       [-0.13647175, -0.14966959, -0.09072655, ...,  0.09184292,
         0.39112064, -0.7397711 ]])

In [62]:
X_test_scaled = scaler.transform(X_test)
X_test_scaled

array([[-0.20278601, -0.15149936, -0.09689184, ..., -0.21287158,
        -0.15350162,  0.15371575],
       [-0.12433169, -0.15009185, -0.06449923, ..., -0.19697343,
        -0.15023715,  0.15371575],
       [-0.1746821 , -0.14467932, -0.09311837, ..., -0.1846885 ,
        -0.08984447,  0.15371575],
       ...,
       [-0.20201914, -0.1505141 , -0.0968753 , ..., -0.21226011,
        -0.15186938, -0.51639939],
       [-0.19668236, -0.1508023 , -0.09637978, ..., -0.20227281,
        -0.14945989,  0.15371575],
       [ 0.77305743,  0.02049855, -0.02349975, ...,  0.66351381,
         0.40880318, -0.7397711 ]])

In [63]:
print(X_test_scaled.shape, y_test.shape, X_train_scaled.shape, y_train.shape)

(1456, 7) (1456,) (4367, 7) (4367,)


### Logistic Regression

In [64]:
classifier = LogisticRegression(max_iter = 2000)
classifier

LogisticRegression(max_iter=2000)

In [65]:
# Fit the model
classifier.fit(X_train_scaled, y_train)

LogisticRegression(max_iter=2000)

In [67]:
# Display Training & Testing Score
print(f"Training Data Score (Logistic Regression Model): {classifier.score(X_train_scaled, y_train)}")
print(f"Testing Data Score (Logistic Regression Model): {classifier.score(X_test_scaled, y_test)}")

Training Data Score (Logistic Regression Model): 0.12617357453629494
Testing Data Score (Logistic Regression Model): 0.13186813186813187


In [70]:
# Predict Y values using the model
y_pred_lr = classifier.predict(X_test_scaled)
y_pred_lr

array([39, 39, 39, ...,  2, 39, 39])

In [None]:
[[30, 40, 50, 12, .5]]

In [71]:
print(classification_report(y_test, y_pred_lr))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        30
           1       0.00      0.00      0.00         8
           2       0.10      0.38      0.16       112
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         7
           5       0.00      0.00      0.00        19
           6       0.00      0.00      0.00        26
           7       0.00      0.00      0.00        15
           8       0.00      0.00      0.00         4
           9       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         2
          11       0.00      0.00      0.00         6
          12       0.20      0.51      0.28       106
          13       0.00      0.00      0.00        18
          14       0.00      0.00      0.00        52
          15       0.00      0.00      0.00         4
          16       0.00      0.00      0.00        22
          17       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Random Forests

In [None]:
# Train a Random Forest Classifier model and print the model score
clf = RandomForestClassifier(random_state=1, n_estimators=100).fit(X_train_scaled, y_train)