In [1]:
import pandas as pd
import csv
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

In [2]:
# Reading TV Time CSV Data
tvshows_df = pd.read_csv("Data/tvtimeshows.csv")
tvshows_df.head()

Unnamed: 0,id,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons
0,288128,X-Ray & Vav,230,10,9,2,Rooster Teeth,4.2,https://dg31sz3gwrwan.cloudfront.net/poster/28...,"[{'number': 1, 'nb_episodes': 4}, {'number': 2..."
1,313803,Sister's Slam Dunk,1298,100,80,2,KBS TV2,4.25,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 33}, {'number': ..."
2,325462,Reverse,270,15,60,1,Tokyo Broadcasting System,4.63,https://dg31sz3gwrwan.cloudfront.net/poster/32...,"[{'number': 1, 'nb_episodes': 10}]"
3,325198,Frame Arms Girl,900,24,25,1,Tokyo MX,3.54,https://dg31sz3gwrwan.cloudfront.net/poster/32...,"[{'number': 1, 'nb_episodes': 12}]"
4,349743,Mr Inbetween,7123,118,25,2,FX,4.42,https://dg31sz3gwrwan.cloudfront.net/poster/34...,"[{'number': 1, 'nb_episodes': 6}, {'number': 2..."


In [4]:
tvshows_df["network"].value_counts().head(10)

YouTube     715
Netflix     656
Tokyo MX    540
ABC (US)    495
TV Tokyo    451
NBC         429
BBC One     405
CBS         393
Fuji TV     336
FOX         288
Name: network, dtype: int64

In [5]:
# Reading All Episodes CSV Data
episode_df = pd.read_csv("Data/all_episodes.csv")
episode_df.head()

Unnamed: 0,show_id,episode_id,time,episode,title,description,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad
0,315103,5697528,2016-10-05T03:00,S01E01,Idiots with Numbers!,Idiots with Numbers! (S01E01) is the first epi...,9.48,12307.0,175.0,388.0,136.0,1.0,16.0,3.0
1,315103,5774664,2016-10-12T03:00,S01E02,The Inmates Are Stupid! The Guards Are Kind of...,The Inmates Are Stupid! The Guards Are Kind of...,9.52,11423.0,156.0,349.0,82.0,0.0,5.0,3.0
2,315103,5774665,2016-10-19T03:00,S01E03,Another Idiot Has Come!!,Another Idiot Has Come!! (S01E03) is the third...,9.36,11067.0,145.0,339.0,63.0,0.0,7.0,1.0
3,315103,5774666,2016-10-26T03:00,S01E04,Happy New Year! The New Year's Tournament Is W...,Happy New Year! The New Year's Tournament Is W...,8.36,10552.0,134.0,270.0,109.0,0.0,3.0,3.0
4,315103,5774667,2016-11-02T03:00,S01E05,A Fraud and a Hero,A Fraud and a Hero (S01E05) is the fifth episo...,8.94,10340.0,106.0,183.0,223.0,1.0,2.0,2.0


In [6]:
pd.set_option('display.max_columns', None)

In [7]:
# Merging both dataframe on Show Id
merged_df = pd.merge(episode_df, tvshows_df, how="inner", left_on="show_id", right_on="id")
merged_df.head()

Unnamed: 0,show_id,episode_id,time,episode,title,description,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,id,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons
0,315103,5697528,2016-10-05T03:00,S01E01,Idiots with Numbers!,Idiots with Numbers! (S01E01) is the first epi...,9.48,12307.0,175.0,388.0,136.0,1.0,16.0,3.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."
1,315103,5774664,2016-10-12T03:00,S01E02,The Inmates Are Stupid! The Guards Are Kind of...,The Inmates Are Stupid! The Guards Are Kind of...,9.52,11423.0,156.0,349.0,82.0,0.0,5.0,3.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."
2,315103,5774665,2016-10-19T03:00,S01E03,Another Idiot Has Come!!,Another Idiot Has Come!! (S01E03) is the third...,9.36,11067.0,145.0,339.0,63.0,0.0,7.0,1.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."
3,315103,5774666,2016-10-26T03:00,S01E04,Happy New Year! The New Year's Tournament Is W...,Happy New Year! The New Year's Tournament Is W...,8.36,10552.0,134.0,270.0,109.0,0.0,3.0,3.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."
4,315103,5774667,2016-11-02T03:00,S01E05,A Fraud and a Hero,A Fraud and a Hero (S01E05) is the fifth episo...,8.94,10340.0,106.0,183.0,223.0,1.0,2.0,2.0,315103,Nanbaka,16617,620,25,2,MBS,4.57,https://dg31sz3gwrwan.cloudfront.net/poster/31...,"[{'number': 1, 'nb_episodes': 13}, {'number': ..."


In [8]:
merged_df["network"].value_counts().head(20)

Rede Globo                26076
YouTube                   20754
TV Tokyo                  12529
Canal de las Estrellas    10343
CBS                        9075
NBC                        8324
Fuji TV                    8177
MBC                        8002
ABC (US)                   7843
Tokyo MX                   7062
Telemundo                  6716
TV Asahi                   6322
KBS TV2                    5920
Netflix                    5384
SBS (KR)                   5082
FOX                        4771
Cartoon Network            4721
TF1                        4642
SBT                        4431
TVN                        4045
Name: network, dtype: int64

In [9]:
# Reading only American Network Names and sorting it by count of titles
us_network = pd.read_csv("Data/network_names.csv")
us_network = us_network.sort_values(by='Count of title', ascending=False, ignore_index=True)
us_network

Unnamed: 0,USA Networks,Count of title
0,CBS,9075
1,NBC,8324
2,ABC (US),7843
3,Netflix,5384
4,FOX,4771
5,Cartoon Network,4721
6,Nickelodeon,4147
7,Disney Channel,2803
8,BBC One,2754
9,PBS,2661


In [10]:
# Top 15 American Networks
top_us = us_network.head(15)
top_us

Unnamed: 0,USA Networks,Count of title
0,CBS,9075
1,NBC,8324
2,ABC (US),7843
3,Netflix,5384
4,FOX,4771
5,Cartoon Network,4721
6,Nickelodeon,4147
7,Disney Channel,2803
8,BBC One,2754
9,PBS,2661


In [11]:
# Inner Merge on Merged Dataframe and Top 15 American Network Data
top_us_data = pd.merge(merged_df, top_us, how="inner", left_on="network", right_on="USA Networks")
top_us_data

Unnamed: 0,show_id,episode_id,time,episode,title,description,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,id,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons,USA Networks,Count of title
0,79029,375789,2005-11-07T21:30,S01E01,Much Ado About Nothing,Much Ado About Nothing (S01E01) is the first e...,,74.0,5.0,3.0,5.0,0.0,0.0,0.0,79029,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",BBC One,2754
1,79029,375790,2005-11-14T21:30,S01E02,Macbeth,Macbeth (S01E02) is the second episode of seas...,,94.0,14.0,0.0,5.0,0.0,2.0,0.0,79029,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",BBC One,2754
2,79029,375791,2005-11-21T21:30,S01E03,The Taming of the Shrew,The Taming of the Shrew (S01E03) is the third ...,,76.0,6.0,1.0,2.0,0.0,2.0,1.0,79029,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",BBC One,2754
3,79029,375792,2005-11-28T21:30,S01E04,A Midsummer Night's Dream,A Midsummer Night's Dream (S01E04) is the four...,,66.0,4.0,0.0,0.0,0.0,4.0,0.0,79029,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",BBC One,2754
4,251999,4173236,2011-02-21T01:00,S01E01,The Mammy,The Mammy (S01E01) is the first episode of sea...,,58.0,1.0,0.0,0.0,0.0,0.0,0.0,251999,Mrs. Brown's Boys,193,9,180,1,BBC One,3.00,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]",BBC One,2754
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59576,262184,4392610,2008-10-19T22:00,S2008E03,The Last Enemy (3),The Last Enemy (3) (S2008E03) is the third epi...,,21.0,0.0,0.0,0.0,0.0,0.0,0.0,262184,Masterpiece Contemporary,307,2,90,4,PBS,0.19,https://dg31sz3gwrwan.cloudfront.net/poster/26...,"[{'number': 2008, 'nb_episodes': 7}, {'number'...",PBS,2661
59577,262184,4392611,2008-10-26T22:00,S2008E04,The Last Enemy (4),The Last Enemy (4) (S2008E04) is the fourth ep...,,21.0,0.0,0.0,0.0,0.0,0.0,0.0,262184,Masterpiece Contemporary,307,2,90,4,PBS,0.19,https://dg31sz3gwrwan.cloudfront.net/poster/26...,"[{'number': 2008, 'nb_episodes': 7}, {'number'...",PBS,2661
59578,262184,4392612,2008-11-02T22:00,S2008E05,The Last Enemy (5),The Last Enemy (5) (S2008E05) is the fifth epi...,,21.0,0.0,0.0,0.0,0.0,0.0,0.0,262184,Masterpiece Contemporary,307,2,90,4,PBS,0.19,https://dg31sz3gwrwan.cloudfront.net/poster/26...,"[{'number': 2008, 'nb_episodes': 7}, {'number'...",PBS,2661
59579,262184,4392613,2008-11-09T22:00,S2008E06,God on Trial,God on Trial (S2008E06) is the sixth episode o...,,19.0,0.0,0.0,0.0,0.0,0.0,0.0,262184,Masterpiece Contemporary,307,2,90,4,PBS,0.19,https://dg31sz3gwrwan.cloudfront.net/poster/26...,"[{'number': 2008, 'nb_episodes': 7}, {'number'...",PBS,2661


In [12]:
# Dropping columns that are not required
us_df1 = top_us_data.drop(columns=['id', 'USA Networks', 'Count of title', 'description'])
us_df1

Unnamed: 0,show_id,episode_id,time,episode,title,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons
0,79029,375789,2005-11-07T21:30,S01E01,Much Ado About Nothing,,74.0,5.0,3.0,5.0,0.0,0.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]"
1,79029,375790,2005-11-14T21:30,S01E02,Macbeth,,94.0,14.0,0.0,5.0,0.0,2.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]"
2,79029,375791,2005-11-21T21:30,S01E03,The Taming of the Shrew,,76.0,6.0,1.0,2.0,0.0,2.0,1.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]"
3,79029,375792,2005-11-28T21:30,S01E04,A Midsummer Night's Dream,,66.0,4.0,0.0,0.0,0.0,4.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]"
4,251999,4173236,2011-02-21T01:00,S01E01,The Mammy,,58.0,1.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.00,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59576,262184,4392610,2008-10-19T22:00,S2008E03,The Last Enemy (3),,21.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,https://dg31sz3gwrwan.cloudfront.net/poster/26...,"[{'number': 2008, 'nb_episodes': 7}, {'number'..."
59577,262184,4392611,2008-10-26T22:00,S2008E04,The Last Enemy (4),,21.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,https://dg31sz3gwrwan.cloudfront.net/poster/26...,"[{'number': 2008, 'nb_episodes': 7}, {'number'..."
59578,262184,4392612,2008-11-02T22:00,S2008E05,The Last Enemy (5),,21.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,https://dg31sz3gwrwan.cloudfront.net/poster/26...,"[{'number': 2008, 'nb_episodes': 7}, {'number'..."
59579,262184,4392613,2008-11-09T22:00,S2008E06,God on Trial,,19.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,https://dg31sz3gwrwan.cloudfront.net/poster/26...,"[{'number': 2008, 'nb_episodes': 7}, {'number'..."


In [13]:
us_df1.dtypes

show_id                int64
episode_id             int64
time                  object
episode               object
title                 object
rating               float64
times_watched        float64
mood-good            float64
mood-fun             float64
mood-wow             float64
mood-sad             float64
mood-so-so           float64
mood-bad             float64
name                  object
followers              int64
nb_rates               int64
runtime                int64
number_of_seasons      int64
network               object
mean_rate            float64
poster_image          object
seasons               object
dtype: object

In [14]:
# Converting Object datatype of Time column to String
us_df1['time'] = us_df1['time'].astype('string')

In [15]:
# Parsing timeslot from the time data using string split function
us_df1['timeslot'] = us_df1['time'].str.split(pat='T').str[1]

In [16]:
# Parsing year from the time data using string split function
year = us_df1['time'].str.split(pat='T').str[0]
us_df1['year'] = year.str.split(pat='-').str[0]

In [17]:
us_df1.head()

Unnamed: 0,show_id,episode_id,time,episode,title,rating,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,poster_image,seasons,timeslot,year
0,79029,375789,2005-11-07T21:30,S01E01,Much Ado About Nothing,,74.0,5.0,3.0,5.0,0.0,0.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",21:30,2005
1,79029,375790,2005-11-14T21:30,S01E02,Macbeth,,94.0,14.0,0.0,5.0,0.0,2.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",21:30,2005
2,79029,375791,2005-11-21T21:30,S01E03,The Taming of the Shrew,,76.0,6.0,1.0,2.0,0.0,2.0,1.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",21:30,2005
3,79029,375792,2005-11-28T21:30,S01E04,A Midsummer Night's Dream,,66.0,4.0,0.0,0.0,0.0,4.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.4,https://dg31sz3gwrwan.cloudfront.net/poster/79...,"[{'number': 1, 'nb_episodes': 4}]",21:30,2005
4,251999,4173236,2011-02-21T01:00,S01E01,The Mammy,,58.0,1.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.0,https://d36rlb2fgh8cjd.cloudfront.net/default-...,"[{'number': 1, 'nb_episodes': 6}]",01:00,2011


In [18]:
del us_df1['time']
del us_df1['rating']
del us_df1['poster_image']
del us_df1['seasons']

In [19]:
us_df1

Unnamed: 0,show_id,episode_id,episode,title,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,timeslot,year
0,79029,375789,S01E01,Much Ado About Nothing,74.0,5.0,3.0,5.0,0.0,0.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,21:30,2005
1,79029,375790,S01E02,Macbeth,94.0,14.0,0.0,5.0,0.0,2.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,21:30,2005
2,79029,375791,S01E03,The Taming of the Shrew,76.0,6.0,1.0,2.0,0.0,2.0,1.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,21:30,2005
3,79029,375792,S01E04,A Midsummer Night's Dream,66.0,4.0,0.0,0.0,0.0,4.0,0.0,ShakespeaRe-Told,242,8,120,1,BBC One,3.40,21:30,2005
4,251999,4173236,S01E01,The Mammy,58.0,1.0,0.0,0.0,0.0,0.0,0.0,Mrs. Brown's Boys,193,9,180,1,BBC One,3.00,01:00,2011
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59576,262184,4392610,S2008E03,The Last Enemy (3),21.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,22:00,2008
59577,262184,4392611,S2008E04,The Last Enemy (4),21.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,22:00,2008
59578,262184,4392612,S2008E05,The Last Enemy (5),21.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,22:00,2008
59579,262184,4392613,S2008E06,God on Trial,19.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,22:00,2008


In [21]:
us_df1.to_csv("US_Data/top15networks.csv")

In [26]:
us_df2 = us_df1.loc[(us_df1['runtime'] >= 15) & (us_df1['runtime'] <= 90)]
us_df2

Unnamed: 0,show_id,episode_id,episode,title,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,timeslot,year
10,348204,7380549,S01E01,Episode 1,2859.0,60.0,0.0,13.0,0.0,5.0,0.0,The War of the Worlds,12893,0,60,1,BBC One,0.00,21:00,2019
11,348204,7380550,S01E02,Episode 2,1879.0,41.0,0.0,7.0,0.0,6.0,0.0,The War of the Worlds,12893,0,60,1,BBC One,0.00,21:00,
12,348204,7380551,S01E03,Episode 3,1082.0,18.0,0.0,1.0,1.0,4.0,1.0,The War of the Worlds,12893,0,60,1,BBC One,0.00,21:00,
13,76332,435387,S01E01,Part 1,184.0,13.0,1.0,4.0,3.0,1.0,0.0,David Copperfield (1999),325,16,90,1,BBC One,3.72,19:00,1999
14,76332,435388,S01E02,Part 2,173.0,12.0,1.0,2.0,0.0,1.0,1.0,David Copperfield (1999),325,16,90,1,BBC One,3.72,19:00,1999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59576,262184,4392610,S2008E03,The Last Enemy (3),21.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,22:00,2008
59577,262184,4392611,S2008E04,The Last Enemy (4),21.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,22:00,2008
59578,262184,4392612,S2008E05,The Last Enemy (5),21.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,22:00,2008
59579,262184,4392613,S2008E06,God on Trial,19.0,0.0,0.0,0.0,0.0,0.0,0.0,Masterpiece Contemporary,307,2,90,4,PBS,0.19,22:00,2008


In [28]:
us_df2.isnull().sum(axis=0)

show_id                0
episode_id             0
episode                1
title                470
times_watched         88
mood-good              1
mood-fun               1
mood-wow               1
mood-sad               1
mood-so-so             1
mood-bad               1
name                   0
followers              0
nb_rates               0
runtime                0
number_of_seasons      0
network                0
mean_rate              0
timeslot               1
year                   1
dtype: int64

In [29]:
us_df2['runtime'].value_counts().sort_index()

15     1311
16       16
17       11
18        1
20     1042
25    20234
29       12
30     5667
35      248
40      297
45    17828
50      925
55      652
60     3710
65      348
70       52
75       34
80       27
85      578
90      419
Name: runtime, dtype: int64

In [30]:
# pd.set_option('display.max_rows', None)
us_df2.loc[us_df2['runtime'] == 15].sort_values(by="runtime")

Unnamed: 0,show_id,episode_id,episode,title,times_watched,mood-good,mood-fun,mood-wow,mood-sad,mood-so-so,mood-bad,name,followers,nb_rates,runtime,number_of_seasons,network,mean_rate,timeslot,year
87,73854,130895,S01E01,Pat's Finding Day,480.0,2.0,0.0,1.0,0.0,0.0,0.0,Postman Pat,466,13,15,8,BBC One,4.44,00:00,1981
47197,297291,5459576,S01E131,Rien de personnel,54.0,1.0,0.0,0.0,0.0,0.0,0.0,Peanuts (2014),254,7,15,1,Cartoon Network,3.56,10:00,2015
47196,297291,5263348,S01E130,Love story,54.0,1.0,0.0,0.0,0.0,0.0,0.0,Peanuts (2014),254,7,15,1,Cartoon Network,3.56,10:00,2015
47195,297291,5459575,S01E129,Faisons le point,54.0,1.0,0.0,0.0,0.0,0.0,0.0,Peanuts (2014),254,7,15,1,Cartoon Network,3.56,10:00,2015
47194,297291,5459574,S01E128,Pourparlers,54.0,1.0,0.0,0.0,0.0,0.0,0.0,Peanuts (2014),254,7,15,1,Cartoon Network,3.56,10:00,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38377,74149,1182881,S01E21,"Up, Up, and a Koopa",246.0,2.0,2.0,0.0,0.0,0.0,0.0,The Adventures of Super Mario Bros. 3,501,17,15,1,NBC,3.06,11:30,1990
38376,74149,1182871,S01E20,Life's Ruff,249.0,2.0,2.0,0.0,0.0,0.0,0.0,The Adventures of Super Mario Bros. 3,501,17,15,1,NBC,3.06,11:30,1990
38375,74149,1182861,S01E19,Crimes R Us,248.0,4.0,1.0,0.0,0.0,0.0,0.0,The Adventures of Super Mario Bros. 3,501,17,15,1,NBC,3.06,11:30,1990
39337,267198,4508450,S01E04,Casper's Camp-Out,156.0,2.0,0.0,0.0,0.0,0.0,0.0,Casper and the Angels,235,1,15,1,NBC,3.00,00:00,1979


In [31]:
us_df2[us_df2['name'].str.contains('CMA', regex=False)].count()

show_id              79
episode_id           79
episode              79
title                79
times_watched        78
mood-good            79
mood-fun             79
mood-wow             79
mood-sad             79
mood-so-so           79
mood-bad             79
name                 79
followers            79
nb_rates             79
runtime              79
number_of_seasons    79
network              79
mean_rate            79
timeslot             79
year                 79
dtype: int64

### Supervised Machine Learning - Data Pre Processing

In [None]:
ml_df = us_df1[["show_id","times_watched", 
                       "mood-good", "mood-fun", "mood-wow", "mood-sad", "mood-so-so", "mood-bad", 
                       "name", "followers", "runtime", "network"]]
ml_df

In [None]:
ml_grouped = ml_df.groupby(by=["show_id"]).agg({'times_watched':'mean',"mood-good":'mean', "mood-fun":'mean', 
                                                   "mood-wow":'mean', "mood-sad":'mean', "mood-so-so":'mean', 
                                                   "mood-bad":'mean', "name":'first', "followers":'mean', 
                                                   "runtime":'mean', "network": 'first'})
ml_grouped

In [None]:
X = ml_grouped.drop(columns=["name","network", "times_watched", "followers", "runtime"])
X

In [None]:
X.loc[(X!=0).any(axis=1)]

In [None]:
y = ml_grouped[["network"]]
y

In [None]:
y_label = LabelEncoder().fit_transform(y['network'])
y_label

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y_label, random_state=1)

In [None]:
X_train

In [None]:
X_test

In [None]:
y_train

In [None]:
y_test

In [None]:
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_train_scaled

In [None]:
X_test_scaled = scaler.transform(X_test)
X_test_scaled

In [None]:
print(X_test_scaled.shape, y_test.shape, X_train_scaled.shape, y_train.shape)

### Logistic Regression

In [None]:
classifier = LogisticRegression(max_iter = 2000)
classifier

In [None]:
# Fit the model
classifier.fit(X_train_scaled, y_train)

In [None]:
# Display Training & Testing Score
print(f"Training Data Score (Logistic Regression Model): {classifier.score(X_train_scaled, y_train)}")
print(f"Testing Data Score (Logistic Regression Model): {classifier.score(X_test_scaled, y_test)}")

In [None]:
# Predict Y values using the model
y_pred_lr = classifier.predict(X_test_scaled)
y_pred_lr

In [None]:
[[30, 40, 50, 12, .5]]

In [None]:
print(classification_report(y_test, y_pred_lr))

### Random Forests

In [None]:
# Train a Random Forest Classifier model and print the model score
clf = RandomForestClassifier(random_state=1, n_estimators=100).fit(X_train_scaled, y_train)