In [211]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt


%load_ext autoreload
%autoreload 2

In [212]:
# Load the three CSV inputs. episodes_df is read here but not referenced by
# any of the cells visible in this notebook section.
features_df = pd.read_csv('Datasets/scraped/features.csv')
episodes_df = pd.read_csv('Datasets/data-society-the-simpsons-by-the-data/simpsons_episodes.csv')
voting_df = pd.read_csv('Datasets/scraped/voting_demographics.csv')

voting_df['season'] = voting_df['season'].astype(int)
voting_df['episode'] = voting_df['episode'].astype(int)
# sort_values keeps the original row labels and pd.concat(axis=1) aligns on
# labels, so the index must be reset after sorting; otherwise the sort has no
# influence on which voting row ends up next to which feature row.
# NOTE(review): this pairs rows by position, which assumes features.csv is
# already in (season, episode) order - confirm against the scraper output.
voting_df = voting_df.sort_values(by=['season', 'episode']).reset_index(drop=True)
# Shift both frames to a 1-based index so row labels start at 1.
voting_df.index += 1
features_df.index += 1
# Only the first 600 rows of each frame are combined, side by side.
analysis_df = pd.concat([features_df.head(600), voting_df.head(600)], axis=1)
analysis_df['season'] = analysis_df['season'].astype(int)
analysis_df['episode'] = analysis_df['episode'].astype(int)

In [213]:
def smooth(scalars, weight):
    """Exponentially smooth a sequence of numbers.

    Each output value is ``previous * weight + (1 - weight) * current``,
    where ``previous`` is the preceding smoothed value. The recursion is
    seeded with the first input element.

    Args:
        scalars: Iterable of numbers (list, tuple, generator, pandas Series...).
        weight: Smoothing factor. With 0 the output reproduces the input
            values; the closer to 1, the more each value is dominated by
            the preceding smoothed value.

    Returns:
        A list of smoothed values with the same length as the input. An
        empty input yields an empty list.
    """
    # Materialise once so that any iterable works and the seed is taken by
    # position (``scalars[0]`` on a pandas Series is a label lookup).
    values = list(scalars)
    if not values:
        return []

    last = values[0]
    smoothed = []
    for point in values:
        last = last * weight + (1 - weight) * point
        smoothed.append(last)

    return smoothed

def normalize(data, column):
    """Return ``data[column]`` centred on its mean and divided by its standard deviation.

    Args:
        data: Object indexable by ``column`` whose result exposes ``mean()``
            and ``std()`` (a pandas DataFrame in this notebook).
        column: Key of the column to standardise.

    Returns:
        The standardised column: ``(values - mean) / std``.
    """
    values = data[column]
    centred = values - values.mean()
    return centred / values.std()

In [214]:
# Display summary statistics of the combined features + voting frame.
analysis_df.describe()

In [215]:
# Names of the rating columns that are plotted one by one further down.
rating_cols = [
    'total_rating',
    '<18_rating',
    '18-29_rating',
    '30-44_rating',
    '45+_rating',
]
# Names of the count columns (not referenced as a list by the cells visible here).
ref_cols = [
    'cult_refs_count',
    'self_refs_count',
    'goofs_count',
    'errors_count',
]

In [216]:
def ratings_refs_graph(data, col, refs, weight=0.9):
    """Plot two standardised, smoothed columns of ``data`` on one figure.

    Both columns are passed through ``normalize`` and then ``smooth`` so
    that they share a comparable scale, and are drawn against their
    positional order in ``data``.

    Args:
        data: DataFrame holding both columns.
        col: Name of the rating column; also used as the figure title.
        refs: Name of the reference-count column to compare against.
        weight: Smoothing factor forwarded to ``smooth`` (default 0.9).
    """
    refs_curve = smooth(list(normalize(data, refs)), weight)
    rating_curve = smooth(list(normalize(data, col)), weight)
    # Label each curve with the column it actually shows; fixed legend texts
    # would mislabel the curves as soon as a different column pair is passed.
    plt.plot(refs_curve, label=refs)
    plt.plot(rating_curve, label=col)
    plt.legend()
    plt.ylabel('Normalized scale')
    plt.xlabel('Episodes')
    plt.title(col)
    plt.show()

In [217]:
# One figure per rating column, each compared against the cult_refs_count column.
for col in rating_cols:
    ratings_refs_graph(
        data=analysis_df,
        col=col,
        refs='cult_refs_count',
    )

In [218]:
# Aggregate the episode-level frame per season. The per-season means and sums
# are kept side by side, distinguished by the 'mean_' / 'sum_' column prefixes.
by_season = analysis_df.groupby(by='season')
season_granular_mean = by_season.mean().add_prefix('mean_')
season_granular_sum = by_season.sum().add_prefix('sum_')
season_granular_analysis = pd.concat(
    [season_granular_mean, season_granular_sum],
    axis=1,
)

In [219]:
# Season-level view: per-season mean of total_rating against the per-season
# mean of cult_refs_count.
ratings_refs_graph(season_granular_analysis, 'mean_total_rating', 'mean_cult_refs_count')

In [220]:
# Season-level view: per-season mean of total_rating against the per-season
# sum of cult_refs_count.
ratings_refs_graph(season_granular_analysis, 'mean_total_rating', 'sum_cult_refs_count')

In [232]:
# NOTE(review): looks like leftover scratch - this plots only the scalar 10
# and uses none of the notebook's data; consider removing the cell.
plt.plot(10)

In [231]:
# Season-level comparison of the summed goofs count and the mean total rating.
# Both are standardised and lightly smoothed (weight 0.5).
goofs_curve = smooth(list(normalize(season_granular_analysis, 'sum_goofs_count')), 0.5)
rating_curve = smooth(list(normalize(season_granular_analysis, 'mean_total_rating')), 0.5)
# Label the curves so the figure can be read on its own.
plt.plot(goofs_curve, label='sum_goofs_count')
plt.plot(rating_curve, label='mean_total_rating')
plt.legend()
plt.ylabel('Normalized scale')
plt.xlabel('Seasons')
plt.title('sum_goofs_count vs. mean_total_rating')
plt.show()

In [230]:
# Season-level comparison of the summed self-reference count and the mean
# total rating. Both are standardised and lightly smoothed (weight 0.5).
self_refs_curve = smooth(list(normalize(season_granular_analysis, 'sum_self_refs_count')), 0.5)
rating_curve = smooth(list(normalize(season_granular_analysis, 'mean_total_rating')), 0.5)
# Label the curves so the figure can be read on its own.
plt.plot(self_refs_curve, label='sum_self_refs_count')
plt.plot(rating_curve, label='mean_total_rating')
plt.legend()
plt.ylabel('Normalized scale')
plt.xlabel('Seasons')
plt.title('sum_self_refs_count vs. mean_total_rating')
# Render explicitly, as the neighbouring plotting cells do.
plt.show()

In [228]:
# Overlay the self_refs_count of every season on a shared 0..1 x-axis, where x
# is the relative position of a row within its season.
for season in analysis_df['season'].unique():
    # Double reset_index: the first call moves the existing index into a
    # column, the second adds a fresh 0..n-1 positional column that pandas
    # names 'level_0'.
    test_df = analysis_df.loc[analysis_df['season']==season].reset_index().reset_index()
    span = test_df['level_0'].max() - test_df['level_0'].min()
    if span:
        test_df['level_0'] = (test_df['level_0'] - test_df['level_0'].min()) / span
    else:
        # A season with a single row would give 0/0 (NaN); pin it to 0.
        test_df['level_0'] = 0.0
    test_df = test_df.set_index('level_0')
    #plt.plot(smooth(test_df['self_refs_count'], 0.3))
    plt.plot((test_df['self_refs_count']), label=season)
plt.legend()
plt.xlabel('Relative position within season')
plt.ylabel('self_refs_count')
plt.title('self_refs_count per season')
plt.show()

In [229]:
# Build one frame per season, each indexed by the relative position (0..1) of
# its rows within that season, and collect the frames in season_refs_list.
season_refs_list = []
for season in analysis_df['season'].unique():
    # Double reset_index: the first call moves the existing index into a
    # column, the second adds a fresh 0..n-1 positional column that pandas
    # names 'level_0'.
    test_df = analysis_df.loc[analysis_df['season']==season].reset_index().reset_index()
    span = test_df['level_0'].max() - test_df['level_0'].min()
    if span:
        test_df['level_0'] = (test_df['level_0'] - test_df['level_0'].min()) / span
    else:
        # A season with a single row would give 0/0 (NaN); pin it to 0.
        test_df['level_0'] = 0.0
    test_df = test_df.set_index('level_0')
    season_refs_list.append(test_df)

In [225]:
# Stack the per-season frames vertically. Each frame keeps its own 0..1
# 'level_0' index, so index values repeat across seasons in the result.
season_refs_df = pd.concat(season_refs_list,axis=0)

In [226]:
# Preview the first rows of the stacked per-season frame.
season_refs_df.head()

In [227]:
# Heavily smoothed (weight 0.99) cult_refs_count over all rows of the stacked
# frame, drawn in concatenation order against the row position.
plt.plot(smooth(list(season_refs_df['cult_refs_count']),0.99))