In [None]:
import pandas as pd
import numpy as np
import os
import sys
sys.path.append('../')

import matplotlib.pyplot as plt
from src.mediacompanycasestudy.config import data_dir_external
from src.mediacompanycasestudy.config import filename

from src.dataprocessor.read_data import data_read
from src.dataprocessor.read_data import day_calculation

from src.visualization.graphs import day_view_show
from src.visualization.graphs import scatter_plot
from src.visualization.graphs import day_viewshow_ad_impression
from src.visualization.graphs import corelation_graph
from src.visualization.graphs import actual_predictions
from src.visualization.graphs import actual_predict_errors

from src.models.model_serializer import serializeModel
from src.models.model_serializer import deserializeModel

from src.models.datamodels import data_model
from src.models.datamodels import weekend_variable
from src.models.datamodels import lag_variable

from src.models.model_predictions import predictions

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Build the absolute path of the input file from the configured directory and file name.
# NOTE(review): the `/` join implies data_dir_external is a pathlib.Path-like object — confirm in config.
absfilename = os.path.abspath(data_dir_external/filename)
absfilename

In [None]:
# Load the dataset from the resolved path with the project reader and preview the first rows.
# NOTE(review): data_read lives in src.dataprocessor.read_data; the result is used as a DataFrame below.
media = data_read(absfilename)
media.head()

In [None]:
# Convert the 'Date' column to pandas datetimes; no explicit format is given, so pandas infers it.
# NOTE(review): if the raw dates are day-first or otherwise ambiguous, pass an explicit `format=` — confirm against the file.
media['Date'] = pd.to_datetime(media['Date'])
media.head()

In [None]:
# Run the project helper on the frame; the following cell reads a 'day' column from its result.
# NOTE(review): presumably 'day' is an offset from a reference date — confirm in day_calculation.
media=day_calculation(media)
media.head()

In [None]:
# Cleaning days: reduce `day` to a plain integer.
# NOTE(review): `day` is presumably a timedelta-like value that renders as "N days ..." — confirm in day_calculation.
# The full leading (optionally signed) integer is extracted instead of the first two
# characters, so values with three or more digits (e.g. "100 days") are no longer
# silently truncated (previously "100 days" became 10).
media['day'] = (
    media['day']
    .astype(str)
    .str.extract(r'^\s*([-+]?\d+)', expand=False)
    .astype(int)
)

media.head()

In [None]:
# Project plotting helper from src.visualization.graphs.
# NOTE(review): presumably plots show views against day — confirm in the helper.
day_view_show(media)

In [None]:
# Project plotting helper from src.visualization.graphs.
# NOTE(review): which columns are scattered is decided inside the helper — confirm there.
scatter_plot(media)

In [None]:
# Project plotting helper from src.visualization.graphs.
# NOTE(review): presumably overlays show views and ad impressions by day — confirm in the helper.
day_viewshow_ad_impression(media)

## Data Preparation

In [None]:
# Derived metric: weekday code in the range 1..7.
# Convention stated by the original author: 1 corresponds to Sunday and 7 to Saturday.
# (day + 3) % 7 yields 0..6; a remainder of 0 is mapped to 7.
# NOTE(review): the +3 offset assumes which weekday `day == 1` falls on — confirm against the data.
#
# The whole transformation is a single assignment. The earlier form
# `media.weekday.replace(0, 7, inplace=True)` mutated a Series obtained through
# attribute access; under pandas Copy-on-Write that does not update `media`, and
# the warning about it is hidden because this notebook silences all warnings.
media['weekday'] = ((media['day'] + 3) % 7).replace(0, 7).astype(int)
media.head()

#### Running first model (lm1) Weekday & visitors

In [None]:
# Predictors for lm_1: visitor count and the derived weekday code.
X = media.loc[:, ['Visitors', 'weekday']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_1 = data_model(X, y)
print(lm_1.summary())

In [None]:
# Run the project helper on the frame; the models below read a 'weekend' column from its result.
# NOTE(review): presumably a 0/1 flag derived from 'weekday' — confirm in weekend_variable.
media=weekend_variable(media)
media.head()

#### Running second model (lm2) visitors & weekend

In [None]:
# Predictors for lm_2: visitor count and the weekend flag.
X = media.loc[:, ['Visitors', 'weekend']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_2 = data_model(X, y)
print(lm_2.summary())

#### Running third model (lm3) visitors, weekend & Character_A

In [None]:
# Predictors for lm_3: visitor count, weekend flag and Character_A.
X = media.loc[:, ['Visitors', 'weekend', 'Character_A']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_3 = data_model(X, y)
print(lm_3.summary())

In [None]:
# Run the project helper on the frame; the next model reads a 'Lag_Views' column from its result.
# NOTE(review): presumably the previous row's Views_show — confirm in lag_variable (including how the first row is filled).
media=lag_variable(media)
media.head()

#### Running fourth model (lm4) visitors, Character_A, Lag_Views & weekend

In [None]:
# Predictors for lm_4: visitor count, Character_A, lagged views and weekend flag.
X = media.loc[:, ['Visitors', 'Character_A', 'Lag_Views', 'weekend']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_4 = data_model(X, y)
print(lm_4.summary())

In [None]:
# Project plotting helper from src.visualization.graphs.
# NOTE(review): presumably draws the correlation matrix of the frame's columns — confirm in the helper.
corelation_graph(media)

#### Running fifth model (lm5) Character_A, weekend & Views_platform

In [None]:
# Predictors for lm_5: weekend flag, Character_A and platform views.
X = media.loc[:, ['weekend', 'Character_A', 'Views_platform']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_5 = data_model(X, y)
print(lm_5.summary())

#### Running sixth model (lm6) Character_A, weekend & Visitors

In [None]:
# Predictors for lm_6: weekend flag, Character_A and visitor count.
X = media.loc[:, ['weekend', 'Character_A', 'Visitors']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_6 = data_model(X, y)
print(lm_6.summary())

#### Running seventh model (lm7) Character_A, weekend, Visitors & Ad_impression

In [None]:
# Predictors for lm_7: weekend flag, Character_A, visitor count and ad impressions.
X = media.loc[:, ['weekend', 'Character_A', 'Visitors', 'Ad_impression']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_7 = data_model(X, y)
print(lm_7.summary())

#### Running eighth model (lm8) Character_A, weekend & Ad_impression

In [None]:
# Predictors for lm_8: weekend flag, Character_A and ad impressions.
X = media.loc[:, ['weekend', 'Character_A', 'Ad_impression']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_8 = data_model(X, y)
print(lm_8.summary())

In [None]:
# Rescale ad impressions to millions and store the result as a new column.
media['ad_impression_million'] = media['Ad_impression'].div(1000000)

#### Running ninth model (lm9) Character_A, weekend, ad_impression_million & Cricket_match_india

In [None]:
# Predictors for lm_9: weekend flag, Character_A, ad impressions (millions) and the cricket-match column.
X = media.loc[:, ['weekend', 'Character_A', 'ad_impression_million', 'Cricket_match_india']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_9 = data_model(X, y)
print(lm_9.summary())

#### Running tenth model (lm10) Character_A, weekend & ad_impression_million

In [None]:
# Predictors for lm_10: weekend flag, Character_A and ad impressions (millions).
X = media.loc[:, ['weekend', 'Character_A', 'ad_impression_million']]

# Response: views of the show.
y = media.loc[:, 'Views_show']

# Fit through the project helper and print the fitted model's summary.
lm_10 = data_model(X, y)
print(lm_10.summary())

#### Serialization and deserialization

In [None]:
# Hand the prepared frame and the two retained models (lm_10, lm_6) to the project serializer.
# NOTE(review): storage format and location are decided inside serializeModel — confirm (if pickle, only ever load trusted files).
serializeModel(media, lm_10, lm_6)

In [None]:
# Rebind media, lm_10 and lm_6 to whatever the project deserializer returns, in that order.
# NOTE(review): presumably reads back what serializeModel wrote — confirm in src.models.model_serializer.
media, lm_10, lm_6 = deserializeModel()

#### Making predictions using lm10

In [None]:
# Same predictor columns that lm_10 was fitted on.
X = media.loc[:, ['weekend', 'Character_A', 'ad_impression_million']]

# Project helper; its three results are unpacked in this order.
mse, r_squared, Predicted_views = predictions(media, lm_10, X)

In [None]:
# Report the two metrics returned by predictions() for lm_10.
print('Mean_Squared_Error :' ,mse)
print('r_square_value :',r_squared)

In [None]:
# Project plotting helper, given the frame and the lm_10 predictions.
# NOTE(review): presumably overlays actual and predicted views — confirm in src.visualization.graphs.
actual_predictions(media, Predicted_views)

In [None]:
# Project plotting helper, given the frame and the lm_10 predictions.
# NOTE(review): presumably plots the prediction errors (actual minus predicted) — confirm in src.visualization.graphs.
actual_predict_errors(media, Predicted_views)

#### Making predictions using lm6

In [None]:
# Same predictor columns that lm_6 was fitted on.
X = media.loc[:, ['weekend', 'Character_A', 'Visitors']]

# Project helper; its three results are unpacked in this order.
mse, r_squared, Predicted_views = predictions(media, lm_6, X)

In [None]:
# Report the two metrics returned by predictions() for lm_6.
print('Mean_Squared_Error :' ,mse)
print('r_square_value :',r_squared)

In [None]:
# Project plotting helper, given the frame and the lm_6 predictions.
# NOTE(review): presumably overlays actual and predicted views — confirm in src.visualization.graphs.
actual_predictions(media, Predicted_views)

In [None]:
# Project plotting helper, given the frame and the lm_6 predictions.
# NOTE(review): presumably plots the prediction errors (actual minus predicted) — confirm in src.visualization.graphs.
actual_predict_errors(media, Predicted_views)