# Run Pre Launch Model

In [0]:
# Environment-specific settings for this run: the credentials identifier handed
# to the Snowflake connector, the target database, and the input/output buckets.
sf_creds = "hbo-max-content-datascience-snowflake-dev"
database = "max_dev"

input_bucket = "hbo-ingest-datascience-content"
output_bucket = "hbo-outbound-datascience-content-dev"

In [0]:
# Import Packages
import sys, os, re
import io
import pandas as pd
import numpy as np
import itertools as it
import logging
import boto3
import json
from datetime import datetime, timedelta


import lib.util_snowflake as sfk
from snowflake.connector.errors import ProgrammingError
from snowflake.connector.pandas_tools import write_pandas

import warnings
# Silence every Python warning for the rest of the notebook. A single
# catch-all "ignore" filter is sufficient, so no second filter is installed.
warnings.filterwarnings('ignore')

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Use the root logger at INFO level for progress messages.
logger = logging.getLogger()
logging.basicConfig(level=logging.INFO)
logger.info('Starting Notebook')

In [0]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to project modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Run date: yesterday (local clock), formatted as YYYY-MM-DD.
current_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

# KPI name, region filter and Snowflake schema used by the cells below.
kpi = "viewing_subs"
geo_value = "NORTH AMERICA"
schema = "delphi"

In [0]:
## Connect to Snowflake
# Log which credentials identifier is in use before connecting.
logger.info(f'TEST: {sf_creds}')

# Build the connector from the stored credentials, then open a connection on
# the configured database/schema. `ctx` is what the query helpers receive.
conn = sfk.SnowflakeConnector(
    sfk.SSMPSCredentials(sf_creds)
)
ctx = conn.connect(database, schema)
cur = ctx.cursor()

## Pull Additional Features

In [0]:
# Execute the development pre-launch model script with %run; the names it
# defines at top level become available in the notebook namespace afterwards.
dev_pre_launch_model = 'lib/dev_Pre_Launch_Model.py'
%run $dev_pre_launch_model

In [0]:
# Execute the pre-launch model script with %run; the names it defines at top
# level become available in the notebook namespace afterwards.
# NOTE(review): a later cell calls `pre_launch_model(...)`, so the script
# presumably defines a callable with that name which replaces this path
# string — confirm, and consider a distinct name for the path variable.
# NOTE(review): this runs right after the dev_ script above; any name defined
# by both scripts ends up bound to this script's version — confirm intended.
pre_launch_model = 'lib/Pre_Launch_Model.py'
%run $pre_launch_model

In [0]:
# Call `pre_launch_model` with the KPI, run date and the two buckets, and keep
# the result under the same name.
# NOTE(review): this rebinds `pre_launch_model` from the callable to its
# result, so re-running this cell without first re-running the %run cell
# above will likely fail — confirm, and consider a separate instance name.
pre_launch_model = pre_launch_model(kpi, current_date, input_bucket, output_bucket)

In [0]:
# Run the model's feature-engineering step. It returns two objects, unpacked
# here in order; going by the variable names these are trailer and wiki-view
# features (contents are defined in the model script — verify there).
prelaunch_trailer_feature, wiki_view_pre_feature = pre_launch_model.feature_engineer()

In [0]:
# Preview the first rows of the wiki-view features. The original cell ended in
# a dangling `.` (a SyntaxError that stops Run All).
# NOTE(review): assumes `wiki_view_pre_feature` is a pandas DataFrame — confirm.
wiki_view_pre_feature.head()

In [0]:
# Disabled: instead of %run-ing the script here, a later cell imports
# `train_test_scope` directly from lib.dev_train_test_scope.
# train_test_scope = 'lib/dev_train_test_scope.py'
# %run $train_test_scope

In [0]:
## 2.1 Read Metadata New
# Load the metadata query text for the selected region, then execute it.
query_metadata = sfk.load_query(
    os.getcwd()+'/queries/query_metadata.sql',
    geo_value=geo_value,
)
metadata_feature = sfk.execute_query(query=query_metadata, ctx=ctx)

## 2.2 Read Future Schedule
# Same pattern for the future-schedule query; the result is kept as `df_pred`.
query_schedule = sfk.load_query(
    os.getcwd()+'/queries/query_metadata_future.sql',
    geo_value=geo_value,
)
df_pred = sfk.execute_query(query=query_schedule, ctx=ctx)

----------

In [0]:
from lib.dev_train_test_scope import train_test_scope

# Build the train / scoring datasets.
# `current_date` is the run date set once in the configuration cell near the
# top of the notebook. It is deliberately not recomputed here, so every stage
# of the run shares the same date even if execution crosses midnight.
train_test_data = train_test_scope(current_date, input_bucket, output_bucket,
                                   database, schema, geo_value, kpi, metadata_feature, df_pred)
train_test_data.run()

# Expose the results of the run as notebook-level names for the model sections.
train_dataset = train_test_data.train_dataset
score_pgl = train_test_data.score_pgl
score_pre = train_test_data.score_pre
score_post = train_test_data.score_post

## Model

## 1.0 Post-GreenLight Model

In [0]:
from lib.dev_Post_GreenLight_Model import post_greenlight_model

In [0]:
# Disabled: `post_greenlight_model` is imported directly from
# lib.dev_Post_GreenLight_Model in the cell above instead of %run-ing the script.
# Post_GreenLight_Model = 'lib/dev_Post_GreenLight_Model.py'
# %run $Post_GreenLight_Model

In [0]:
# Name the training set and the post-greenlight scoring set used by this model.
train_data_set = train_test_data.train_dataset
test_data_set = train_test_data.score_pgl

# Import Engagement
# The query helpers are called through the `sfk` module alias, as in the
# metadata cell earlier in the notebook; the bare names `load_query` and
# `execute_query` are not imported by the notebook's import cell.
query_metric = sfk.load_query(os.getcwd()+'/queries/query_metric.sql',
                              geo_value=geo_value, database=database)
df_metric = sfk.execute_query(query=query_metric, ctx=ctx)

In [0]:
# Build the post-greenlight model, engineer its features, then run
# cross validation with 5 folds.
pgl_model = post_greenlight_model(
    train_data_set,
    test_data_set,
    kpi,
    input_bucket,
    output_bucket,
    geo_value,
    database,
    schema,
    df_metric,
)
pgl_model.feature_engineer()

# Capture the model's train/test data after feature engineering and before
# cross validation runs.
input_train, input_test = pgl_model.train_data, pgl_model.test_data

pgl_model.cv(NUM_FOLD=5)

# Kept at notebook level so predictions and feature importances can be reviewed.
feature_importances, validation_set = (
    pgl_model.feature_importances,
    pgl_model.validation_set,
)

In [0]:
# Run the model's scoring step, then keep its `prediction_set`; the cells
# below format it for the Delphi prediction tables.
pgl_model.scoring()
prediction_set_post_gl = pgl_model.prediction_set

### 1.1 Format Post-GreenLight Predictions and Write Them to Delphi

In [0]:
# Constant columns needed by the Delphi prediction tables, as a one-row frame.
data = {
    'model_name': ['pct_viewing_subs_2.0'],
    'table_name': ['pct_viewing_subs_postgl'],
    'model_version': ['2.0'],
    'sub_type': ['Max Retail+Wholesale'],
    'sub_plan': ['Platform'],
    'unit': ['percent'],
    'region': [geo_value],
    'days_after_premiere': [28],
    'publish_date': [current_date],
}
df_delphi = pd.DataFrame(data)

# Cross join: the single constant row is repeated for every prediction row.
df_delphi_postgl = pd.merge(df_delphi, prediction_set_post_gl, how='cross')

# Attach the identifier columns from the scoring set, matched on title/season.
df_delphi_postgl = df_delphi_postgl.merge(
    test_data_set[[
        'delphi_id', 'ckg_match_id', 'ckg_series_id', 'imdb_series_id',
        'title_season', 'title_series', 'season_number',
    ]],
    on=['title_season', 'title_series', 'season_number'],
    how='inner',
)

In [0]:
# Map the model's column names onto the column names used by Delphi.
rename_set = {
    'ckg_series_id': 'title_id',
    'imdb_series_id': 'imdb_id',
    'title_series': 'title_name',
    'derived_genre': 'category',
    'first_release_date': 'premiere_date',
}
df_delphi_postgl = df_delphi_postgl.rename(columns=rename_set)


In [0]:
# Parse the date columns so date arithmetic can be done on them.
df_delphi_postgl['premiere_date'] = pd.to_datetime(df_delphi_postgl['premiere_date'])
df_delphi_postgl['publish_date'] = pd.to_datetime(df_delphi_postgl['publish_date'])

# Whole days between the premiere and the publish date.
df_delphi_postgl['current_days_from_premiere'] = (
    df_delphi_postgl['publish_date'] - df_delphi_postgl['premiere_date']
).dt.days

# Target date = premiere date + days_after_premiere, computed column-wise
# rather than building one DateOffset object per row.
df_delphi_postgl['target_date'] = (
    df_delphi_postgl['premiere_date']
    + pd.to_timedelta(df_delphi_postgl['days_after_premiere'], unit='D')
)

# Back to string to make compatible with sfk package. `.dt.strftime` leaves a
# missing date as a null, whereas calling `strftime` on NaT row by row raises.
df_delphi_postgl['publish_date'] = df_delphi_postgl['publish_date'].dt.strftime('%Y-%m-%d')
df_delphi_postgl['target_date'] = df_delphi_postgl['target_date'].dt.strftime('%Y-%m-%d')
df_delphi_postgl['premiere_date'] = df_delphi_postgl['premiere_date'].dt.strftime('%Y-%m-%d')


In [0]:
# Write to Snowflake
# Only rows that carry a delphi_id are exported; the index is reset so that
# label 0 is the first remaining row.
df_delphi_postgl = df_delphi_postgl[df_delphi_postgl['delphi_id'].notnull()].reset_index(drop=True).copy()

# The staging table name is read from the first row, so an empty frame would
# otherwise fail with an opaque `KeyError: 0`. Fail with a clear message instead.
if df_delphi_postgl.empty:
    raise ValueError('No post-greenlight predictions with a delphi_id to export to Snowflake.')

sfk.export_dataframe_to_table(database=database, schema='delphi', df=df_delphi_postgl,
                              table=f'{df_delphi_postgl.loc[0,"table_name"]}_staging', conn=ctx)