In [1]:
import pandas as pd
import numpy as np
import itertools as it
import os
import io
import logging

import boto3
import sys
import json

from lib.model import ModelMain

# configs
from lib.config import percent_data_process_info
from lib.config import prelaunch_process_info
from lib.config import metadata_process_info
from lib.config import default_params_dict as params_dict
from lib.config import model_name_list
from lib.config import params_tuning_dict


# Reading Data 

Read the input data from multiple sources; each input is produced by its corresponding query.

### Step 1 of the Prediction Process: Getting Data
Step 1.1: Update the funnel metrics by running the SageMaker notebook 'query_pipeline' under the '/query' folder.

Step 1.2: Run each of the queries in '/day28_prediction/query/' to extract each of the input CSVs below.

In [2]:
# S3 bucket that holds the input feature CSVs read by the loading cells below.
input_bucket = 'hbo-ingest-datascience-content-dev'

# NOTE(review): the S3 write cells at the end of this notebook pass `output_bucket`
# to to_s3(), but the name was never defined anywhere, so a fresh
# Restart & Run All fails there with NameError. Defaulting to the input bucket
# because the results are written under the same 'pct_actives_prediction/' prefix
# that the pct_actives input is read from -- confirm this is the intended target.
output_bucket = input_bucket

In [3]:
logger = logging.getLogger()
logger.info('Loading inputs')
data_list = []

# Rows with these match_id_platform values are removed from every feature frame:
#  - the old Mortal Kombat movie, because the trailer percent view matching
#    matches the trailer of the new movie to the old movie
#  - Tom & Jerry, due to an unresolvable data issue
excluded_match_id_platforms = [
    '1-GYGQBcwsaCIW2XgEAAAAL', '0-GYGQBcwsaCIW2XgEAAAAL',
    '1-GYEb9QwLgFF9_ZwEAAAA7', '0-GYEb9QwLgFF9_ZwEAAAA7',
]

s3 = boto3.resource('s3')
bucket = s3.Bucket(input_bucket)
# bucket.objects.filter iterates through all matching objects, doing the pagination
# for us. Each obj is an ObjectSummary, so it doesn't contain the body; obj.get()
# is needed to fetch it.
for obj in bucket.objects.filter(Prefix='input_percent_view'):
    key = obj.key
    if key.endswith('/'):
        # "Folder" placeholder object: there is no file name to derive a feature
        # name from and nothing to read (this used to crash the loop).
        continue

    logger.info('Loading csv file {}'.format(key))
    # '<prefix>/<name>.csv' -> '<name>'
    var_name = key.split('.')[0].split('/')[1]
    print('Reading {0} features'.format(var_name))

    # The frame is built with ordinary pandas calls instead of exec() on source
    # text assembled from the S3 key, so an unexpected key can no longer inject code.
    # NOTE(review): the NA token is the literal two-backslash string `\\N`, exactly
    # as the previous exec-based code produced it -- confirm the export does not use
    # the single-backslash `\N` instead.
    feature_df = pd.read_csv(obj.get()['Body'], na_values=[r'\\N'])
    feature_df.columns = feature_df.columns.str.lower()

    # exclude the full null columns
    feature_df = feature_df.loc[:, feature_df.isnull().sum() != feature_df.shape[0]]

    # drop the excluded titles listed above
    keep_rows = ~feature_df['match_id_platform'].isin(excluded_match_id_platforms)
    feature_df = feature_df.loc[keep_rows, :].reset_index(drop=True)

    # Keep publishing each frame under its feature name (as the exec-based version
    # did) so interactive inspection by name still works.
    globals()[var_name] = feature_df

    # append the feature df
    data_list.append(feature_df)

Reading funnel_metric_feature features
Reading media_cost_postlaunch_feature features
Reading media_cost_prelaunch_feature features
Reading metadata_feature features
Reading prelaunch_trailer_feature features
Reading prelaunch_trailer_feature_before28 features
Reading sub_total_feature features
Reading trailer_feature features
Reading vtp_feature features
Reading wiki_view_feature features
Reading wiki_view_feature_before28 features


In [4]:
# Titles removed from the pct_actives data (same ids as for the other features):
#  - the old Mortal Kombat movie, because the trailer percent view matching
#    matches the trailer of the new movie to the old movie
#  - Tom & Jerry, due to an unresolvable data issue
excluded_title_ids = [
    '1-GYGQBcwsaCIW2XgEAAAAL', '0-GYGQBcwsaCIW2XgEAAAAL',
    '1-GYEb9QwLgFF9_ZwEAAAA7', '0-GYEb9QwLgFF9_ZwEAAAA7',
]

bucket = s3.Bucket(input_bucket)
for obj in bucket.objects.filter(Prefix='pct_actives_prediction/pct_actives'):
    key = obj.key
    if key.endswith('/'):
        # "Folder" placeholder object: nothing to read (this used to crash the loop).
        continue

    logger.info('Loading csv file {}'.format(key))
    # '<prefix>/<name>.csv' -> '<name>'
    var_name = key.split('.')[0].split('/')[1]
    print('Reading {0} features'.format(var_name))

    # Plain pandas calls instead of exec() on source text built from the S3 key.
    # NOTE(review): the NA token is the literal two-backslash string `\\N`, exactly
    # as the previous exec-based code produced it -- confirm against the export.
    pct_df = pd.read_csv(obj.get()['Body'], na_values=[r'\\N'])
    pct_df.columns = pct_df.columns.str.lower()

    # exclude the full null columns
    pct_df = pct_df.loc[:, pct_df.isnull().sum() != pct_df.shape[0]]

    # Drop the excluded titles. The ids above carry the '<platform>-' prefix, so in
    # addition to the original `match_id` check the filter is also applied to
    # `match_id_platform` when that column is present.
    # NOTE(review): presumably `match_id` holds un-prefixed ids here, in which case
    # the original match_id-only filter never removed anything -- confirm.
    keep_rows = ~pct_df['match_id'].isin(excluded_title_ids)
    if 'match_id_platform' in pct_df.columns:
        keep_rows &= ~pct_df['match_id_platform'].isin(excluded_title_ids)
    pct_df = pct_df.loc[keep_rows, :].reset_index(drop=True)

    # Keep publishing the frame under its file-derived name, as the exec-based
    # version did.
    globals()[var_name] = pct_df

    # append the feature df
    data_list.append(pct_df)

Reading pct_actives features


In [68]:
# Collapse the pct_actives frame (last entry of data_list) to one value per
# (match_id_platform, days_after_launch): the mean of pct_actives within each pair.
id_day_columns = ['match_id_platform', 'days_after_launch']

# .copy() gives an independent frame, so the column assignment below no longer
# writes into a slice of data_list[-1] (SettingWithCopyWarning).
active_data = data_list[-1][id_day_columns + ['pct_actives']].copy()
active_data['pct_actives_values'] = (
    active_data.groupby(id_day_columns)['pct_actives'].transform('mean')
)
active_data = active_data[id_day_columns + ['pct_actives_values']]

# keep only rows where both key columns are present
has_both_keys = (
    active_data['match_id_platform'].notnull()
    & active_data['days_after_launch'].notnull()
)
active_data = active_data[has_both_keys]

# every row of a (title, day) pair now carries the same mean, so de-duplicating
# leaves one row per pair; done without inplace=True to avoid mutating a filtered view
active_data = active_data.drop_duplicates()

In [69]:
# Reshape to wide format: one row per match_id_platform, one column per
# days_after_launch value.
pivoted_actives = active_data.pivot(
    index='match_id_platform',
    columns='days_after_launch',
    values=['pct_actives_values'],
)
active_data = pivoted_actives.reset_index()

# Replace the pivot's column labels with flat names. The assignment needs exactly
# 28 day columns. The 'pct_actives_00' prefix is fixed, so day 10 becomes
# 'pct_actives_0010' (see the preview output below this cell).
day_column_names = ['pct_actives_00' + str(day_number) for day_number in range(1, 29)]
active_data.columns = ['match_id_platform'] + day_column_names

# Swap the long-format pct_actives frame (last entry of data_list) for the wide one.
data_list[-1] = active_data

In [70]:
# Preview the first rows of the wide pct_actives frame (one row per match_id_platform).
active_data.head()

Unnamed: 0,match_id_platform,pct_actives_001,pct_actives_002,pct_actives_003,pct_actives_004,pct_actives_005,pct_actives_006,pct_actives_007,pct_actives_008,pct_actives_009,...,pct_actives_0019,pct_actives_0020,pct_actives_0021,pct_actives_0022,pct_actives_0023,pct_actives_0024,pct_actives_0025,pct_actives_0026,pct_actives_0027,pct_actives_0028
0,0-GV-BKigrJWcJMwwEAAABi,0.245508,0.33937,0.408132,0.450645,0.518481,0.584256,0.627948,0.670518,0.71542,...,1.086071,1.122282,1.150591,1.177301,1.202733,1.227059,1.251608,1.282392,1.316002,1.337381
1,0-GV-BKvAt0FsJMwwEAAABv,0.002158,0.002899,0.003816,0.004408,0.004867,0.005296,0.005914,0.006487,0.007081,...,0.013187,0.013553,0.013999,0.014456,0.01491,0.015489,0.015946,0.01632,0.016758,0.017292
2,0-GV-BPaQSKT8JMwwEAAACP,0.128517,0.19525,0.252511,0.289867,0.341674,0.40149,0.452797,0.50507,0.530118,...,0.726141,0.741945,0.757079,0.776649,0.813322,0.841869,0.873462,0.908156,0.94324,0.97615
3,0-GV-P5WwNChivDZAEAAAAn,0.002867,0.005021,0.005831,0.006572,0.006972,0.007594,0.008171,0.008572,0.009225,...,0.015282,0.015688,0.016086,0.016635,0.01678,0.017249,0.017417,0.017631,0.017869,0.018349
4,0-GV1XW7AqGccPDwwEAAAAQ,0.00349,0.004691,0.006162,0.007429,0.008459,0.009493,0.010564,0.011949,0.013023,...,0.02548,0.026921,0.028188,0.029471,0.030587,0.031314,0.033043,0.03462,0.035875,0.037023


In [71]:
# Create the ModelMain object that the prediction loop below works with.
logger.info('Setting up the prediction model')

back_consideration_date = 180
percentile_used = 0.8
# back_consideration_date / 30, rounded down; np.floor returns a float (6.0 here)
nfold = np.floor(back_consideration_date / 30)

cv_func = ModelMain(
    data_list,
    metadata_process_info['label_columns'],
    metadata_process_info['num_columns'],
)

Final title size: 6084, All title size: 6084


# New Title Prediction, Post Launch 

### Step 2: Make Prediction
Note: Because this is a post-launch prediction, only titles that already have partial percent-view and view-through-portion data will be predicted.

In [None]:
# Get the prediction trajectory over the length of the data.
#
# For every `day` in range(-27, 27) (i.e. -27 through 26) the loop:
#   1. builds the per-day config (prelaunch inputs + 'lr' for day < 1,
#      metadata inputs + 'lgb' with log transformations for 1 <= day < 14,
#      metadata inputs + 'lr' otherwise),
#   2. calls cv_func.get_X_y and skips the day when nothing needs predicting,
#   3. tunes parameters (models other than 'lr'/'enet') or runs cross prediction,
#   4. predicts the new titles and collects both per-day output frames.
# The collected frames are concatenated once after the loop and formatted.
#
# Relies on `cv_func`, `percentile_used`, `nfold`, `params_dict`,
# `params_tuning_dict` and `logger` from earlier cells.

# The configs are imported once under base_* names and shallow-copied per day.
# Re-importing inside the loop (as before) never reloaded anything -- Python caches
# the module -- and the old loop wrote the transformation flags straight into
# lib.config's shared dict. Copying first gives every day the same starting config
# and leaves the shared dicts untouched.
from lib.config import percent_data_process_info as base_percent_data_process_info
from lib.config import prelaunch_process_info as base_prelaunch_process_info
from lib.config import metadata_process_info as base_metadata_process_info


def _log_and_print(message):
    """Send `message` to the logger and echo it in the cell output."""
    logger.info(message)
    print(message)


def _as_day_prediction(frame, model_name, day):
    """Return a copy of `frame` tagged with `pred_day` = `day`, with the first column
    whose name contains `model_name` renamed to 'prediction'.

    Works on a copy (assign/rename) so the frame owned by cv_func is not modified.
    """
    pred_column = frame.columns[frame.columns.str.contains(model_name)][0]
    return frame.assign(pred_day=day).rename(columns={pred_column: 'prediction'})


back_consideration_date = 180

# per-day frames are collected here and concatenated once after the loop
# (concatenating inside the loop re-copies everything gathered so far on each day)
new_title_frames = []
existing_title_frames = []

for day in range(-27, 27):
    # determine prelaunch or postlaunch
    if day < 1:
        input_process_info = dict(base_prelaunch_process_info)
        use_log_transformations = False
        model_name = 'lr'
    elif day < 14:
        input_process_info = dict(base_metadata_process_info)
        use_log_transformations = True
        model_name = 'lgb'
    else:
        input_process_info = dict(base_metadata_process_info)
        use_log_transformations = False
        model_name = 'lr'
    model_name_list = [model_name]
    input_percentile_used = percentile_used

    # fresh (shallow) copy of the base config with this day's settings
    percent_data_process_info = dict(base_percent_data_process_info)
    percent_data_process_info['exact_X_pred'] = False
    percent_data_process_info['target_log_transformation'] = use_log_transformations
    percent_data_process_info['log_ratio_transformation'] = use_log_transformations
    percent_data_process_info['max_num_day'] = day

    # get x and y
    logger.info('Get X and y for day {}'.format(day))
    cv_func.get_X_y(percent_data_process_info,
                    input_process_info,
                    day001_popularity_threshold=input_percentile_used)

    if cv_func.pred_empty_flag:
        print('no title needs to be predicted at day {}'.format(day))
        continue

    if model_name not in ['lr', 'enet']:
        # tune parameter
        _log_and_print('Tune parameter for day {}'.format(day))
        cv_func.parameter_tuning(model_name,
                                 params_tuning_dict,
                                 percent_data_process_info,
                                 nfold=nfold,
                                 back_consideration_date=back_consideration_date)

        # NOTE(review): this rebinds the notebook-level `params_dict`, so the tuned
        # values are used by predict_new_titles below AND stay in effect for every
        # later day (including the 'lr' days from 14 on) and for re-runs of this
        # cell -- confirm that carry-over is intended.
        params_dict = cv_func.min_smape_param['min_smape_original']
        param_stats = cv_func.parameter_tuning_stats
        smape_all = param_stats['min_smape_all']
        smape_originals = param_stats['min_smape_original']
    else:
        _log_and_print('Do cross prediction for day {}'.format(day))
        cv_func.cross_prediction(model_name_list,
                                 params_dict,
                                 percent_data_process_info,
                                 nfold=nfold,
                                 back_consideration_date=back_consideration_date)

        smape_column = 'smape_' + model_name
        smape_all = cv_func.output[smape_column].mean()
        smape_originals = cv_func.output.loc[cv_func.output['program_type'] == 1, smape_column].mean()

    _log_and_print('SMAPE for all titles {}'.format(smape_all))
    _log_and_print('SMAPE for the originals {}'.format(smape_originals))

    # make prediction
    _log_and_print('Making prediction for day {}'.format(day))
    cv_func.predict_new_titles(model_name_list,
                               params_dict,
                               percent_data_process_info)

    # process the output
    cur_new_title_output = _as_day_prediction(cv_func.new_title_output, model_name, day)

    # process the existing titles; the model-specific metric columns are renamed to
    # model-agnostic names so days using different models stack into the same columns
    cur_existing_title_output = _as_day_prediction(cv_func.output, model_name, day)
    cur_existing_title_output = cur_existing_title_output.rename(columns={
        'smape_lgb': 'smape',
        'smape_lr': 'smape',
        'smape_enet': 'smape',
        'mae_lgb': 'mae',
        'mae_lr': 'mae',
        'mae_enet': 'mae',
    })

    new_title_frames.append(cur_new_title_output)
    existing_title_frames.append(cur_existing_title_output)

# stack the per-day results; fall back to empty frames when every day was skipped
new_title_output = pd.concat(new_title_frames, axis=0) if new_title_frames else pd.DataFrame()
existing_title_output = pd.concat(existing_title_frames, axis=0) if existing_title_frames else pd.DataFrame()

# final formatting

if new_title_output.shape[0] > 0:
    # selecting the columns below also leaves out 'target'
    new_title_output = new_title_output.sort_values(['title_name', 'pred_day'])
    new_title_output = new_title_output[['title_name',
                                         'match_id',
                                         'match_id_platform',
                                         'platform_name',
                                         'program_type',
                                         'pred_day',
                                         'prediction']]

if existing_title_output.shape[0] > 0:
    existing_title_output = existing_title_output.sort_values(['match_id_platform', 'pred_day'])
    # platform_name is the first character of match_id_platform
    existing_title_output['platform_name'] = existing_title_output['match_id_platform'].str[0]
    existing_title_output = existing_title_output[['title_name',
                                                   'match_id',
                                                   'match_id_platform',
                                                   'platform_name',
                                                   'program_type',
                                                   'target',
                                                   'pred_day',
                                                   'prediction',
                                                   'smape',
                                                   'mae',
                                                   'fold']]

only 16 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -27
only 17 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -26
only 18 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -25


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


only 19 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -24
only 20 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -23
only 21 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -22


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


only 22 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -21
only 23 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -20
only 24 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -19


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


only 24 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -18
only 25 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -17
only 25 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -16


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


only 26 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -15
only 27 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -14
only 29 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -13


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


only 29 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -12
only 29 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -11
only 29 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -10


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


only 29 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -9
only 30 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -8
only 31 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -7


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


only 32 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -6
only 32 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -5
only 32 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -4


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


only 33 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -3
only 33 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -2
only 33 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day -1


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]
  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


only 33 titles considered after prelaunch filter
the number of days is not large enough to use log ratio transformation
X and y are ready based on the input params
no title needs to be predicted at day 0
keeps the titles above 80.0 percentile day1 viewed over all titles only
only 1186 titles considered
X and y are ready based on the input params
Tune parameter for day 1
parameter combination 1


  self.X_base = self.X[self.y!=-100]
  self.X_pred = self.X[((self.y==-100) & (self.X['platform_name']==1))]


parameter combination 2




parameter combination 3




parameter combination 4




parameter combination 5




parameter combination 6




parameter combination 7




parameter combination 8




SMAPE for all titles 0.2854812624477232
SMAPE for the originals 0.2606525008105219
Making prediction for day 1




keeps the titles above 80.0 percentile day1 viewed over all titles only
only 1186 titles considered
X and y are ready based on the input params
Tune parameter for day 2
parameter combination 1




parameter combination 2




parameter combination 3




parameter combination 4




parameter combination 5




parameter combination 6




parameter combination 7




parameter combination 8




SMAPE for all titles 0.3052652985176481
SMAPE for the originals 0.2600346932625543
Making prediction for day 2




keeps the titles above 80.0 percentile day1 viewed over all titles only
only 1186 titles considered
X and y are ready based on the input params
Tune parameter for day 3
parameter combination 1




parameter combination 2




parameter combination 3




parameter combination 4




parameter combination 5




parameter combination 6




parameter combination 7




parameter combination 8




SMAPE for all titles 0.2813786766704221
SMAPE for the originals 0.22375721447627384
Making prediction for day 3




keeps the titles above 80.0 percentile day1 viewed over all titles only
only 1186 titles considered
X and y are ready based on the input params
Tune parameter for day 4
parameter combination 1




parameter combination 2




parameter combination 3




parameter combination 4




parameter combination 5




parameter combination 6




parameter combination 7




parameter combination 8




SMAPE for all titles 0.2675171724238055
SMAPE for the originals 0.25003584413465796
Making prediction for day 4




keeps the titles above 80.0 percentile day1 viewed over all titles only
only 1186 titles considered
X and y are ready based on the input params
Tune parameter for day 5
parameter combination 1




parameter combination 2




parameter combination 3




parameter combination 4




parameter combination 5




parameter combination 6




parameter combination 7




parameter combination 8




SMAPE for all titles 0.2451519536724177
SMAPE for the originals 0.24698105332101813
Making prediction for day 5




keeps the titles above 80.0 percentile day1 viewed over all titles only
only 1186 titles considered
X and y are ready based on the input params
Tune parameter for day 6
parameter combination 1




parameter combination 2




parameter combination 3




parameter combination 4




parameter combination 5




parameter combination 6




parameter combination 7




parameter combination 8




SMAPE for all titles 0.20822649798829498
SMAPE for the originals 0.210287361817556
Making prediction for day 6




keeps the titles above 80.0 percentile day1 viewed over all titles only
only 1186 titles considered
X and y are ready based on the input params
Tune parameter for day 7
parameter combination 1




parameter combination 2




parameter combination 3




parameter combination 4




parameter combination 5




parameter combination 6




parameter combination 7




parameter combination 8




SMAPE for all titles 0.22357672843825815
SMAPE for the originals 0.23924033442472475
Making prediction for day 7




keeps the titles above 80.0 percentile day1 viewed over all titles only
only 1186 titles considered
X and y are ready based on the input params
Tune parameter for day 8
parameter combination 1




parameter combination 2




parameter combination 3




parameter combination 4




parameter combination 5




parameter combination 6




parameter combination 7




parameter combination 8




SMAPE for all titles 0.22277130542391918
SMAPE for the originals 0.22342822899527104
Making prediction for day 8




keeps the titles above 80.0 percentile day1 viewed over all titles only
only 1186 titles considered
X and y are ready based on the input params
Tune parameter for day 9
parameter combination 1




parameter combination 2




parameter combination 3




parameter combination 4




parameter combination 5




parameter combination 6




# Write csvs to S3

### Step 3: Write the prediction result to S3

In [None]:
def to_s3(filename, output_bucket, content):
    """Upload `content` to S3 as the object `filename` in bucket `output_bucket`.

    A new boto3 S3 client is created on every call.
    """
    s3_client = boto3.client('s3')
    s3_client.put_object(Body=content, Key=filename, Bucket=output_bucket)

In [None]:
# Upload the new-title predictions to S3 as CSV text (index column omitted).
status_message = 'Writing new title predictions over time to S3 as an csv file'
logger.info(status_message)
print(status_message)

# to_csv without a target returns the CSV as a string
content = new_title_output.to_csv(index=False)
filename = 'pct_actives_prediction/new_title_prediction.csv'

to_s3(filename=filename, output_bucket=output_bucket, content=content)

In [None]:
# Upload the existing-title predictions to S3 as CSV text (index column omitted).
status_message = 'Writing existing title predictions over time to S3 as an csv file'
logger.info(status_message)
print(status_message)

# to_csv without a target returns the CSV as a string
content = existing_title_output.to_csv(index=False)
filename = 'pct_actives_prediction/existing_title_prediction.csv'

to_s3(filename=filename, output_bucket=output_bucket, content=content)

# Results

In [None]:
# Also save the existing-title predictions to a local CSV in the working directory.
# index=False is not passed here, so the frame's index is written as the first column.
existing_title_output.to_csv('existing_title_output.csv')

In [None]:
# Show every existing-title prediction row for 'Titans S3'.
is_titans_s3 = existing_title_output['title_name'] == 'Titans S3'
existing_title_output.loc[is_titans_s3]

In [12]:
# List the distinct prediction days available for 'Cry Macho'.
is_cry_macho = new_title_output['title_name'] == 'Cry Macho'
new_title_output.loc[is_cry_macho, 'pred_day'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13])