In [63]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from pandas.io import sql
import snowflake.connector
import keyring
import psycopg2 
import time
from datetime import date, timedelta
from scipy import stats

# Notebook display limits: cut cell text at 50 characters, show up to 500 columns.
pd.options.display.max_colwidth = 50
pd.options.display.max_columns = 500

from matplotlib import pyplot as plt
import seaborn as sns
# Keep seaborn's current colour palette in `color` (not referenced by the cells visible here).
color = sns.color_palette()
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's "darkgrid" style for subsequent plots.
sns.set_style("darkgrid")

In [64]:
# Snowflake login used for the external-browser sign-in below.
# Read it from the SNOWFLAKE_USER environment variable when that is set, so the
# notebook can be re-run by someone else without editing this cell; otherwise
# fall back to the original hard-coded account.
import os

snowflake_username = os.environ.get('SNOWFLAKE_USER',
                                    'matthew.bessey@disneystreaming.com')

In [65]:
# Open the Snowflake session for `snowflake_username`; authentication is
# delegated to the external browser login.
ctx = snowflake.connector.connect(
    account='disneystreaming.us-east-1',
    user=snowflake_username,
    authenticator='externalbrowser',
)

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


In [66]:
# Date parameters for the query.
# Each value carries its own single quotes because it is substituted verbatim
# into the SQL text with str.format, where it has to read as a SQL string literal.

subscription_start_date = "'2019-12-08'" # latest subscription start date to include (the query filters with <=)
engagement_date = "'2019-12-14'" # engagement snapshot date to pull (the query matches e.ds to it exactly)

In [67]:
# Build the SQL text: subscription joined to account, oneid_combined (source of
# swid / swid_holdout) and the daily account engagement table, restricted to
# partner 'disney', subscriptions that started on or before
# `subscription_start_date`, and engagement rows whose `ds` equals `engagement_date`.
# The two `{}` placeholders are filled positionally by .format(); the values are
# pasted in as-is, so they must already be quoted SQL literals.
# NOTE(review): `limit` is used without an `order by`, so presumably the 500,000
# rows returned are arbitrary and may differ between runs — confirm that is intended.
# NOTE(review): the commented-out filter references s.is_entitled while the selected
# column is e.is_entitled — check which table is meant before re-enabling it.
query= """
select o.swid
, o.swid_holdout
, a.accountid
, s.subscription_id
, e.TOTAL_STREAMS_L1
, e.total_streams_l7
, e.is_entitled
, e.NUM_STREAMING_DEVICES_L7
from subscription s
join account a on a.accountid = s.account_id
join oneid_combined o on a.swid = o.swid
join "DSS_PROD"."DISNEY_PLUS"."DIM_DISNEY_DAILY_ACCOUNT_ENGAGEMENT" e on a.accountid = e.account_id
where s.partner = 'disney'
and s.CALCULATED_SUBSCRIPTION_START_DTM <= {}
--and s.is_entitled = 1
and e.ds = {}
limit 500000;
""".format(subscription_start_date,engagement_date)

In [68]:
# Execute the query over the open Snowflake connection and load the rows into `engagement`.
engagement = pd.read_sql(sql=query, con=ctx)

In [71]:
# Normalise every column label to lower case, then show the first rows.
engagement.columns = [label.lower() for label in engagement.columns]
engagement.head()

Unnamed: 0,swid,swid_holdout,accountid,subscription_id,total_streams_l1,total_streams_l7,is_entitled,num_streaming_devices_l7
0,{3B3E5CD1-6641-426E-B845-DF50601CBD99},145,50b2ee63-b105-45e7-a1db-271c4e23e340,IAP:APPLE:120000685339641,0,0,0,0
1,{3BC85F1E-6840-4228-9ACF-DC6BCA368D7B},88,4ce2e8bc-8616-4a60-8546-25cb67fa4d30,D2C:BAMTECH:urn:dss:disney:orders:a5669172-394...,1,3,1,2
2,{3BDA25C5-1859-49B2-8981-0933E34D0563},21,02c33ce0-b5c7-47a9-bfef-44dd310bc63b,EXT:VERIZON:VR-WHMRRNYBNQ8UGYC9,2,8,1,1
3,{3C9ACBB5-5931-49C1-A56F-AD621E0FB97B},126,4983ef9f-2598-4d88-8569-e542872425d0,D2C:BAMTECH:urn:dss:disney:orders:40338d99-10c...,0,0,0,0
4,{3CAA0170-33F5-459E-90F8-2E9164F9BBAA},95,214ea4f8-faea-439f-a6cb-3e478b7428bd,D2C:BAMTECH:urn:dss:disney:orders:68b9ea51-7dd...,6,7,1,1


In [72]:
# create function and apply for mapping of holdout groups on 'swid_holdout'
def holdout_grouping(df, no_onboarding_start=243, no_marketing_start=246):
    """Map one record's ``swid_holdout`` bucket to its marketing holdout group.

    Parameters
    ----------
    df : mapping-like
        A single record (for example one DataFrame row handed over by
        ``DataFrame.apply(..., axis=1)``) exposing a numeric
        ``'swid_holdout'`` entry.
    no_onboarding_start : number, default 243
        First bucket that belongs to the "no onboarding" group.
    no_marketing_start : number, default 246
        First bucket that belongs to the "no marketing" group.

    Returns
    -------
    str
        "all marketing" for buckets below ``no_onboarding_start``,
        "no onboarding" for buckets in
        ``[no_onboarding_start, no_marketing_start)``,
        "no marketing" for everything else. A value for which both
        comparisons are false (such as NaN) therefore also ends up in
        "no marketing".
    """
    bucket = df['swid_holdout']
    if bucket < no_onboarding_start:
        return "all marketing"
    # Reaching this point already means the bucket is not below the first
    # threshold, so only the upper bound needs checking.
    if bucket < no_marketing_start:
        return "no onboarding"
    return "no marketing"
    
# Label every row with its holdout group; axis='columns' hands each row to the function.
engagement['marketing_holdout'] = engagement.apply(func=holdout_grouping, axis='columns')

In [73]:
# Show the first five rows to check that the new holdout column was filled in.
engagement.head(n=5)

Unnamed: 0,swid,swid_holdout,accountid,subscription_id,total_streams_l1,total_streams_l7,is_entitled,num_streaming_devices_l7,marketing_holdout
0,{3B3E5CD1-6641-426E-B845-DF50601CBD99},145,50b2ee63-b105-45e7-a1db-271c4e23e340,IAP:APPLE:120000685339641,0,0,0,0,all marketing
1,{3BC85F1E-6840-4228-9ACF-DC6BCA368D7B},88,4ce2e8bc-8616-4a60-8546-25cb67fa4d30,D2C:BAMTECH:urn:dss:disney:orders:a5669172-394...,1,3,1,2,all marketing
2,{3BDA25C5-1859-49B2-8981-0933E34D0563},21,02c33ce0-b5c7-47a9-bfef-44dd310bc63b,EXT:VERIZON:VR-WHMRRNYBNQ8UGYC9,2,8,1,1,all marketing
3,{3C9ACBB5-5931-49C1-A56F-AD621E0FB97B},126,4983ef9f-2598-4d88-8569-e542872425d0,D2C:BAMTECH:urn:dss:disney:orders:40338d99-10c...,0,0,0,0,all marketing
4,{3CAA0170-33F5-459E-90F8-2E9164F9BBAA},95,214ea4f8-faea-439f-a6cb-3e478b7428bd,D2C:BAMTECH:urn:dss:disney:orders:68b9ea51-7dd...,6,7,1,1,all marketing


In [80]:
# Drop rows with no entitlement data, then store the flag as an integer.
# Filtering with a boolean mask selects rows by value instead of by index label,
# so it stays correct even when the index holds duplicate labels; .copy() gives
# the later cells an independent frame to add columns to.
engagement = engagement.loc[engagement['is_entitled'] != 'unknown'].copy()
# Bracket assignment is used because attribute-style assignment only works for
# columns that already exist and is easy to confuse with setting an attribute.
engagement['is_entitled'] = engagement['is_entitled'].astype(int)

In [75]:
# create active_binary_l1 with 1 = user was active in l1d (total_streams_l1 > 0), else 0.
# The comparison runs on the whole column at once rather than calling a Python
# lambda for every row, and it also yields a proper (empty) column for an empty frame.
engagement['active_binary_l1'] = (engagement['total_streams_l1'] > 0).astype('int64')

In [76]:
# create active_binary_l7 with 1 = user was active in l7d (total_streams_l7 > 0), else 0.
# The comparison runs on the whole column at once rather than calling a Python
# lambda for every row, and it also yields a proper (empty) column for an empty frame.
engagement['active_binary_l7'] = (engagement['total_streams_l7'] > 0).astype('int64')

In [85]:
# Per-holdout-group means. The frame also holds string identifiers (swid,
# accountid, subscription_id); pandas 2.0+ raises a TypeError on those instead
# of silently dropping them, so restrict the aggregation to numeric columns.
engagement.groupby('marketing_holdout').mean(numeric_only=True)

Unnamed: 0_level_0,swid_holdout,total_streams_l1,total_streams_l7,is_entitled,num_streaming_devices_l7,active_binary_l1,active_binary_l7
marketing_holdout,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
all marketing,120.949058,2.502819,15.566941,0.884101,1.635421,0.456371,0.74275
no marketing,250.512102,2.506861,15.559227,0.886171,1.643815,0.456087,0.746904
no onboarding,244.004446,2.5513,15.916211,0.880643,1.624316,0.466313,0.73803


In [84]:
# Per-holdout-group medians. The frame also holds string identifiers (swid,
# accountid, subscription_id); pandas 2.0+ raises a TypeError on those instead
# of silently dropping them, so restrict the aggregation to numeric columns.
engagement.groupby('marketing_holdout').median(numeric_only=True)

Unnamed: 0_level_0,swid_holdout,total_streams_l1,total_streams_l7,is_entitled,num_streaming_devices_l7,active_binary_l1,active_binary_l7
marketing_holdout,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
all marketing,121,0,6,1,1,0,1
no marketing,251,0,6,1,1,0,1
no onboarding,244,0,6,1,1,0,1


In [82]:
# Independent two-sample t-test on the entitlement flag:
# "all marketing" group versus "no onboarding" group.
t1, p1 = stats.ttest_ind(
    engagement.loc[engagement['marketing_holdout'] == 'all marketing', 'is_entitled'],
    engagement.loc[engagement['marketing_holdout'] == 'no onboarding', 'is_entitled'],
)

In [83]:
# Show the t statistic followed by the p-value, separated by a single space.
print(f"{t1!s} {p1!s}")

0.820941826926816 0.4116798122342361
