In [17]:
import os
import time
from datetime import date, timedelta

import keyring
import numpy as np
import pandas as pd
import psycopg2
import snowflake.connector
from pandas.io import sql
from scipy import stats
from sqlalchemy import create_engine

# Notebook-wide pandas display settings: cap the rendered width of a single
# column at 50 characters and allow up to 500 columns before pandas elides them.
pd.set_option('display.max_colwidth', 50)
pd.set_option('display.max_columns', 500)

from matplotlib import pyplot as plt
import seaborn as sns
# Plotting setup: keep seaborn's current colour palette in `color`
# (not referenced by any cell visible here), render matplotlib figures inline
# in the notebook, and use seaborn's "darkgrid" theme.
color = sns.color_palette()
%matplotlib inline
sns.set_style("darkgrid")

In [18]:
# Snowflake login used for the browser-based SSO connection below.
# Set the SNOWFLAKE_USER environment variable to run this notebook as someone
# else without editing the cell; when it is unset or empty the original
# author's login is used, so existing behaviour is unchanged.
snowflake_username = (
    os.environ.get('SNOWFLAKE_USER')
    or 'matthew.bessey@disneystreaming.com'
)

In [19]:
# Open the Snowflake session with the 'externalbrowser' authenticator, which
# hands the login off to the identity provider in a browser window.
# NOTE(review): `ctx` is not closed in any cell visible here - confirm it is
# closed at the end of the notebook.
ctx = snowflake.connector.connect(
    account='disneystreaming.us-east-1',
    user=snowflake_username,
    authenticator='externalbrowser',
)

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


In [20]:
# SQL for the engagement extract: for subscriptions with partner = 'disney',
# join subscription -> account -> oneid_combined (SWID and its holdout bucket)
# -> the daily account engagement table, restricted to the ds = '2019-12-12'
# snapshot and capped at 1,000,000 rows.
#
# NOTE(review):
# - LIMIT is applied without ORDER BY, so which rows come back is not
#   guaranteed to be the same between runs.
# - The snapshot date is hard-coded in the query text.
# - The entitlement filter is commented out, so is_entitled = 0 rows are kept.
# - subscription / account / oneid_combined are unqualified and resolve against
#   the connection's current database/schema; only the engagement table is
#   fully qualified.
# - Presumably an account with several subscriptions appears once per
#   subscription here - TODO confirm the intended row grain before averaging.
query = """
select o.swid
, o.swid_holdout
, a.accountid
, s.subscription_id
, e.TOTAL_STREAMS_L1
, e.total_streams_l7
, e.is_entitled
, e.NUM_STREAMING_DEVICES_L7
from subscription s
join account a on a.accountid = s.account_id
join oneid_combined o on a.swid = o.swid
join "DSS_PROD"."DISNEY_PLUS"."DIM_DISNEY_DAILY_ACCOUNT_ENGAGEMENT" e on a.accountid = e.account_id
where s.partner = 'disney'
--and s.is_entitled = 1
and e.ds = '2019-12-12'
limit 1000000;
"""

In [21]:
# Execute the extract over the open Snowflake connection and load the whole
# result set into a DataFrame.
engagement = pd.read_sql(sql=query, con=ctx)

In [22]:
# Normalise every column label to lower case so the cells below can refer to
# columns by lower-case name regardless of the casing used in the query.
engagement.columns = engagement.columns.map(str.lower)

In [23]:
def holdout_grouping(df):
    """Return the marketing-holdout cohort label for a single row.

    Despite its name, ``df`` is one row (anything indexable by
    ``'swid_holdout'``), as passed by ``DataFrame.apply(..., axis=1)``.

    Mapping of ``swid_holdout`` to label:
        * below 243            -> "all marketing"
        * 243 up to (not incl.) 245 -> "no onboarding"
        * anything else        -> "no marketing"

    Values that compare False against every bound (e.g. NaN) fall through to
    "no marketing".
    """
    bucket = df['swid_holdout']
    if bucket < 243:
        return "all marketing"
    if 243 <= bucket < 245:
        return "no onboarding"
    return "no marketing"

In [24]:
# Tag every row with its holdout cohort by running holdout_grouping row by row.
# NOTE(review): a disabled earlier draft of this step used different cut-offs
# ('all marketing' below 244, 'no onboarding' from 244 up to 260) - confirm
# which boundaries match the actual holdout design.
engagement['marketing_holdout'] = engagement.apply(
    holdout_grouping,
    axis='columns',
)

In [9]:
# Preview the first five rows to sanity-check the extract and the cohort label.
engagement.head(n=5)

Unnamed: 0,swid,swid_holdout,accountid,subscription_id,total_streams_l1,total_streams_l7,is_entitled,num_streaming_devices_l7,marketing_holdout
0,{00103234-FF10-48D6-BCE2-8DC7BE9B2827},99,c0666394-4674-4dc7-a86a-a4c82b612087,D2C:BAMTECH:urn:dss:disney:orders:7722ef81-693...,0,9,1,2,all marketing
1,{0050C902-AC50-4518-A2CF-DF92F3115C50},207,7333a22d-269c-4678-af15-36e3685d471b,D2C:BAMTECH:urn:dss:disney:orders:218f84e5-ca6...,0,0,0,0,all marketing
2,{00918301-8286-4F9E-9596-659C96984277},247,374a64ab-63e7-4c5c-ac9a-de227f5d08f4,D2C:BAMTECH:urn:dss:disney:orders:0d4eed5a-3e4...,0,3,1,1,no marketing
3,{0142889A-B679-437A-BC94-89804A99CE9F},243,3f2172c8-8d04-46d9-8d92-5aa9d3630a04,D2C:BAMTECH:urn:dss:disney:orders:e216afa4-85a...,0,0,0,0,no onboarding
4,{01519336-83DD-4790-BBD6-AD71DEA54678},143,0ec7511e-06b0-4acd-b02f-b0765bb14e4d,IAP:AMAZON:AuFlMAUfsLDgL_O8jDBpaXBnWasbsCSWJpd...,3,18,1,2,all marketing


In [25]:
# 1 when the row has any streams counted in total_streams_l1, else 0.
# Done as a single vectorised comparison rather than a per-row Python lambda:
# same values and integer dtype (missing counts still map to 0), but it avoids
# building a Series for each of up to a million rows.
engagement['active_binary_l1'] = (
    engagement['total_streams_l1'] > 0
).astype('int64')

In [26]:
# 1 when the row has any streams counted in total_streams_l7, else 0.
# Done as a single vectorised comparison rather than a per-row Python lambda:
# same values and integer dtype (missing counts still map to 0), but it avoids
# building a Series for each of up to a million rows.
engagement['active_binary_l7'] = (
    engagement['total_streams_l7'] > 0
).astype('int64')

In [27]:
# Per-cohort averages of the numeric columns.
# numeric_only=True is passed explicitly: the frame also holds string id
# columns (swid, accountid, subscription_id), and without it recent pandas
# raises a TypeError instead of silently dropping them.
# NOTE(review): the mean of swid_holdout is only a bucket id average and has
# no analytical meaning.
engagement.groupby(['marketing_holdout']).mean(numeric_only=True)

Unnamed: 0_level_0,swid_holdout,total_streams_l1,total_streams_l7,num_streaming_devices_l7,active_binary_l1,active_binary_l7
marketing_holdout,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
all marketing,120.989641,1.961209,15.839881,1.654915,0.373771,0.758165
no marketing,250.016855,1.94892,15.773456,1.643166,0.374343,0.759426
no onboarding,243.497691,1.977932,15.828586,1.637028,0.379779,0.753272


In [28]:
# Independent two-sample t-test on total_streams_l1:
# 'all marketing' cohort versus 'no onboarding' cohort.
# equal_var=True is SciPy's default (pooled-variance Student's t); it is spelled
# out here so the assumption is visible.
# NOTE(review): if the two cohorts differ a lot in size or variance, Welch's
# test (equal_var=False) may be the better choice - confirm.
t1, p1 = stats.ttest_ind(
    engagement.loc[engagement['marketing_holdout'] == 'all marketing', 'total_streams_l1'],
    engagement.loc[engagement['marketing_holdout'] == 'no onboarding', 'total_streams_l1'],
    equal_var=True,
)

In [29]:
# Show the t statistic followed by its p-value.
print(t1, p1)

-0.3327132507892619 0.7393508190745295
