In [2]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from pandas.io import sql
import snowflake.connector
import keyring
import psycopg2 
import time
from datetime import date, timedelta
from scipy import stats

# DataFrame display limits for this notebook: truncate long cell values
# at 50 characters and show up to 500 columns before eliding.
pd.options.display.max_colwidth = 50
pd.options.display.max_columns = 500

# Plotting stack: matplotlib's pyplot interface plus seaborn.
from matplotlib import pyplot as plt
import seaborn as sns
# Palette seaborn returns when called with no arguments (its current color cycle).
color = sns.color_palette()
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's "darkgrid" style for subsequent figures.
sns.set_style("darkgrid")

In [3]:
# Snowflake login used for the SSO connection. The SNOWFLAKE_USER environment
# variable overrides it, so other people can run the notebook without editing
# this cell; when it is unset the original account is used.
import os

snowflake_username = os.environ.get('SNOWFLAKE_USER', 'matthew.bessey@disneystreaming.com')

In [4]:
# Open the Snowflake session. The 'externalbrowser' authenticator hands the
# login off to the identity provider in a browser window.
connection_settings = {
    'authenticator': 'externalbrowser',
    'user': snowflake_username,
    'account': 'disneystreaming.us-east-1',
}
ctx = snowflake.connector.connect(**connection_settings)

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


In [5]:
# Date parameters for the query. Each value carries its own single quotes so
# that it is substituted into the SQL text as a string literal.
subscription_start_date, engagement_date = (
    "'2019-12-08'",  # max of subscription start date
    "'2019-12-14'",  # date for which we want to pull engagement behaviors
)

In [6]:
# SQL template: one row per subscription joined to its account, its oneid
# holdout bucket and the daily engagement snapshot, restricted to the 'disney'
# partner. {latest_start} caps the subscription start timestamp and
# {engagement_ds} selects the engagement snapshot date.
# NOTE(review): LIMIT is applied without an ORDER BY, so which 500000 rows come
# back is up to the warehouse - confirm that an arbitrary subset is acceptable.
query_template = """
select o.swid
, o.swid_holdout
, a.accountid
, s.subscription_id
, e.TOTAL_STREAMS_L1
, e.total_streams_l7
, e.is_entitled
, e.num_streaming_devices_l1
, e.NUM_STREAMING_DEVICES_L7
, e.num_streaming_profiles_l1
, e.num_streaming_profiles_l7
from subscription s
join account a on a.accountid = s.account_id
join oneid_combined o on a.swid = o.swid
join "DSS_PROD"."DISNEY_PLUS"."DIM_DISNEY_DAILY_ACCOUNT_ENGAGEMENT" e on a.accountid = e.account_id
where s.partner = 'disney'
and s.CALCULATED_SUBSCRIPTION_START_DTM <= {latest_start}
--and s.is_entitled = 1
and e.ds = {engagement_ds}
limit 500000;
"""
query = query_template.format(latest_start=subscription_start_date,
                              engagement_ds=engagement_date)

In [7]:
# Execute the query over the open Snowflake connection and load the result
# set into the `engagement` DataFrame.
engagement = pd.read_sql(sql=query, con=ctx)

In [9]:
# Normalise the column labels to lower case (the query mixes upper- and
# lower-case column names), then preview the first rows.
engagement.columns = engagement.columns.map(str.lower)
engagement.head()

Unnamed: 0,swid,swid_holdout,accountid,subscription_id,total_streams_l1,total_streams_l7,is_entitled,num_streaming_devices_l1,num_streaming_devices_l7,num_streaming_profiles_l1,num_streaming_profiles_l7
0,{A6646E50-0775-43A2-8803-6F20B1313F45},237,900de9e6-ffea-49b8-af34-88d1d5b527aa,D2C:BAMTECH:urn:dss:disney:orders:00f41ade-89c...,1,3,1,1,1,1,1
1,{A6EAAF79-AABA-4DEB-898D-D3A0DB5284CA},93,5365970c-721f-4271-903f-fe323eb001c7,D2C:BAMTECH:urn:dss:disney:orders:5d6fecfc-b2f...,0,0,0,0,0,0,0
2,{A7148562-BE97-4797-84A6-B802E17276D7},80,63a6b087-c08f-4de7-b126-ad245f02eef5,D2C:BAMTECH:urn:dss:disney:orders:60117879-e99...,8,49,1,2,4,3,5
3,{A72F45E3-AB28-4E15-92B7-573B84D1FD7C},69,282ea41a-de72-4dfc-9661-5c6adeb932f8,IAP:GOOGLE:GPA.3338-8359-5027-20595,12,26,1,1,2,1,1
4,{A790A98E-CFC7-493E-9DD1-92673D2A47F3},200,806514d6-d527-4e24-afd5-cf369763ff97,D2C:BAMTECH:urn:dss:disney:orders:5f021c4b-da8...,0,0,1,0,0,0,0


In [10]:
# Row-level mapping from the 'swid_holdout' bucket to a marketing holdout label.
def holdout_grouping(df):
    """Return the marketing holdout label for one row.

    `df` is a single record (anything indexable by 'swid_holdout', e.g. a
    DataFrame row). Buckets below 243 map to "all marketing", buckets 243
    through 245 map to "no onboarding", and every other value maps to
    "no marketing". A value that fails both comparisons (such as NaN) also
    lands in "no marketing".
    """
    bucket = df['swid_holdout']
    if bucket < 243:
        return "all marketing"
    if 243 <= bucket < 246:
        return "no onboarding"
    return "no marketing"
    
# Label every row with its marketing holdout group in one vectorised pass
# instead of calling a Python function per row. The thresholds and the
# fall-through label are the same as in holdout_grouping: < 243 -> "all
# marketing", 243..245 -> "no onboarding", anything else (including NaN)
# -> "no marketing".
holdout_bucket = engagement['swid_holdout']
engagement['marketing_holdout'] = np.select(
    [holdout_bucket < 243, (holdout_bucket >= 243) & (holdout_bucket < 246)],
    ["all marketing", "no onboarding"],
    default="no marketing",
)

In [11]:
# Show the first five rows to confirm the marketing_holdout column was added.
engagement.head(n=5)

Unnamed: 0,swid,swid_holdout,accountid,subscription_id,total_streams_l1,total_streams_l7,is_entitled,num_streaming_devices_l1,num_streaming_devices_l7,num_streaming_profiles_l1,num_streaming_profiles_l7,marketing_holdout
0,{A6646E50-0775-43A2-8803-6F20B1313F45},237,900de9e6-ffea-49b8-af34-88d1d5b527aa,D2C:BAMTECH:urn:dss:disney:orders:00f41ade-89c...,1,3,1,1,1,1,1,all marketing
1,{A6EAAF79-AABA-4DEB-898D-D3A0DB5284CA},93,5365970c-721f-4271-903f-fe323eb001c7,D2C:BAMTECH:urn:dss:disney:orders:5d6fecfc-b2f...,0,0,0,0,0,0,0,all marketing
2,{A7148562-BE97-4797-84A6-B802E17276D7},80,63a6b087-c08f-4de7-b126-ad245f02eef5,D2C:BAMTECH:urn:dss:disney:orders:60117879-e99...,8,49,1,2,4,3,5,all marketing
3,{A72F45E3-AB28-4E15-92B7-573B84D1FD7C},69,282ea41a-de72-4dfc-9661-5c6adeb932f8,IAP:GOOGLE:GPA.3338-8359-5027-20595,12,26,1,1,2,1,1,all marketing
4,{A790A98E-CFC7-493E-9DD1-92673D2A47F3},200,806514d6-d527-4e24-afd5-cf369763ff97,D2C:BAMTECH:urn:dss:disney:orders:5f021c4b-da8...,0,0,1,0,0,0,0,all marketing


In [12]:
# Drop rows with no entitlement data ('unknown'), then store is_entitled as int.
# A boolean mask filters by value rather than by index label, so the result
# does not depend on the index being unique. .copy() makes the filtered frame
# independent, so later column assignments do not trigger SettingWithCopyWarning.
has_entitlement = engagement['is_entitled'] != 'unknown'
engagement = engagement.loc[has_entitlement].copy()
engagement['is_entitled'] = engagement['is_entitled'].astype(int)

In [13]:
# create active_binary_l1 with 1 = user was active in l1d (at least one stream)
# Vectorised comparison; gives the same 0/1 values as a row-by-row lambda
# without the per-row Python overhead.
engagement['active_binary_l1'] = (engagement['total_streams_l1'] > 0).astype(int)

In [14]:
# create active_binary_l7 with 1 = user was active in l7d (at least one stream)
# Vectorised comparison; gives the same 0/1 values as a row-by-row lambda
# without the per-row Python overhead.
engagement['active_binary_l7'] = (engagement['total_streams_l7'] > 0).astype(int)

In [55]:
# Percent difference in mean engagement: "no marketing" relative to "no onboarding".
# numeric_only=True keeps the string id columns (swid, accountid,
# subscription_id) out of the aggregation; recent pandas raises on them otherwise.
# Groups are selected by label instead of by position, so a missing group fails
# loudly rather than silently comparing the wrong rows.
# NOTE(review): the baseline here is "no onboarding", not "all marketing" - confirm
# this is the intended comparison.
holdout_means = engagement.groupby('marketing_holdout').mean(numeric_only=True)
# swid_holdout is the bucket id, not an engagement metric, so leave it out.
metric_means = holdout_means.drop(columns='swid_holdout')
(metric_means.loc['no marketing'] / metric_means.loc['no onboarding'] - 1) * 100

total_streams_l1            -4.281891
total_streams_l7            -5.776804
is_entitled                 -0.220654
num_streaming_devices_l1    -2.318981
num_streaming_devices_l7    -1.757573
num_streaming_profiles_l1   -2.863786
num_streaming_profiles_l7   -1.850551
active_binary_l1            -2.565453
active_binary_l7            -0.780762
dtype: float64

In [28]:
# Per-group medians of the numeric columns. numeric_only=True skips the string
# id columns (swid, accountid, subscription_id), which recent pandas refuses to
# aggregate with median().
engagement.groupby('marketing_holdout').median(numeric_only=True)

Unnamed: 0_level_0,swid_holdout,total_streams_l1,total_streams_l7,is_entitled,num_streaming_devices_l1,num_streaming_devices_l7,num_streaming_profiles_l1,num_streaming_profiles_l7,active_binary_l1,active_binary_l7
marketing_holdout,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
all marketing,121,0,6,1,0,1,0,1,0,1
no marketing,250,0,5,1,0,1,0,1,0,1
no onboarding,244,0,6,1,0,1,0,1,0,1


In [29]:
# Independent two-sample t-test on is_entitled:
# "all marketing" rows versus "no onboarding" rows.
holdout_label = engagement['marketing_holdout']
entitled_all_marketing = engagement.loc[holdout_label == 'all marketing', 'is_entitled']
entitled_no_onboarding = engagement.loc[holdout_label == 'no onboarding', 'is_entitled']
t1, p1 = stats.ttest_ind(entitled_all_marketing, entitled_no_onboarding)

In [30]:
# Show the t statistic followed by the p-value, separated by a space.
print(t1, p1, sep=" ")

0.20854168863716346 0.8348061236709071
