# Mindfulness MRT Exploration

Get to know the data and see whether there are any interesting results.

## Imports

In [397]:
import pandas as pd
import numpy as np
from statsmodels.discrete.discrete_model import Logit
from statsmodels.genmod.generalized_estimating_equations import GEE
import statsmodels.api as sm

## Data upload

In [2]:
# Read every raw CSV export in one pass; the targets on the left line up
# one-to-one with the paths below (app logs, daily survey, mobile survey,
# notification events, PAM, participants).
(
    app_logs,
    daily_survey,
    mobile_survey,
    notif_event,
    pam,
    participants,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../mindfulness-data/app-logs-headspace.csv',
        '../mindfulness-data/daily-survey.csv',
        '../mindfulness-data/mobile-surveys.csv',
        '../mindfulness-data/notif-event.csv',
        '../mindfulness-data/pam.csv',
        '../mindfulness-data/participants.csv',
    )
)

## Data cleaning

Get relevant participants

In [3]:
# Emails of the enrolled participants; used to filter the survey tables
participant_emails = participants.email.values

# Keep only daily-survey rows that belong to an enrolled participant.
# Take an explicit copy: columns are added to this frame later, and assigning
# into a slice of `daily_survey` raises SettingWithCopyWarning.
daily_survey_filt = daily_survey.loc[daily_survey.email.isin(participant_emails), :].copy()

Make dummy vars for method and content type

In [4]:
# Combined "<method>|<content>" label per row; it is one-hot encoded further down
daily_survey_filt['method_content'] = daily_survey_filt['method'] + '|' + daily_survey_filt['content']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [5]:
# One-hot encode the delivery method and the combined method|content label,
# then append both indicator blocks to the survey frame in that order.
method_dummies = pd.get_dummies(daily_survey_filt['method'])
method_content_dummies = pd.get_dummies(daily_survey_filt['method_content'])
daily_survey_filt = pd.concat(
    [daily_survey_filt, method_dummies, method_content_dummies],
    axis=1
)

# `alarm_millis` is parsed as a millisecond epoch value
daily_survey_filt['timestamp'] = pd.to_datetime(daily_survey_filt['alarm_millis'], unit='ms')

Mobile survey - reshape it into a form where I can run a regression

In [6]:
# Restrict the mobile survey to rows from enrolled participants
is_participant = mobile_survey.email.isin(participant_emails)
mobile_survey_filt = mobile_survey.loc[is_participant, :]

In [278]:
# Column order of the expanded survey table
expanded_columns = [
    'id', 'email', 'code', 'header',
    'timestamp_mindfulness', 'response', 'created_at'
]

# Expand the survey responses: each `response` cell holds ';'-separated
# entries; every entry containing the '--18000000,' delimiter becomes one row.
# NOTE(review): the delimiter looks like a fixed timezone offset - confirm
# that no entries use a different offset, since those are skipped here.
expanded_rows = []
survey_fields = zip(
    mobile_survey_filt['id'],
    mobile_survey_filt['email'],
    mobile_survey_filt['code'],
    mobile_survey_filt['header'],
    mobile_survey_filt['response'],
    mobile_survey_filt['created_at'],
)
for row_id, row_email, row_code, row_header, raw_response, row_created in survey_fields:
    for entry in raw_response.split(';'):
        pieces = entry.split('--18000000,')
        if len(pieces) <= 1:
            # Delimiter not present: nothing to log for this fragment
            continue
        expanded_rows.append({
            'id': row_id,
            'email': row_email,
            'code': row_code,
            'header': row_header,
            'timestamp_mindfulness': pieces[0],
            # The answer text minus its final character
            'response': pieces[1][:-1],
            'created_at': row_created,
        })

mobile_survey_cleaned = pd.DataFrame(expanded_rows, columns=expanded_columns)
mobile_survey_cleaned['timestamp_mindfulness'] = pd.to_datetime(
    mobile_survey_cleaned['timestamp_mindfulness'],
    format='%Y-%m-%dT%H:%M:%S'
)

# PAM timestamps: keep the text before any '--' and remove a '-10800000'
# suffix, leaving a plain ISO-like string that can be parsed.
pam['timestamp_z'] = [
    raw_stamp.split('--')[0].replace('-10800000', '')
    for raw_stamp in pam.timestamp_z.values
]
pam['timestamp'] = pd.to_datetime(pam['timestamp_z'], format='%Y-%m-%dT%H:%M:%S')

In [279]:
# Calendar-day key (timestamp floored to midnight) for every table; the later
# merges join on it. The uppercase 'D' alias is used throughout: the lowercase
# 'd' spelling is deprecated in recent pandas releases.
mobile_survey_cleaned['day'] = mobile_survey_cleaned.timestamp_mindfulness.dt.floor('D')
daily_survey_filt['day'] = daily_survey_filt.timestamp.dt.floor('D')
app_logs['created_at'] = pd.to_datetime(app_logs['created_at'])
app_logs['day'] = app_logs['created_at'].dt.floor('D')
pam['timestamp'] = pd.to_datetime(pam['timestamp'])
pam['day'] = pam['timestamp'].dt.floor('D')

Clean PAM

In [280]:
# Rows whose `mood` holds one of the strings '1'..'4' get their five affect
# columns rotated.
# NOTE(review): presumably these rows were exported with shifted columns -
# confirm against the raw PAM export.
needs_swap = pam.mood.isin([str(level) for level in range(1, 5)])
ind_swap = pam.index[needs_swap.values]

# Read every source column first (destination -> new values) so that no
# write below can affect a later read.
rotated_values = {
    'mood': pam.loc[ind_swap, 'positive_affect'].values,
    'negative_affect': pam.loc[ind_swap, 'affect_valence'].values,
    'positive_affect': pam.loc[ind_swap, 'affect_arousal'].values,
    'affect_arousal': pam.loc[ind_swap, 'mood'].astype(int).values,
    'affect_valence': pam.loc[ind_swap, 'negative_affect'],
}

# Write the rotated values back, column by column
for destination, new_values in rotated_values.items():
    pam.loc[ind_swap, destination] = new_values

Get once daily

In [281]:
# Cast both affect scores to integers
for affect_column in ('negative_affect', 'positive_affect'):
    pam[affect_column] = pam[affect_column].astype(int)

Need to group app_logs

In [286]:
# Total seconds logged per (day, worker); `worker_id` is renamed to `email`
# so the result shares its key names with the other tables.
app_logs_day = (
    app_logs
    .groupby(['day', 'worker_id'], as_index=False)['time_seconds']
    .sum()
    .rename(columns={'worker_id': 'email'})
)

Join to be able to see whether notification led to app response.

#### Fix timing information

Will push day back by one, if the survey was answered before 5AM

Mobile survey file.

In [283]:
# Work on a copy, ordered by participant and then by answer time, so that the
# positional index below follows chronological order within each email.
mobile_survey_cleaned_shift = mobile_survey_cleaned.copy()
mobile_survey_cleaned_shift = mobile_survey_cleaned_shift.sort_values(
    by=['email', 'timestamp_mindfulness']).reset_index(drop=True)
# Rows answered before 05:00 are attributed to the previous calendar day
day_shift_ind = mobile_survey_cleaned_shift.loc[
    mobile_survey_cleaned_shift.timestamp_mindfulness.dt.hour < 5,
    :
].index

mobile_survey_cleaned_shift.loc[day_shift_ind, 'day'] -= pd.to_timedelta(1, unit='d')

# (email, day) pairs that still have more than one response after the shift
duplicates = mobile_survey_cleaned_shift.groupby(['email', 'day'])['response'].count()
duplicates = duplicates.loc[duplicates > 1, :].reset_index()

for ind in duplicates.index:
    d = duplicates.loc[ind, 'day']
    e = duplicates.loc[ind, 'email']
    # Previous day as a 'YYYY-MM-DD' string, for comparison with the string-cast days below
    prev_day = str((d - pd.to_timedelta(1, unit='d')).floor('d')).split(' ')[0]
    
    # If this participant has no entry on the previous day, move the first
    # (earliest, given the sort above) of the duplicated rows back one day.
    if prev_day not in mobile_survey_cleaned_shift.loc[
        mobile_survey_cleaned_shift.email == e, 'day'].dt.floor('d').astype(str).values:
        # NOTE: `ind` is reused here for the row label in the shifted frame;
        # the for-loop rebinds it on the next iteration.
        ind = mobile_survey_cleaned_shift.loc[
            (mobile_survey_cleaned_shift.email == e) & (mobile_survey_cleaned_shift.day == d), :
        ].index[0]
        mobile_survey_cleaned_shift.loc[ind, 'day'] -= pd.to_timedelta(1, unit='d')

# Keep only the join keys and the answer, dropping exact repeats
mobile_survey_cleaned_shift = mobile_survey_cleaned_shift[
    ['email', 'response', 'day']
].drop_duplicates().reset_index(drop=True)

Push notification survey file

In [389]:
# Days on which a push notification was sent, one row per (email, day).
# The raw column name carries a leading space (' push_notification'), so it is
# renamed here. The rename is chained rather than done in place: an in-place
# rename on a `.loc` slice can raise SettingWithCopyWarning.
daily_survey_filt_shift = (
    daily_survey_filt
    .loc[daily_survey_filt[' push_notification'] == 1, :]
    .rename(columns={' push_notification': 'push_notification'})
    .loc[:, ['email', 'push_notification', 'day']]
    .drop_duplicates()
    .reset_index(drop=True)
)

PAM

In [390]:
# Exclude one hard-coded account (the reason is not recorded in this notebook),
# order the remaining PAM rows by time, and renumber the index.
pam_shift = pam.loc[pam.email != 'fnokeke@gmail.com', :].sort_values(by='timestamp').reset_index(drop=True)
# Entries recorded before 05:00 are attributed to the previous calendar day
day_shift_ind = pam_shift.loc[
    pam_shift.timestamp.dt.hour < 5,
    :
].index

pam_shift.loc[day_shift_ind, 'day'] -= pd.to_timedelta(1, unit='d')

# (email, day) pairs that still have more than one PAM entry after the shift
duplicates = pam_shift.groupby(['email', 'day'])['negative_affect'].count()
duplicates = duplicates.loc[duplicates > 1, :].reset_index()

for ind in duplicates.index:
    d = duplicates.loc[ind, 'day']
    # Previous day as a 'YYYY-MM-DD' string, for comparison with the string-cast days below
    prev_day = str((d - pd.to_timedelta(1, unit='d')).floor('d')).split(' ')[0]
    e = duplicates.loc[ind, 'email']
    # If this participant has no entry on the previous day, move the first
    # (earliest, given the timestamp sort above) duplicated row back one day.
    if prev_day not in pam_shift.loc[pam_shift.email == e, 'day'].dt.floor('d').astype(str).values:
        # NOTE: `ind` is reused for the row label in `pam_shift`; the for-loop
        # rebinds it on the next iteration.
        ind = pam_shift.loc[
            (pam_shift.email == e) & (pam_shift.day == d), :
        ].index[0]
        pam_shift.loc[ind, 'day'] -= pd.to_timedelta(1, unit='d')

# Collapse to one row per (email, day) holding the mean affect scores.
# NOTE(review): exact duplicate readings are dropped before averaging, so
# repeated identical scores on one day count once - confirm this is intended.
pam_shift = pam_shift[
    ['email', 'day', 'positive_affect', 'negative_affect']
].drop_duplicates().reset_index(drop=True)
pam_shift = pam_shift.groupby(['email', 'day'], as_index=False)[['positive_affect', 'negative_affect']].mean()

### Merge data together

In [391]:
# Start from the self-reported mindfulness answers and left-join, in order:
# push notifications, daily app usage, and daily PAM affect scores.
# Every join uses the same (day, email) key and drops exact duplicate rows.
join_keys = ['day', 'email']
right_hand_tables = (
    (daily_survey_filt_shift, ['email', 'push_notification', 'day']),
    (app_logs_day, ['email', 'day', 'time_seconds']),
    (pam_shift, ['email', 'day', 'positive_affect', 'negative_affect']),
)

notif_w_mindfulness_resp = mobile_survey_cleaned_shift[['email', 'response', 'day']]
for right_table, right_columns in right_hand_tables:
    notif_w_mindfulness_resp = pd.merge(
        left=notif_w_mindfulness_resp,
        right=right_table[right_columns],
        on=join_keys,
        how='left'
    ).drop_duplicates()

Fill on days when push notification did not occur. Also fill when there was no time spent in app. Lastly, drop rows without PAM.

In [393]:
# No matching notification row means no push that day; no matching app-log row
# means no time in the app. The filled column is assigned back instead of
# calling `fillna(..., inplace=True)` on the selected column: that is chained
# assignment, which does not update the frame under pandas Copy-on-Write.
notif_w_mindfulness_resp['push_notification'] = notif_w_mindfulness_resp['push_notification'].fillna(0)
notif_w_mindfulness_resp['time_seconds'] = notif_w_mindfulness_resp['time_seconds'].fillna(0)

# Drop rows without a PAM score (negative_affect missing after the left join)
notif_w_mindfulness_resp = notif_w_mindfulness_resp.loc[
    notif_w_mindfulness_resp.negative_affect.notna(), :].reset_index(drop=True)

Count, for now, all responses as just yes/no

In [394]:
# Binary outcome columns derived from the survey answer. Each inner dict maps
# the raw answer to 0/1 for that outcome; answers not listed map to NaN.
outcome_encodings = {
    'outcome_with_app': {'yes-with-app': 1, 'no': 0, 'yes-on-my-own': 0},
    'outcome_any_mindfulness': {'yes-with-app': 1, 'no': 0, 'yes-on-my-own': 1},
    'outcome_on_my_own': {'yes-with-app': 0, 'no': 0, 'yes-on-my-own': 1},
}

for outcome_column, response_map in outcome_encodings.items():
    notif_w_mindfulness_resp[outcome_column] = notif_w_mindfulness_resp.response.map(response_map)

Now put value to see whether mindfulness was done on previous day

In [452]:
# Add lagged (previous-day) and lead (next-day) features per participant.
# Columns start as None (or 0 for the two count-like columns) and are only
# filled in when the adjacent calendar day exists for the same email.
notif_w_mindfulness_resp['prev_day_with_app'] = None
notif_w_mindfulness_resp['prev_day_any_mindfulness'] = None
notif_w_mindfulness_resp['prev_day_on_my_own'] = None
notif_w_mindfulness_resp['prev_day_survey_completed'] = 0
notif_w_mindfulness_resp['prev_day_negative_affect'] = None
notif_w_mindfulness_resp['prev_day_positive_affect'] = None
notif_w_mindfulness_resp['next_day_negative_affect'] = None
notif_w_mindfulness_resp['next_day_positive_affect'] = None
notif_w_mindfulness_resp['next_day_time_seconds'] = 0
# Sort so that each participant's days are in ascending order
notif_w_mindfulness_resp = \
    notif_w_mindfulness_resp.sort_values(['email', 'day']).reset_index(drop=True)

for e in notif_w_mindfulness_resp.email.unique():
    # Rows and distinct days belonging to this participant
    ind = notif_w_mindfulness_resp.loc[notif_w_mindfulness_resp.email == e, :].index
    days = notif_w_mindfulness_resp.loc[ind, 'day'].unique()
    # Previous-day features: skip the participant's first day (days are sorted ascending)
    for d in days[1:]:
        # One day expressed in nanoseconds.
        # NOTE(review): assumes `days` holds datetime64[ns] values - confirm
        prev_day = d - (24*3600*int(1e9))
        # Only fill when the previous calendar day is present for this participant
        if prev_day in days: 
            # Previous day's affect scores
            # NOTE(review): assigning these arrays assumes the previous day and
            # day `d` have the same number of rows (or one row) - confirm
            prev_day_negative = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'negative_affect'
            ].values
            prev_day_positive = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'positive_affect'
            ].values
            notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                (notif_w_mindfulness_resp.email == e), 'prev_day_negative_affect'
            ] = prev_day_negative
            notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                (notif_w_mindfulness_resp.email == e), 'prev_day_positive_affect'
            ] = prev_day_positive
            # Previous day's "with app" outcome(s)
            outcome = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'outcome_with_app'
            ].values
            # 1 if any previous-day row reported mindfulness with the app, else 0
            if 1 in outcome:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_with_app'
                ] = 1
            else:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_with_app'
                ] = 0
            # The previous day exists in the table, so flag its survey as completed
            notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e),
                'prev_day_survey_completed'
            ] = 1
            # Previous day's "any mindfulness" outcome(s)
            outcome = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'outcome_any_mindfulness'
            ].values
            # 1 if any previous-day row reported some mindfulness, else 0
            if 1 in outcome:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_any_mindfulness'
                ] = 1
            else:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_any_mindfulness'
                ] = 0
            # Previous day's "on my own" outcome(s)
            outcome = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'outcome_on_my_own'
            ].values
            # 1 if any previous-day row reported mindfulness without the app, else 0
            if 1 in outcome:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_on_my_own'
                ] = 1
            else:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_on_my_own'
                ] = 0
    # Next-day features: skip the participant's last day
    for d in days[:-1]:
        # One day later, again expressed in nanoseconds
        next_day = d + (24*3600*int(1e9))
        if next_day in days:
            # App time on the next day, taken from the first matching row
            outcome = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == next_day) &
                (notif_w_mindfulness_resp.email == e), 'time_seconds'
            ].values[0]
            notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                (notif_w_mindfulness_resp.email == e), 'next_day_time_seconds'
            ] = outcome
            
            # Next day's affect scores
            next_day_negative = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == next_day) &
                (notif_w_mindfulness_resp.email == e), 'negative_affect'
            ].values
            next_day_positive = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == next_day) &
                (notif_w_mindfulness_resp.email == e), 'positive_affect'
            ].values
            notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                (notif_w_mindfulness_resp.email == e), 'next_day_negative_affect'
            ] = next_day_negative
            notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                (notif_w_mindfulness_resp.email == e), 'next_day_positive_affect'
            ] = next_day_positive

In [453]:
# Preview the first rows of the merged table.
# NOTE(review): the rendered output includes participant emails - clear or
# redact it before sharing the notebook.
notif_w_mindfulness_resp.head()

Unnamed: 0,email,response,day,push_notification,time_seconds,positive_affect,negative_affect,outcome_with_app,outcome_any_mindfulness,outcome_on_my_own,prev_day_with_app,prev_day_any_mindfulness,prev_day_on_my_own,prev_day_negative_affect,prev_day_positive_affect,next_day_time_seconds,prev_day_survey_completed,next_day_negative_affect,next_day_positive_affect
0,arhufnagle@gmail.com,yes-with-app,2018-12-08,0.0,405.0,5.0,9.0,1,1,0,,,,,,340.0,0,8,12
1,arhufnagle@gmail.com,yes-with-app,2018-12-09,1.0,340.0,12.0,8.0,1,1,0,1.0,1.0,0.0,9.0,5.0,315.0,1,11,7
2,arhufnagle@gmail.com,yes-with-app,2018-12-10,1.0,315.0,7.0,11.0,1,1,0,1.0,1.0,0.0,8.0,12.0,315.0,1,5,9
3,arhufnagle@gmail.com,yes-with-app,2018-12-11,0.0,315.0,9.0,5.0,1,1,0,1.0,1.0,0.0,11.0,7.0,310.0,1,14,2
4,arhufnagle@gmail.com,yes-with-app,2018-12-12,1.0,310.0,2.0,14.0,1,1,0,1.0,1.0,0.0,5.0,9.0,385.0,1,11,7


Final cleaning, to make it daily

In [454]:
# Keep every column whose name does not mention 'timestamp', then remove
# exact duplicate rows.
features = [
    column_name
    for column_name in notif_w_mindfulness_resp.columns
    if 'timestamp' not in column_name
]
regression_data = notif_w_mindfulness_resp[features].drop_duplicates()

# Integer id per participant, numbered in order of first appearance
groups = {
    address: group_id
    for group_id, address in enumerate(regression_data.email.unique())
}
regression_data['groups'] = [groups[address] for address in regression_data.email]

# Constant column so the models can estimate an intercept
regression_data['intercept'] = 1

### Auxiliary functions

In [455]:
def run_model(X, input_var, outcome_var, model_type='logit', groups=None):
    """Fit a regression on the complete cases of ``X`` and return its summary.

    Parameters
    ----------
    X : pandas.DataFrame
        Table holding the outcome, predictor and (optional) group columns.
    input_var : list of str
        Predictor column names. No constant is added here; include an
        intercept column explicitly if one is wanted.
    outcome_var : str
        Name of the dependent-variable column.
    model_type : {'logit', 'gee', 'linear'}, default 'logit'
        'logit' fits ``Logit`` with Newton's method, 'gee' fits ``GEE`` using
        the ``groups`` columns, 'linear' fits ``sm.OLS`` with the
        pseudo-inverse solver.
    groups : list of str, optional
        Group column names, selected alongside the model columns and passed
        to ``GEE``. Defaults to no group columns.

    Returns
    -------
    The ``summary()`` object of the fitted statsmodels result.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported values.
    """
    # Fail early with a clear message rather than an UnboundLocalError at return
    if model_type not in ('logit', 'gee', 'linear'):
        raise ValueError(
            "Unknown model_type {!r}; expected 'logit', 'gee' or 'linear'".format(model_type)
        )
    # A None default avoids sharing one mutable list across calls
    if groups is None:
        groups = []

    # Keep only the columns the model needs, drop incomplete rows, and cast to
    # float (lag columns are created as object dtype).
    X = X[[outcome_var] + groups + input_var].dropna().reset_index(drop=True).astype(float)

    if model_type == 'logit':
        model = Logit(X[outcome_var], X[input_var])
        fit_model = model.fit(method='newton')
    elif model_type == 'gee':
        model = GEE(X[outcome_var], X[input_var], groups=X[groups])
        fit_model = model.fit()
    else:  # 'linear'
        model = sm.OLS(X[outcome_var], X[input_var])
        fit_model = model.fit(method='pinv')
    return fit_model.summary()

## Hypothesis 1

Does receiving a push notification to perform a mindfulness activity affect how likely someone is going to complete a mindfulness activity with the application on the same day they received the notification?

* Dependent var: Self-response stating that a mindfulness activity was completed on the same day with the application
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Whether it was self-reported they used the app on the previous day
* Controlled covariate: Whether it was self-reported they completed any mindfulness activity on the previous day

Going to add response to be binary for each category (yes with app might increase)

In [456]:
# Covariate sets: push notification + intercept, plus different previous-day controls
base_terms = ['push_notification', 'intercept']
f_1 = base_terms + ['prev_day_with_app', 'prev_day_any_mindfulness', 'prev_day_on_my_own']
f_2 = base_terms + ['prev_day_any_mindfulness']
f_3 = base_terms + ['prev_day_with_app']
f_4 = base_terms + ['prev_day_on_my_own']
f_5 = base_terms + ['prev_day_survey_completed']
f_6 = list(base_terms)

o = 'outcome_with_app'

# Fit every specification first (default logit model), then print the summaries
(
    hypothesis_1_summ_1,
    hypothesis_1_summ_2,
    hypothesis_1_summ_3,
    hypothesis_1_summ_4,
    hypothesis_1_summ_5,
    hypothesis_1_summ_6,
) = [
    run_model(regression_data, feature_set, o)
    for feature_set in (f_1, f_2, f_3, f_4, f_5, f_6)
]

for model_summary in (
    hypothesis_1_summ_1,
    hypothesis_1_summ_2,
    hypothesis_1_summ_3,
    hypothesis_1_summ_4,
    hypothesis_1_summ_5,
    hypothesis_1_summ_6,
):
    print(model_summary)
    print('\n\n')

Optimization terminated successfully.
         Current function value: 0.594183
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.621696
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.601063
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.602914
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.626382
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.627810
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:       outcome_with_app   No. Observations:                  141
Model:                          Logit   Df Residuals:                      137
Method:                           MLE   Df Model:                            3
Date:                Mon, 23 Mar 2020   Pseudo R-squ.:                 0

## Hypothesis 2

Does receiving a push notification to perform a mindfulness activity affect how likely someone is going to complete any mindfulness activity on the same day they received the notification?

* Dependent var: Self-response stating that any mindfulness activity was completed on the same day 
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Whether it was self-reported they used the app on the previous day
* Controlled covariate: Whether it was self-reported they completed any mindfulness activity on the previous day

In [457]:
# Covariate sets: push notification + intercept, plus different previous-day controls
base_terms = ['push_notification', 'intercept']
f_1 = base_terms + ['prev_day_with_app', 'prev_day_any_mindfulness', 'prev_day_on_my_own']
f_2 = base_terms + ['prev_day_any_mindfulness']
f_3 = base_terms + ['prev_day_with_app']
f_4 = base_terms + ['prev_day_on_my_own']
f_5 = base_terms + ['prev_day_survey_completed']
f_6 = list(base_terms)

o = 'outcome_any_mindfulness'

# Fit every specification first (default logit model), then print the summaries
(
    hypothesis_2_summ_1,
    hypothesis_2_summ_2,
    hypothesis_2_summ_3,
    hypothesis_2_summ_4,
    hypothesis_2_summ_5,
    hypothesis_2_summ_6,
) = [
    run_model(regression_data, feature_set, o)
    for feature_set in (f_1, f_2, f_3, f_4, f_5, f_6)
]

for model_summary in (
    hypothesis_2_summ_1,
    hypothesis_2_summ_2,
    hypothesis_2_summ_3,
    hypothesis_2_summ_4,
    hypothesis_2_summ_5,
    hypothesis_2_summ_6,
):
    print(model_summary)
    print('\n\n')

Optimization terminated successfully.
         Current function value: 0.588840
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.618035
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.655877
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.598118
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.641207
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.643613
         Iterations 4
                              Logit Regression Results                             
Dep. Variable:     outcome_any_mindfulness   No. Observations:                  141
Model:                               Logit   Df Residuals:                      137
Method:                                MLE   Df Model:                            3
Date:                     Mon, 23 Mar 2020   Pseudo 

## Hypothesis 3: 

Does receiving a push notification to perform a mindfulness activity change the time spent on the headspace app within the same day?

* Dependent var: Time spent on headspace app the same day as the notification
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Whether they used headspace the previous day:
* Controlled covariate: Whether they completed any mindfulness on the previous day?

In [458]:
# Covariate sets: push notification + intercept, plus different previous-day controls
base_terms = ['push_notification', 'intercept']
f_1 = base_terms + ['prev_day_with_app', 'prev_day_any_mindfulness', 'prev_day_on_my_own']
f_2 = base_terms + ['prev_day_any_mindfulness']
f_3 = base_terms + ['prev_day_with_app']
f_4 = base_terms + ['prev_day_on_my_own']
f_5 = base_terms + ['prev_day_survey_completed']
f_6 = list(base_terms)

o = 'time_seconds'

# Fit every specification first with the linear (OLS) model, then print the summaries
(
    hypothesis_3_summ_1,
    hypothesis_3_summ_2,
    hypothesis_3_summ_3,
    hypothesis_3_summ_4,
    hypothesis_3_summ_5,
    hypothesis_3_summ_6,
) = [
    run_model(regression_data, feature_set, o, model_type='linear')
    for feature_set in (f_1, f_2, f_3, f_4, f_5, f_6)
]

for model_summary in (
    hypothesis_3_summ_1,
    hypothesis_3_summ_2,
    hypothesis_3_summ_3,
    hypothesis_3_summ_4,
    hypothesis_3_summ_5,
    hypothesis_3_summ_6,
):
    print(model_summary)
    print('\n\n')

                            OLS Regression Results                            
Dep. Variable:           time_seconds   R-squared:                       0.031
Model:                            OLS   Adj. R-squared:                  0.010
Method:                 Least Squares   F-statistic:                     1.477
Date:                Mon, 23 Mar 2020   Prob (F-statistic):              0.224
Time:                        15:50:10   Log-Likelihood:                -1155.4
No. Observations:                 141   AIC:                             2319.
Df Residuals:                     137   BIC:                             2331.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
push_notification       

## Hypothesis 4

Does receiving a push notification to perform a mindfulness activity change the time spent on the headspace app within the next day?

* Dependent var: Time spent on headspace app the next day
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Whether they used headspace the previous day:
* Controlled covariate: Whether they completed any mindfulness on the previous day?

In [459]:
# Covariate sets: push notification + intercept, plus same-day outcome / usage controls
base_terms = ['push_notification', 'intercept']
f_1 = base_terms + [
    'outcome_with_app', 'outcome_any_mindfulness', 'outcome_on_my_own', 'time_seconds'
]
f_2 = base_terms + ['outcome_with_app']
f_3 = base_terms + ['outcome_on_my_own']
f_4 = base_terms + ['outcome_any_mindfulness']
f_5 = base_terms + ['time_seconds']
o = 'next_day_time_seconds'

# Fit every specification first with the linear (OLS) model, then print the summaries
(
    hypothesis_4_summ_1,
    hypothesis_4_summ_2,
    hypothesis_4_summ_3,
    hypothesis_4_summ_4,
    hypothesis_4_summ_5,
) = [
    run_model(regression_data, feature_set, o, model_type='linear')
    for feature_set in (f_1, f_2, f_3, f_4, f_5)
]

for model_summary in (
    hypothesis_4_summ_1,
    hypothesis_4_summ_2,
    hypothesis_4_summ_3,
    hypothesis_4_summ_4,
    hypothesis_4_summ_5,
):
    print(model_summary)
    print('\n\n')

                              OLS Regression Results                             
Dep. Variable:     next_day_time_seconds   R-squared:                       0.072
Model:                               OLS   Adj. R-squared:                  0.052
Method:                    Least Squares   F-statistic:                     3.629
Date:                   Mon, 23 Mar 2020   Prob (F-statistic):            0.00711
Time:                           15:50:26   Log-Likelihood:                -1548.1
No. Observations:                    193   AIC:                             3106.
Df Residuals:                        188   BIC:                             3122.
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------

## Hypothesis 5

Does receiving a push notification to perform a mindfulness activity affect mood on the same day?

* Dependent var: That day’s mood
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Previous day’s mood

In [463]:
# Covariate sets for same-day negative affect. f_1..f_4 and f_6 control for the
# previous day's affect scores; f_5 and f_7 do not.
f_1 = ['push_notification', 'intercept', 'prev_day_with_app', 'prev_day_any_mindfulness', 'prev_day_on_my_own',
      'prev_day_negative_affect', 'prev_day_positive_affect']
f_2 = ['push_notification', 'intercept', 'prev_day_any_mindfulness',
       'prev_day_negative_affect', 'prev_day_positive_affect'
      ]
f_3 = ['push_notification', 'intercept', 'prev_day_with_app',
      'prev_day_negative_affect', 'prev_day_positive_affect']
f_4 = ['push_notification', 'intercept', 'prev_day_on_my_own',
      'prev_day_negative_affect', 'prev_day_positive_affect']
f_5 = ['push_notification', 'intercept', 'prev_day_survey_completed']
f_6 = ['push_notification', 'intercept', 'prev_day_negative_affect', 'prev_day_positive_affect']
f_7 = ['push_notification', 'intercept']

o = 'negative_affect'

# Each summary number matches its feature-set number. f_6 was previously
# defined but never fitted, and summ_6 was fitted on f_7; both are now run,
# matching the positive-affect cell.
hypothesis_5neg_summ_1 = run_model(regression_data, f_1, o, model_type='linear')
hypothesis_5neg_summ_2 = run_model(regression_data, f_2, o, model_type='linear')
hypothesis_5neg_summ_3 = run_model(regression_data, f_3, o, model_type='linear')
hypothesis_5neg_summ_4 = run_model(regression_data, f_4, o, model_type='linear')
hypothesis_5neg_summ_5 = run_model(regression_data, f_5, o, model_type='linear')
hypothesis_5neg_summ_6 = run_model(regression_data, f_6, o, model_type='linear')
hypothesis_5neg_summ_7 = run_model(regression_data, f_7, o, model_type='linear')

print(hypothesis_5neg_summ_1)
print('\n\n')
print(hypothesis_5neg_summ_2)
print('\n\n')
print(hypothesis_5neg_summ_3)
print('\n\n')
print(hypothesis_5neg_summ_4)
print('\n\n')
print(hypothesis_5neg_summ_5)
print('\n\n')
print(hypothesis_5neg_summ_6)
print('\n\n')
print(hypothesis_5neg_summ_7)
print('\n\n')

                            OLS Regression Results                            
Dep. Variable:        negative_affect   R-squared:                       0.026
Model:                            OLS   Adj. R-squared:                 -0.010
Method:                 Least Squares   F-statistic:                    0.7324
Date:                Mon, 23 Mar 2020   Prob (F-statistic):              0.600
Time:                        15:54:05   Log-Likelihood:                -382.13
No. Observations:                 141   AIC:                             776.3
Df Residuals:                     135   BIC:                             794.0
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
push_notification       

In [462]:
# Covariate sets for same-day positive affect. f_1..f_4 and f_6 control for the
# previous day's affect scores; f_5 and f_7 do not.
base_terms = ['push_notification', 'intercept']
prev_day_affect = ['prev_day_negative_affect', 'prev_day_positive_affect']
f_1 = (
    base_terms
    + ['prev_day_with_app', 'prev_day_any_mindfulness', 'prev_day_on_my_own']
    + prev_day_affect
)
f_2 = base_terms + ['prev_day_any_mindfulness'] + prev_day_affect
f_3 = base_terms + ['prev_day_with_app'] + prev_day_affect
f_4 = base_terms + ['prev_day_on_my_own'] + prev_day_affect
f_5 = base_terms + ['prev_day_survey_completed']
f_6 = base_terms + prev_day_affect
f_7 = list(base_terms)

o = 'positive_affect'

# Fit every specification first with the linear (OLS) model, then print the summaries
(
    hypothesis_5pos_summ_1,
    hypothesis_5pos_summ_2,
    hypothesis_5pos_summ_3,
    hypothesis_5pos_summ_4,
    hypothesis_5pos_summ_5,
    hypothesis_5pos_summ_6,
    hypothesis_5pos_summ_7,
) = [
    run_model(regression_data, feature_set, o, model_type='linear')
    for feature_set in (f_1, f_2, f_3, f_4, f_5, f_6, f_7)
]

all_summaries = (
    hypothesis_5pos_summ_1,
    hypothesis_5pos_summ_2,
    hypothesis_5pos_summ_3,
    hypothesis_5pos_summ_4,
    hypothesis_5pos_summ_5,
    hypothesis_5pos_summ_6,
    hypothesis_5pos_summ_7,
)
for position, model_summary in enumerate(all_summaries, start=1):
    print(model_summary)
    # Blank-line separator between summaries only; none after the last one
    if position < len(all_summaries):
        print('\n\n')

                            OLS Regression Results                            
Dep. Variable:        positive_affect   R-squared:                       0.031
Model:                            OLS   Adj. R-squared:                 -0.005
Method:                 Least Squares   F-statistic:                    0.8654
Date:                Mon, 23 Mar 2020   Prob (F-statistic):              0.506
Time:                        15:53:56   Log-Likelihood:                -395.25
No. Observations:                 141   AIC:                             802.5
Df Residuals:                     135   BIC:                             820.2
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
push_notification       

## Hypothesis 6

Does receiving a push notification to perform a mindfulness activity affect mood on the next day?

* Dependent var: The next day’s mood
* Independent var: Receipt of a push notification to complete a mindfulness activity, delivered before the self-reported survey response
* Controlled covariate: Today’s mood

In [465]:
# Hypothesis 6 (negative affect): regress the next day's negative affect on the
# push-notification indicator under several covariate sets. Most sets control
# for today's negative/positive affect.
h6neg_base_terms = ['push_notification', 'intercept']
h6neg_todays_mood = ['negative_affect', 'positive_affect']

f_1 = (h6neg_base_terms
       + ['outcome_with_app', 'outcome_any_mindfulness', 'outcome_on_my_own']
       + h6neg_todays_mood)
f_2 = h6neg_base_terms + ['outcome_any_mindfulness'] + h6neg_todays_mood
f_3 = h6neg_base_terms + ['outcome_with_app'] + h6neg_todays_mood
f_4 = h6neg_base_terms + ['outcome_on_my_own'] + h6neg_todays_mood
# f_5 used to be picked up implicitly from the Hypothesis 5 cell, so this cell
# only worked if that one had been run first. It is now defined here with the
# same value so the cell is self-contained and results do not change.
# NOTE(review): this controls for the *previous* day's survey completion, which
# looks like a carry-over from Hypothesis 5 -- confirm it is the intended
# covariate for a next-day outcome.
f_5 = h6neg_base_terms + ['prev_day_survey_completed']
f_6 = h6neg_base_terms + h6neg_todays_mood
f_7 = list(h6neg_base_terms)  # treatment + intercept only

# Outcome column for every model in this cell.
o = 'next_day_negative_affect'

# Fit all seven models before printing anything (same order as the feature sets).
h6neg_feature_sets = [f_1, f_2, f_3, f_4, f_5, f_6, f_7]
(hypothesis_6neg_summ_1,
 hypothesis_6neg_summ_2,
 hypothesis_6neg_summ_3,
 hypothesis_6neg_summ_4,
 hypothesis_6neg_summ_5,
 hypothesis_6neg_summ_6,
 hypothesis_6neg_summ_7) = [
    run_model(regression_data, features, o, model_type='linear')
    for features in h6neg_feature_sets
]

# Print each result, with a blank-line spacer between (not after) results.
h6neg_summaries = [
    hypothesis_6neg_summ_1,
    hypothesis_6neg_summ_2,
    hypothesis_6neg_summ_3,
    hypothesis_6neg_summ_4,
    hypothesis_6neg_summ_5,
    hypothesis_6neg_summ_6,
    hypothesis_6neg_summ_7,
]
for model_idx, model_summ in enumerate(h6neg_summaries):
    if model_idx > 0:
        print('\n\n')
    print(model_summ)

                               OLS Regression Results                               
Dep. Variable:     next_day_negative_affect   R-squared:                       0.026
Model:                                  OLS   Adj. R-squared:                 -0.010
Method:                       Least Squares   F-statistic:                    0.7259
Date:                      Mon, 23 Mar 2020   Prob (F-statistic):              0.605
Time:                              15:55:31   Log-Likelihood:                -382.15
No. Observations:                       141   AIC:                             776.3
Df Residuals:                           135   BIC:                             794.0
Df Model:                                 5                                         
Covariance Type:                  nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------

In [466]:
# Hypothesis 6 (positive affect): regress the next day's positive affect on the
# push-notification indicator under several covariate sets. Most sets control
# for today's negative/positive affect.
h6pos_base_terms = ['push_notification', 'intercept']
h6pos_todays_mood = ['negative_affect', 'positive_affect']

f_1 = (h6pos_base_terms
       + ['outcome_with_app', 'outcome_any_mindfulness', 'outcome_on_my_own']
       + h6pos_todays_mood)
f_2 = h6pos_base_terms + ['outcome_any_mindfulness'] + h6pos_todays_mood
f_3 = h6pos_base_terms + ['outcome_with_app'] + h6pos_todays_mood
f_4 = h6pos_base_terms + ['outcome_on_my_own'] + h6pos_todays_mood
# f_5 used to be picked up implicitly from the Hypothesis 5 cell, so this cell
# only worked if that one had been run first. It is now defined here with the
# same value so the cell is self-contained and results do not change.
# NOTE(review): this controls for the *previous* day's survey completion, which
# looks like a carry-over from Hypothesis 5 -- confirm it is the intended
# covariate for a next-day outcome.
f_5 = h6pos_base_terms + ['prev_day_survey_completed']
f_6 = h6pos_base_terms + h6pos_todays_mood
f_7 = list(h6pos_base_terms)  # treatment + intercept only

# Outcome column for every model in this cell.
o = 'next_day_positive_affect'

# Fit all seven models before printing anything (same order as the feature sets).
h6pos_feature_sets = [f_1, f_2, f_3, f_4, f_5, f_6, f_7]
(hypothesis_6pos_summ_1,
 hypothesis_6pos_summ_2,
 hypothesis_6pos_summ_3,
 hypothesis_6pos_summ_4,
 hypothesis_6pos_summ_5,
 hypothesis_6pos_summ_6,
 hypothesis_6pos_summ_7) = [
    run_model(regression_data, features, o, model_type='linear')
    for features in h6pos_feature_sets
]

# Print each result, with a blank-line spacer between (not after) results.
h6pos_summaries = [
    hypothesis_6pos_summ_1,
    hypothesis_6pos_summ_2,
    hypothesis_6pos_summ_3,
    hypothesis_6pos_summ_4,
    hypothesis_6pos_summ_5,
    hypothesis_6pos_summ_6,
    hypothesis_6pos_summ_7,
]
for model_idx, model_summ in enumerate(h6pos_summaries):
    if model_idx > 0:
        print('\n\n')
    print(model_summ)

                               OLS Regression Results                               
Dep. Variable:     next_day_positive_affect   R-squared:                       0.030
Model:                                  OLS   Adj. R-squared:                 -0.006
Method:                       Least Squares   F-statistic:                    0.8327
Date:                      Mon, 23 Mar 2020   Prob (F-statistic):              0.529
Time:                              15:56:00   Log-Likelihood:                -395.34
No. Observations:                       141   AIC:                             802.7
Df Residuals:                           135   BIC:                             820.4
Df Model:                                 5                                         
Covariance Type:                  nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------