# Mindfulness MRT Exploration

Get to know data and see if there are interesting results

## Imports

In [1]:
import pandas as pd
import numpy as np
from statsmodels.discrete.discrete_model import Logit
import statsmodels.api as sm

## Data upload

In [2]:
# App logs
app_logs = pd.read_csv('../mindfulness-data/app-logs-headspace.csv')

# Daily survey
daily_survey = pd.read_csv('../mindfulness-data/daily-survey.csv')

# Mobile survey
mobile_survey = pd.read_csv('../mindfulness-data/mobile-surveys.csv')

# Notification event
notif_event = pd.read_csv('../mindfulness-data/notif-event.csv')

# PAM 
pam = pd.read_csv('../mindfulness-data/pam.csv')

# Participants
participants = pd.read_csv('../mindfulness-data/participants.csv')

## Data cleaning

Get relevant participants

In [3]:
# Get participants ids
participant_emails = participants.email.values

# Filter daily_survey
daily_survey_filt = daily_survey.loc[daily_survey.email.isin(participant_emails), :]

Make dummy vars for method and content type

In [4]:
daily_survey_filt['method_content'] = daily_survey_filt['method'] + '|' + daily_survey_filt['content']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [5]:
# Add on dummy vars
daily_survey_filt = pd.concat([daily_survey_filt, pd.get_dummies(daily_survey_filt['method'])], axis=1)
daily_survey_filt = pd.concat([daily_survey_filt, pd.get_dummies(daily_survey_filt['method_content'])], axis=1)

# Add on timestamp
daily_survey_filt['timestamp'] = pd.to_datetime(daily_survey_filt['alarm_millis'], unit='ms')

Mobile survey - get into a spot whre I could do regression

In [6]:
mobile_survey_filt = mobile_survey.loc[mobile_survey.email.isin(participant_emails), :]

In [278]:
ids = []
email = []
code = []
header = []
timestamp = []
response = []
created_at = []

# Expand the survey responses
for ind in mobile_survey_filt.index:
    response_split = mobile_survey_filt.loc[ind, 'response'].split(';')
    # Go through each response split and log
    for r in response_split:
        temp = r.split('--18000000,')
        if len(temp) > 1:
            # Append
            ids.append(mobile_survey_filt.loc[ind, 'id'])
            email.append(mobile_survey_filt.loc[ind, 'email'])
            code.append(mobile_survey_filt.loc[ind, 'code'])
            header.append(mobile_survey_filt.loc[ind, 'header'])
            timestamp.append(temp[0])
            response.append(temp[1][:-1])
            created_at.append(mobile_survey_filt.loc[ind, 'created_at'])
            
mobile_survey_cleaned = pd.DataFrame({
    'id': ids,
    'email': email,
    'code': code,
    'header': header,
    'timestamp_mindfulness': timestamp,
    'response': response,
    'created_at': created_at
})
mobile_survey_cleaned['timestamp_mindfulness'] = pd.to_datetime(
    mobile_survey_cleaned['timestamp_mindfulness'],
    format='%Y-%m-%dT%H:%M:%S'
)

pam['timestamp_z'] = [i.split('--')[0] for i in pam.timestamp_z.values]
pam['timestamp_z'] = [i.replace('-10800000', '') for i in pam.timestamp_z.values]

pam['timestamp'] = pd.to_datetime(
    pam['timestamp_z'],
    format='%Y-%m-%dT%H:%M:%S'
)

In [279]:
mobile_survey_cleaned['day'] = mobile_survey_cleaned.timestamp_mindfulness.dt.floor('D')
daily_survey_filt['day'] = daily_survey_filt.timestamp.dt.floor('D')
app_logs['created_at'] = pd.to_datetime(app_logs['created_at'])
app_logs['day'] = app_logs['created_at'].dt.floor('d')
pam['timestamp'] = pd.to_datetime(pam['timestamp'])
pam['day'] = pam['timestamp'].dt.floor('d')

Clean PAM

In [280]:
# Get indices
ind_swap = pam.loc[pam.mood.isin([str(i) for i in range(1, 5)]), :].index

# Get values
mood = pam.loc[ind_swap, 'positive_affect'].values
negative_affect = pam.loc[ind_swap, 'affect_valence'].values
positive_affect = pam.loc[ind_swap, 'affect_arousal'].values
affect_arousal = pam.loc[ind_swap, 'mood'].astype(int).values
affect_valence = pam.loc[ind_swap, 'negative_affect']

# Swap
pam.loc[ind_swap, 'mood'] = mood
pam.loc[ind_swap, 'negative_affect'] = negative_affect
pam.loc[ind_swap, 'positive_affect'] = positive_affect
pam.loc[ind_swap, 'affect_arousal'] = affect_arousal
pam.loc[ind_swap, 'affect_valence'] = affect_valence

Get once daily

In [281]:
pam['negative_affect'] = pam.negative_affect.astype(int)
pam['positive_affect'] = pam.positive_affect.astype(int)

Need to group app_logs

In [286]:
app_logs_day = app_logs.groupby(['day', 'worker_id'], as_index=False)['time_seconds'].sum()
app_logs_day.rename(columns={'worker_id': 'email'}, inplace=True)

Join to be able to see whether notification led to app response.

#### Fix timing information

Will push day back by one, if the survey was answered before 5AM

Mobile survey file.

In [283]:
# First get the days where it's early in the morning and shift
mobile_survey_cleaned_shift = mobile_survey_cleaned.copy()
mobile_survey_cleaned_shift = mobile_survey_cleaned_shift.sort_values(
    by=['email', 'timestamp_mindfulness']).reset_index(drop=True)
day_shift_ind = mobile_survey_cleaned_shift.loc[
    mobile_survey_cleaned_shift.timestamp_mindfulness.dt.hour < 5,
    :
].index

mobile_survey_cleaned_shift.loc[day_shift_ind, 'day'] -= pd.to_timedelta(1, unit='d')

# Get remaining names and emails where there are multiple entries
duplicates = mobile_survey_cleaned_shift.groupby(['email', 'day'])['response'].count()
duplicates = duplicates.loc[duplicates > 1, :].reset_index()

for ind in duplicates.index:
    d = duplicates.loc[ind, 'day']
    e = duplicates.loc[ind, 'email']
    prev_day = str((d - pd.to_timedelta(1, unit='d')).floor('d')).split(' ')[0]
    
    if prev_day not in mobile_survey_cleaned_shift.loc[
        mobile_survey_cleaned_shift.email == e, 'day'].dt.floor('d').astype(str).values:
        ind = mobile_survey_cleaned_shift.loc[
            (mobile_survey_cleaned_shift.email == e) & (mobile_survey_cleaned_shift.day == d), :
        ].index[0]
        mobile_survey_cleaned_shift.loc[ind, 'day'] -= pd.to_timedelta(1, unit='d')

mobile_survey_cleaned_shift = mobile_survey_cleaned_shift[
    ['email', 'response', 'day']
].drop_duplicates().reset_index(drop=True)

Push notification survey file

In [284]:
daily_survey_filt_shift = daily_survey_filt.loc[daily_survey_filt[' push_notification'] == 1, :]
daily_survey_filt_shift.rename(columns={' push_notification': 'push_notification'}, inplace=True)

day_shift_ind = daily_survey_filt_shift.loc[
    daily_survey_filt_shift.timestamp.dt.hour < 5,
    :
].index

daily_survey_filt_shift.loc[day_shift_ind, 'day'] -= pd.to_timedelta(1, unit='d')

daily_survey_filt_shift = daily_survey_filt_shift[[
    'email',
    'push_notification',
    'day'
]].drop_duplicates().reset_index(drop=True)

PAM

In [285]:
# Do day shifts
pam_shift = pam.loc[pam.email != 'fnokeke@gmail.com', :].sort_values(by='timestamp').reset_index(drop=True)
day_shift_ind = pam_shift.loc[
    pam_shift.timestamp.dt.hour < 5,
    :
].index

pam_shift.loc[day_shift_ind, 'day'] -= pd.to_timedelta(1, unit='d')

# Get remaining names and emails where there are multiple entries
duplicates = pam_shift.groupby(['email', 'day'])['negative_affect'].count()
duplicates = duplicates.loc[duplicates > 1, :].reset_index()

for ind in duplicates.index:
    d = duplicates.loc[ind, 'day']
    prev_day = str((d - pd.to_timedelta(1, unit='d')).floor('d')).split(' ')[0]
    e = duplicates.loc[ind, 'email']
    if prev_day not in pam_shift.loc[pam_shift.email == e, 'day'].dt.floor('d').astype(str).values:
        ind = pam_shift.loc[
            (pam_shift.email == e) & (pam_shift.day == d), :
        ].index[0]
        pam_shift.loc[ind, 'day'] -= pd.to_timedelta(1, unit='d')

# Group
pam_shift = pam_shift[
    ['email', 'day', 'positive_affect', 'negative_affect']
].drop_duplicates().reset_index(drop=True)
pam_shift = pam_shift.groupby(['email', 'day'], as_index=False)[['positive_affect', 'negative_affect']].mean()

In [211]:
pam.email.unique()

array(['ysmncs@gmail.com', 'samcarlin@gmail.com', 'paulski13@gmail.com',
       'ohioleslie@gmail.com', 'jessica.riviere@gmail.com',
       'jay.springfield@gmail.com', 'hjdeering@gmail.com',
       'grli7272@gmail.com', 'fnokeke@gmail.com', 'fno2@cornell.edu',
       'arhufnagle@gmail.com'], dtype=object)

### Merge data together

In [289]:
notif_w_mindfulness_resp = pd.merge(
    left=mobile_survey_cleaned_shift[['email', 'response', 'day']],
    right=daily_survey_filt_shift[[
        'email',
        'push_notification',
        'day'
    ]],
    on=['day', 'email'],
    how='left'
).drop_duplicates()

notif_w_mindfulness_resp = pd.merge(
    left=notif_w_mindfulness_resp,
    right=app_logs_day[[
        'email',
        'day',
        'time_seconds'
    ]],
    on=['day', 'email'],
    how='left'
).drop_duplicates()

notif_w_mindfulness_resp = pd.merge(
    left=notif_w_mindfulness_resp,
    right=pam_shift[[
        'email',
        'day',
        'positive_affect',
        'negative_affect'
    ]],
    on=['day', 'email'],
    how='left'
).drop_duplicates()

(203, 3)
(203, 7)


In [290]:
pd.isnull(notif_w_mindfulness_resp).sum()

email                  0
response               0
day                    0
push_notification    131
time_seconds         136
positive_affect       10
negative_affect       10
dtype: int64

Fill on days when push notification did not occur. Also fill when there was no time spent in app. Lastly, drop rows without PAM.

In [291]:
notif_w_mindfulness_resp['push_notification'].fillna(0, inplace=True)
notif_w_mindfulness_resp['time_seconds'].fillna(0, inplace=True)

notif_w_mindfulness_resp = notif_w_mindfulness_resp.loc[
    ~pd.isnull(notif_w_mindfulness_resp.negative_affect), :].reset_index(drop=True)

Count, for now, all responses as just yes/no

In [338]:
response_map = {
    'yes-with-app': 1, 
    'no': 0, 
    'yes-on-my-own': 0
}

notif_w_mindfulness_resp['outcome_with_app'] = notif_w_mindfulness_resp.response.map(response_map)

response_map = {
    'yes-with-app': 1, 
    'no': 0, 
    'yes-on-my-own': 1
}

notif_w_mindfulness_resp['outcome_any_mindfulness'] = notif_w_mindfulness_resp.response.map(response_map)


response_map = {
    'yes-with-app': 0, 
    'no': 0, 
    'yes-on-my-own': 1
}

notif_w_mindfulness_resp['outcome_on_my_own'] = notif_w_mindfulness_resp.response.map(response_map)

Now put value to see whether mindfulness was done on previous day

In [100]:
import datetime

In [340]:
notif_w_mindfulness_resp['prev_day_with_app'] = None
notif_w_mindfulness_resp['prev_day_any_mindfulness'] = None
notif_w_mindfulness_resp['prev_day_on_my_own'] = None
notif_w_mindfulness_resp['prev_day_negative_affect'] = None
notif_w_mindfulness_resp['prev_day_negative_affect'] = None
notif_w_mindfulness_resp['prev_day_positive_affect'] = None
notif_w_mindfulness_resp['next_day_time_seconds'] = 0
notif_w_mindfulness_resp = \
    notif_w_mindfulness_resp.sort_values(['email', 'day']).reset_index(drop=True)

for e in notif_w_mindfulness_resp.email.unique():
    # Filter
    ind = notif_w_mindfulness_resp.loc[notif_w_mindfulness_resp.email == e, :].index
    days = notif_w_mindfulness_resp.loc[ind, 'day'].unique()
    # For each day
    for d in days[1:]:
        # Get prev_day
        prev_day = d - (24*3600*int(1e9))
        # Get next day
        next_day = d + (24*3600*int(1e9))
        # Check if day exists in df
        if prev_day in days: 
            # Get mood values
            prev_day_negative = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'negative_affect'
            ].values
            prev_day_positive = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'positive_affect'
            ].values
            notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                (notif_w_mindfulness_resp.email == e), 'prev_day_negative_affect'
            ] = prev_day_negative
            notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                (notif_w_mindfulness_resp.email == e), 'prev_day_positive_affect'
            ] = prev_day_positive
            # Check outcome with app 
            outcome = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'outcome_with_app'
            ].values
            # Check if 1 is an outcome (some mindfulness was done)
            if 1 in outcome:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_with_app'
                ] = 1
            else:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_with_app'
                ] = 0
            # Check outcome with any mindfulness 
            outcome = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'outcome_any_mindfulness'
            ].values
            # Check if 1 is an outcome (some mindfulness was done)
            if 1 in outcome:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_any_mindfulness'
                ] = 1
            else:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_any_mindfulness'
                ] = 0
            # Check outcome with any mindfulness 
            outcome = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'outcome_on_my_own'
            ].values
            # Check if 1 is an outcome (some mindfulness was done)
            if 1 in outcome:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_on_my_own'
                ] = 1
            else:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                    (notif_w_mindfulness_resp.day == d) &
                    (notif_w_mindfulness_resp.email == e), 'prev_day_on_my_own'
                ] = 0
        if next_day in days:
            outcome = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == next_day) &
                (notif_w_mindfulness_resp.email == e), 'time_seconds'
            ].values[0]
            notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                (notif_w_mindfulness_resp.email == e), 'next_day_time_seconds'
            ] = outcome

Intercept

Final cleaning, to make it daily

In [354]:
features = [f for f in notif_w_mindfulness_resp.columns if 'timestamp' not in f]
regression_data = notif_w_mindfulness_resp[features].drop_duplicates()

groups = dict(zip(regression_data.email.unique(), range(len(regression_data.email.unique()))))
regression_data['groups'] = [groups[g] for g in regression_data.email]

### Auxillary functions

In [348]:
from statsmodels.genmod.generalized_estimating_equations import GEE

In [358]:
def run_model(X, input_var, outcome_var, model_type='logit', groups=None):
    X = X[[outcome_var] + [groups] + input_var].dropna().reset_index(drop=True).astype(float)
    if model_type == 'logit':
        model = Logit(X[outcome_var], X[input_var])
        fit_model = model.fit(method='newton')
    if model_type == 'gee':
        model = GEE(X[outcome_var], X[input_var], groups=X[groups])
        fit_model = model.fit()
    elif model_type == 'linear':
        model = sm.OLS(X[outcome_var], X[input_var])
        fit_model = model.fit(method='pinv')
    return fit_model.summary()

## Hypothesis 1

Does receiving a push notification to perform a mindfulness activity affect how likely someone is going to complete a mindfulness activity with the application on the same day they received the notification?

* Dependent var: Self-response stating that a mindfulness activity was completed on the same day with the application
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Whether it was self-reported they used the app on the previous day
* Controlled covariate: Whether it was self-reported they completed any mindfulness activity on the previous day

Going to add response to be binary for each category (yes with app might increase)

In [362]:
regression_data[f_2 + [o]].corr()

Unnamed: 0,push_notification,intercept,outcome_with_app
push_notification,1.0,,0.078098
intercept,,,
outcome_with_app,0.078098,,1.0


In [359]:
f_2 = ['push_notification', 'intercept', 'prev_day_any_mindfulness']
groups = 'groups'

o = 'outcome_with_app'

hypothesis_1_summ_2 = run_model(regression_data, f_2, o, model_type='gee', groups=groups)

print(hypothesis_1_summ_2)

                               GEE Regression Results                              
Dep. Variable:            outcome_with_app   No. Observations:                  141
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                   5
                      Estimating Equations   Max. cluster size:                  25
Family:                           Gaussian   Mean cluster size:                17.6
Dependence structure:         Independence   Num. iterations:                     2
Date:                     Mon, 23 Mar 2020   Scale:                           0.224
Covariance type:                    robust   Time:                         14:29:00
                               coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------
push_notification            0.0353      0.091      0.390 

In [347]:
f_1 = ['push_notification', 'intercept', 'prev_day_with_app', 'prev_day_any_mindfulness', 'prev_day_on_my_own']
f_2 = ['push_notification', 'intercept', 'prev_day_any_mindfulness']
f_3 = ['push_notification', 'intercept', 'prev_day_with_app']
f_4 = ['push_notification', 'intercept', 'prev_day_on_my_own']
f_5 = ['push_notification', 'intercept']

o = 'outcome_with_app'

hypothesis_1_summ_1 = run_model(regression_data, f_1, o)
hypothesis_1_summ_2 = run_model(regression_data, f_2, o)
hypothesis_1_summ_3 = run_model(regression_data, f_3, o)
hypothesis_1_summ_4 = run_model(regression_data, f_4, o)
hypothesis_1_summ_5 = run_model(regression_data, f_5, o)

print(hypothesis_1_summ_1)
print(hypothesis_1_summ_2)
print(hypothesis_1_summ_3)
print(hypothesis_1_summ_4)
print(hypothesis_1_summ_5)

Optimization terminated successfully.
         Current function value: 0.598123
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.630624
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.606768
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.607155
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.635858
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:       outcome_with_app   No. Observations:                  141
Model:                          Logit   Df Residuals:                      137
Method:                           MLE   Df Model:                            3
Date:                Mon, 23 Mar 2020   Pseudo R-squ.:                 0.05283
Time:                        14:23:08   Log-Likelihood:                -84.335
converged:      

## Hypothesis 2

Does receiving a push notification to perform a mindfulness activity affect how likely someone is going to complete any mindfulness activity on the same day they received the notification?

* Dependent var: Self-response stating that any mindfulness activity was completed on the same day 
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Whether it was self-reported they used the app on the previous day
* Controlled covariate: Whether it was self-reported they completed any mindfulness activity on the previous day

In [337]:
f_1 = ['push_notification', 'intercept', 'prev_day_with_app', 'prev_day_any_mindfulness']
f_2 = ['push_notification', 'intercept', 'prev_day_any_mindfulness']
f_3 = ['push_notification', 'intercept', 'prev_day_with_app']
f_4 = ['push_notification', 'intercept']

o = 'outcome_any_mindfulness'

hypothesis_2_summ_1 = run_model(regression_data, f_1, o)
hypothesis_2_summ_2 = run_model(regression_data, f_2, o)
hypothesis_2_summ_3 = run_model(regression_data, f_3, o)
hypothesis_2_summ_4 = run_model(regression_data, f_4, o)

print(hypothesis_2_summ_1)
print(hypothesis_2_summ_2)
print(hypothesis_2_summ_3)
print(hypothesis_2_summ_4)

Optimization terminated successfully.
         Current function value: 0.586088
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.613803
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.647963
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.637432
         Iterations 5
                              Logit Regression Results                             
Dep. Variable:     outcome_any_mindfulness   No. Observations:                  141
Model:                               Logit   Df Residuals:                      137
Method:                                MLE   Df Model:                            3
Date:                     Mon, 23 Mar 2020   Pseudo R-squ.:                  0.1097
Time:                             14:02:29   Log-Likelihood:                -82.638
converged:                            True   LL-Null:                       -92.822
Cova

## Hypothesis 3: 

Does receiving a push notification to perform a mindfulness activity change the time spent on the headspace app within the same day?

* Dependent var: Time spent on headspace app the same day as the notification
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Whether they used headspace the previous day:
* Controlled covariate: Whether they completed any mindfulness on the previous day?

In [303]:
f = ['push_notification', 'intercept'] #, 'prev_day_with_app', 'prev_day_any_mindfulness']
o = 'time_seconds'

hypothesis_3_summ = run_model(regression_data, f, o, model_type='linear')

hypothesis_3_summ

0,1,2,3
Dep. Variable:,time_seconds,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,1.057
Date:,"Mon, 23 Mar 2020",Prob (F-statistic):,0.305
Time:,13:43:17,Log-Likelihood:,-1573.4
No. Observations:,193,AIC:,3151.0
Df Residuals:,191,BIC:,3157.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
push_notification,-129.9768,126.404,-1.028,0.305,-379.304,119.350
intercept,230.6911,76.126,3.030,0.003,80.536,380.846

0,1,2,3
Omnibus:,330.145,Durbin-Watson:,1.357
Prob(Omnibus):,0.0,Jarque-Bera (JB):,58136.254
Skew:,8.498,Prob(JB):,0.0
Kurtosis:,86.31,Cond. No.,2.42


## Hypothesis 4

Does receiving a push notification to perform a mindfulness activity change the time spent on the headspace app within the next day?

* Dependent var: Time spent on headspace app the next day
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Whether they used headspace the previous day:
* Controlled covariate: Whether they completed any mindfulness on the previous day?

In [304]:
f = ['push_notification', 'intercept', 'prev_day_with_app', 'prev_day_any_mindfulness']
o = 'next_day_time_seconds'

hypothesis_4_summ = run_model(regression_data, f, o, model_type='linear')

hypothesis_4_summ

ValueError: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).

## Hypothesis 5

Does receiving a push notification to perform a mindfulness activity affect mood on the same day?

* Dependent var: That day’s mood
* Independent var: Push notification to complete a mindfulness activity that occurred prior to the self-reported survey value
* Controlled covariate: Previous day’s mood

In [None]:
f = ['push_notification', 'intercept']
o = 'negative_affect'

hypothesis_4_summ = run_model(regression_data, f, o, model_type='linear')

hypothesis_4_summ

In [None]:
pd.isnull(regression_data).sum()

In [None]:
data_for_model