# Mindfulness MRT Exploration

Get to know data and see if there are interesting results

## Imports

In [1]:
import pandas as pd
import numpy as np

## Data upload

In [2]:
# App logs
app_logs = pd.read_csv('../mindfulness-data/app-logs-headspace.csv')

# Daily survey
daily_survey = pd.read_csv('../mindfulness-data/daily-survey.csv')

# Mobile survey
mobile_survey = pd.read_csv('../mindfulness-data/mobile-surveys.csv')

# Notification event
notif_event = pd.read_csv('../mindfulness-data/notif-event.csv')

# PAM 
pam = pd.read_csv('../mindfulness-data/pam.csv')

# Participants
participants = pd.read_csv('../mindfulness-data/participants.csv')

### Data exploration

In [18]:
app_logs.app_id.unique()

array(['com.getsomeheadspace.android'], dtype=object)

In [17]:
daily_survey.app_id.unique()

array([' com.getsomeheadspace.android', ' pam', ' push_survey'],
      dtype=object)

In [4]:
mobile_survey.head()

Unnamed: 0,id,email,code,header,response,created_at
0,192,fnokeke@gmail.com,461985,"timestamp,q1-mindful-engage","2018-10-25T18:22:27--18000000,yes-on-my-own,;",2018-10-25 22:22:31.761079
1,193,fnokeke@gmail.com,461985,"timestamp,q1-mindful-engage","2018-10-26T23:56:15--18000000,no,;",2018-10-29 18:16:08.605895
2,194,fnokeke@gmail.com,461985,"timestamp,q1-mindful-engage","2018-10-30T0:40:27--18000000,yes-on-my-own,;",2018-10-30 15:25:40.233189
3,195,fnokeke@gmail.com,461985,"timestamp,q1-mindful-engage","2018-10-31T0:0:57--18000000,yes-on-my-own,;",2018-10-31 15:26:14.01151
4,219,fnokeke@gmail.com,461985,"timestamp,q1-mindful-engage","2018-11-12T22:9:32--18000000,no,;2018-11-15T23...",2018-12-10 06:48:30.8586


In [5]:
pam.head()

Unnamed: 0,id,email,code,created_at,affect_arousal,affect_valence,mood,negative_affect,positive_affect,timestamp_z
0,538,ysmncs@gmail.com,461985,2019-03-23 13:59:03.979943,3,1,angry,15,3,2019-3-21T21:33:23--18000000
1,535,ysmncs@gmail.com,461985,2019-03-20 04:56:17.521876,1,1,tired,13,1,2019-3-19T22:42:10--18000000
2,533,ysmncs@gmail.com,461985,2019-03-16 04:57:33.419249,1,1,tired,13,1,2019-3-15T22:15:49--18000000
3,531,ysmncs@gmail.com,461985,2019-03-15 05:07:16.834344,1,1,tired,13,1,2019-3-15T0:29:28--18000000
4,529,ysmncs@gmail.com,461985,2019-03-12 17:00:45.462354,1,1,tired,13,1,2019-3-11T22:22:8--18000000


In [19]:
pam.mood.unique()

array(['angry', 'tired', 'satisfied', 'gloomy', 'sad', 'glad', 'sleepy',
       'calm', 'serene', 'happy', 'frustrated', 'excited', 'afraid',
       'miserable', 'tense', '2', '3', '4', '1', 'delighted'],
      dtype=object)

## Aim 1: Notification affect on doing mindfulness

Daily survey relevent columns:

* Time prompt to complete survey
* Title of notification
* Content of notification

Get relevant participants

In [6]:
# Get participants ids
participant_emails = participants.email.values

# Filter daily_survey
daily_survey_filt = daily_survey.loc[daily_survey.email.isin(participant_emails), :]

In [7]:
participant_emails

array(['samcarlin@gmail.com', 'jessica.riviere@gmail.com',
       'jay.springfield@gmail.com', 'ohioleslie@gmail.com',
       'foreverlaur@gmail.com', 'hjdeering@gmail.com',
       'arhufnagle@gmail.com', 'paulski13@gmail.com',
       'grli7272@gmail.com', 'ysmncs@gmail.com'], dtype=object)

In [8]:
len(np.unique(participant_emails))

10

Make dummy vars for method and content type

In [9]:
daily_survey_filt['method_content'] = daily_survey_filt['method'] + '|' + daily_survey_filt['content']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [10]:
# Add on dummy vars
daily_survey_filt = pd.concat([daily_survey_filt, pd.get_dummies(daily_survey_filt['method'])], axis=1)
daily_survey_filt = pd.concat([daily_survey_filt, pd.get_dummies(daily_survey_filt['method_content'])], axis=1)

# Add on timestamp
daily_survey_filt['timestamp'] = pd.to_datetime(daily_survey_filt['alarm_millis'], unit='ms')

Mobile survey - get into a spot whre I could do regression

In [20]:
nhis_data = pd.read_csv('../../../Spring_2020/DS_in_the_Wild/Hw_1/input_data/nhis_input.csv')

In [22]:
nhis_data.AGE_P.min()

18

In [11]:
mobile_survey_filt = mobile_survey.loc[mobile_survey.email.isin(participant_emails), :]

In [12]:
mobile_survey_filt

Unnamed: 0,id,email,code,header,response,created_at
7,223,arhufnagle@gmail.com,461985,"timestamp,q1-mindful-engage","2018-12-10T10:47:24--18000000,yes-with-app,;",2018-12-10 23:35:01.716901
8,226,jay.springfield@gmail.com,461985,"timestamp,q1-mindful-engage","2018-12-10T22:14:4--18000000,yes-with-app,;",2018-12-11 16:36:07.223174
9,224,ohioleslie@gmail.com,461985,"timestamp,q1-mindful-engage","2018-12-10T22:8:47--18000000,yes-with-app,;201...",2018-12-11 04:49:02.105008
10,225,jessica.riviere@gmail.com,461985,"timestamp,q1-mindful-engage","2018-12-10T9:43:41--18000000,no,;",2018-12-11 12:17:25.033384
11,228,arhufnagle@gmail.com,461985,"timestamp,q1-mindful-engage","2018-12-11T10:29:0--18000000,yes-with-app,;",2018-12-11 23:32:04.633803
...,...,...,...,...,...,...
219,393,grli7272@gmail.com,461985,"timestamp,q1-mindful-engage","2019-3-6T21:0:31--18000000,yes-on-my-own,;",2019-03-07 03:35:36.281551
220,394,grli7272@gmail.com,461985,"timestamp,q1-mindful-engage","2019-3-7T20:56:15--18000000,yes-on-my-own,;",2019-03-08 03:34:36.826764
221,395,grli7272@gmail.com,461985,"timestamp,q1-mindful-engage","2019-3-8T20:51:39--18000000,yes-on-my-own,;",2019-03-09 03:50:32.650308
223,414,grli7272@gmail.com,461985,"timestamp,q1-mindful-engage","2019-4-6T22:31:15--18000000,yes-on-my-own,;",2019-04-07 10:49:06.856001


In [11]:
ids = []
email = []
code = []
header = []
timestamp = []
response = []
created_at = []

# Expand the survey responses
for ind in mobile_survey_filt.index:
    response_split = mobile_survey_filt.loc[ind, 'response'].split(';')
    # Go through each response split and log
    for r in response_split:
        temp = r.split('--18000000,')
        if len(temp) > 1:
            # Append
            ids.append(mobile_survey_filt.loc[ind, 'id'])
            email.append(mobile_survey_filt.loc[ind, 'email'])
            code.append(mobile_survey_filt.loc[ind, 'code'])
            header.append(mobile_survey_filt.loc[ind, 'header'])
            timestamp.append(temp[0])
            response.append(temp[1][:-1])
            created_at.append(mobile_survey_filt.loc[ind, 'created_at'])
            
mobile_survey_cleaned = pd.DataFrame({
    'id': ids,
    'email': email,
    'code': code,
    'header': header,
    'timestamp_mindfulness': timestamp,
    'response': response,
    'created_at': created_at
})
mobile_survey_cleaned['timestamp_mindfulness'] = pd.to_datetime(
    mobile_survey_cleaned['timestamp_mindfulness'],
    format='%Y-%m-%dT%H:%M:%S'
)

Datetime

In [12]:
mobile_survey_cleaned['day'] = mobile_survey_cleaned.timestamp_mindfulness.dt.floor('D')
daily_survey_filt['day'] = daily_survey_filt.timestamp.dt.floor('D')

Join to be able to see whether notification led to app response.

In [13]:
daily_survey_filt.loc[daily_survey.email == 'arhufnagle@gmail.com', 'title'].value_counts()

 You have a new survey.    57
 New PAM survey.           56
                           25
Name: title, dtype: int64

In [130]:
temp = daily_survey_filt.loc[daily_survey_filt[' push_notification'] == 1, :]

notif_w_mindfulness_resp = pd.merge(
    left=mobile_survey_cleaned[['email', 'timestamp_mindfulness', 'response', 'day']],
    right=temp[[
     'email',
     ' push_notification',
     'timestamp',
     'day']
    ],
    on=['day', 'email'],
    how='left'
)

Fill on days when push notification did not occur

In [131]:
notif_w_mindfulness_resp[' push_notification'].fillna(0, inplace=True)

In [132]:
notif_w_mindfulness_resp[' push_notification'].value_counts(normalize=True)

0.0    0.646512
1.0    0.353488
Name:  push_notification, dtype: float64

Count, for now, all responses as just yes/no

In [133]:
response_map = {
    'yes-with-app': 1, 
    'no': 0, 
    'yes-on-my-own': 1
}

notif_w_mindfulness_resp['outcome'] = notif_w_mindfulness_resp.response.map(response_map)

Mark that when a notification occurred, did it occur on the same day

In [134]:
notif_w_mindfulness_resp['before'] = 1
notif_w_mindfulness_resp.loc[
    notif_w_mindfulness_resp.timestamp_mindfulness < notif_w_mindfulness_resp.timestamp, 'before'
] = 0

Now put value to see whether mindfulness was done on previous day

In [135]:
import datetime

In [136]:
notif_w_mindfulness_resp['prev_day_outcome'] = 0
notif_w_mindfulness_resp = \
    notif_w_mindfulness_resp.sort_values(['email', 'timestamp_mindfulness']).reset_index(drop=True)
notif_w_mindfulness_resp['day'] = notif_w_mindfulness_resp['timestamp_mindfulness'].dt.floor('D')

for e in notif_w_mindfulness_resp.email.unique():
    # Filter
    ind = notif_w_mindfulness_resp.loc[notif_w_mindfulness_resp.email == e, :].index
    days = notif_w_mindfulness_resp.loc[ind, 'day'].unique()
    # For each day
    for d in days[1:]:
        # Get prev_day
        prev_day = d - (24*3600*int(1e9))
        # Check if day exists in df
        if prev_day in days:  
            # Find outcomes
            outcome = notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == prev_day) &
                (notif_w_mindfulness_resp.email == e), 'outcome'
            ].values
            # Check if 1 is an outcome (some mindfulness was done)
            if 1 in outcome:
                # Set prev day outcome
                notif_w_mindfulness_resp.loc[
                (notif_w_mindfulness_resp.day == d) &
                (notif_w_mindfulness_resp.email == e), 'prev_day_outcome'
            ] = 1

## Run Logistic regression

In [137]:
from statsmodels.discrete.discrete_model import Logit

In [138]:
notif_w_mindfulness_resp['intercept'] = 1

Going to add response to be binary for each category (yes with app might increase)

In [139]:
notif_w_mindfulness_resp = pd.concat([
    notif_w_mindfulness_resp,
    pd.get_dummies(notif_w_mindfulness_resp['response'])
], axis=1)

In [140]:
notif_w_mindfulness_resp

Unnamed: 0,email,timestamp_mindfulness,response,day,push_notification,timestamp,outcome,before,prev_day_outcome,intercept,no,yes-on-my-own,yes-with-app
0,arhufnagle@gmail.com,2018-12-08 10:05:26,yes-with-app,2018-12-08,0.0,NaT,1,1,0,1,0,0,1
1,arhufnagle@gmail.com,2018-12-09 10:00:45,yes-with-app,2018-12-09,1.0,2018-12-09 00:19:00,1,1,1,1,0,0,1
2,arhufnagle@gmail.com,2018-12-10 10:47:24,yes-with-app,2018-12-10,1.0,2018-12-10 23:30:00,1,0,1,1,0,0,1
3,arhufnagle@gmail.com,2018-12-11 10:29:00,yes-with-app,2018-12-11,0.0,NaT,1,1,1,1,0,0,1
4,arhufnagle@gmail.com,2018-12-12 08:46:21,yes-with-app,2018-12-12,1.0,2018-12-12 23:30:00,1,0,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,ysmncs@gmail.com,2019-03-11 22:21:40,yes-with-app,2019-03-11,0.0,NaT,1,1,1,1,0,0,1
211,ysmncs@gmail.com,2019-03-15 00:29:37,no,2019-03-15,0.0,NaT,0,1,0,1,1,0,0
212,ysmncs@gmail.com,2019-03-15 22:15:15,yes-with-app,2019-03-15,0.0,NaT,1,1,0,1,0,0,1
213,ysmncs@gmail.com,2019-03-19 22:42:19,yes-on-my-own,2019-03-19,0.0,NaT,1,1,0,1,0,1,0


In [235]:
def run_model(X, features, outcome_var):
    model = Logit(X[outcome_var], X[features])
    fit_model = model.fit(method='newton')
    return fit_model.summary()

In [246]:
f = [' push_notification', 'intercept']# 'prev_day_outcome', 'intercept']
o = 'yes-with-app'

yes_w_app_summ = run_model(data_for_model, f, o)

yes_w_app_summ

Optimization terminated successfully.
         Current function value: 120.133093
         Iterations 5


  return 1 - self.llf/self.llnull


0,1,2,3
Dep. Variable:,yes-with-app,No. Observations:,191.0
Model:,Logit,Df Residuals:,189.0
Method:,MLE,Df Model:,1.0
Date:,"Thu, 23 Jan 2020",Pseudo R-squ.:,inf
Time:,22:30:16,Log-Likelihood:,-22945.0
converged:,True,LL-Null:,0.0
Covariance Type:,nonrobust,LLR p-value:,1.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
push_notification,0.7172,0.335,2.143,0.032,0.061,1.373
intercept,-0.8714,0.186,-4.685,0.000,-1.236,-0.507


In [247]:
pd.read_html(yes_w_app_summ.tables[1].as_html(), header=0, index_col=0)[0]

Unnamed: 0,coef,std err,z,P>|z|,[0.025,0.975]
push_notification,0.7172,0.335,2.143,0.032,0.061,1.373
intercept,-0.8714,0.186,-4.685,0.0,-1.236,-0.507


In [238]:
f = [' push_notification', 'prev_day_outcome', 'intercept']
o = 'yes-on-my-own'

yes_on_my_own_summ = run_model(data_for_model, f, o)

yes_on_my_own_summ

Optimization terminated successfully.
         Current function value: 158.670758
         Iterations 6


  return 1 - self.llf/self.llnull


0,1,2,3
Dep. Variable:,yes-on-my-own,No. Observations:,191.0
Model:,Logit,Df Residuals:,188.0
Method:,MLE,Df Model:,2.0
Date:,"Thu, 23 Jan 2020",Pseudo R-squ.:,inf
Time:,17:07:45,Log-Likelihood:,-30306.0
converged:,True,LL-Null:,0.0
Covariance Type:,nonrobust,LLR p-value:,1.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
push_notification,-1.0537,0.411,-2.562,0.010,-1.860,-0.248
prev_day_outcome,0.7974,0.322,2.478,0.013,0.167,1.428
intercept,-0.8339,0.231,-3.607,0.000,-1.287,-0.381


In [240]:
pd.read_html(yes_on_my_own_summ.tables[1].as_html(), header=0, index_col=0)[0]

Unnamed: 0,coef,std err,z,P>|z|,[0.025,0.975]
push_notification,-1.0537,0.411,-2.562,0.01,-1.86,-0.248
prev_day_outcome,0.7974,0.322,2.478,0.013,0.167,1.428
intercept,-0.8339,0.231,-3.607,0.0,-1.287,-0.381


In [241]:
data_for_model.shape

(191, 13)

In [245]:
data_for_model

Unnamed: 0,email,timestamp_mindfulness,response,day,push_notification,timestamp,outcome,before,prev_day_outcome,intercept,no,yes-on-my-own,yes-with-app
0,arhufnagle@gmail.com,2018-12-08 10:05:26,yes-with-app,2018-12-08,0.0,NaT,1,1,0,1,0,0,1
1,arhufnagle@gmail.com,2018-12-09 10:00:45,yes-with-app,2018-12-09,1.0,2018-12-09 00:19:00,1,1,1,1,0,0,1
3,arhufnagle@gmail.com,2018-12-11 10:29:00,yes-with-app,2018-12-11,0.0,NaT,1,1,1,1,0,0,1
6,arhufnagle@gmail.com,2018-12-14 09:59:25,yes-with-app,2018-12-14,0.0,NaT,1,1,1,1,0,0,1
7,arhufnagle@gmail.com,2018-12-15 21:05:33,no,2018-12-15,0.0,NaT,0,1,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,ysmncs@gmail.com,2019-03-11 22:21:40,yes-with-app,2019-03-11,0.0,NaT,1,1,1,1,0,0,1
211,ysmncs@gmail.com,2019-03-15 00:29:37,no,2019-03-15,0.0,NaT,0,1,0,1,1,0,0
212,ysmncs@gmail.com,2019-03-15 22:15:15,yes-with-app,2019-03-15,0.0,NaT,1,1,0,1,0,0,1
213,ysmncs@gmail.com,2019-03-19 22:42:19,yes-on-my-own,2019-03-19,0.0,NaT,1,1,0,1,0,1,0
