In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pytz
from scipy.stats import linregress

# Allow up to 1000 rows to be shown before pandas truncates a displayed frame.
pd.set_option('display.max_rows', 1000)
# Register pandas' date/period converters with matplotlib so the date-based
# plots further down can use pandas datetime values on their axes.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [None]:
# The category list doubles as the canonical list of report types that the
# later cells iterate over.
report_types = pd.api.types.CategoricalDtype(['mood', 'sleep', 'anticipatoryStress', 'ruminationStress'])

# The dtype key has to be the real column name. Every later cell filters on
# subj_measures['type']; a key that matches no column has no effect, so the
# previous 'types' key left the column as plain strings.
subj_measures = pd.read_csv('subj_measures.csv', dtype={'type': report_types}, parse_dates=['date'])
# Chronological order matters: the per-user first()/last() look-ups and the
# month ordering computed later rely on it.
subj_measures = subj_measures.sort_values('date').reset_index(drop=True)

eastern = pytz.timezone('US/Eastern')
# NOTE(review): tz_convert needs timezone-aware timestamps, so this assumes the
# CSV 'date' strings carry a UTC offset -- confirm against the export format.
subj_measures['date_eastern'] = subj_measures['date'].dt.tz_convert(eastern)

local_time = subj_measures['date_eastern']
# Fractional hour of the local day, e.g. 13.5 for 1:30 pm.
subj_measures['time_of_day'] = local_time.dt.hour + local_time.dt.minute / 60
subj_measures['hour'] = local_time.dt.hour
# Periods carry no timezone; dropping it explicitly (keeping local wall time)
# yields the same months without the "will drop timezone information" warning.
subj_measures['month'] = local_time.dt.tz_localize(None).dt.to_period(freq='M')

In [None]:
# Distinct user ids, in order of first appearance in subj_measures.
unique_users = subj_measures['user_id'].unique()
print(unique_users)

In [None]:
# One row per user, indexed by user_id. Every assignment below is a Series
# keyed by user_id, so pandas aligns it onto this index.
user_df = pd.DataFrame({'user_id': unique_users}).set_index('user_id')

# Pick the column *before* aggregating: averaging the whole frame would try to
# average the non-numeric columns too, which recent pandas versions reject.
reports_by_user = subj_measures.groupby('user_id')
user_df['first_date'] = reports_by_user['date_eastern'].first()
user_df['last_date'] = reports_by_user['date_eastern'].last()
# Whole days between a user's first and last report (0 if both are on one day).
user_df['num_days'] = (user_df['last_date'] - user_df['first_date']).dt.days
user_df['num_reports'] = reports_by_user['value'].count()

for metric in report_types.categories.values:
    is_metric = subj_measures['type'] == metric
    user_df['avg_' + metric] = subj_measures.loc[is_metric].groupby('user_id')['value'].mean()

# Sleep reports filed during local hours 8 through 12 inclusive.
morning_hours = [8, 9, 10, 11, 12]
is_morning_sleep = (subj_measures['type'] == 'sleep') & subj_measures['hour'].isin(morning_hours)
# Users with no such report are absent from the grouped result; they have
# zero morning reports, not an unknown number, so fill with 0 rather than NaN.
user_df['morning_reports_sleep'] = (
    subj_measures.loc[is_morning_sleep]
    .groupby('user_id')['value']
    .count()
    .reindex(user_df.index, fill_value=0)
)
user_df

In [None]:
def single_user_by_month(user_id, subj_measures=subj_measures, metrics=report_types.categories.values):
    """Summarise one user's reports as one row per calendar month.

    Parameters
    ----------
    user_id
        Value matched against the ``user_id`` column.
    subj_measures : DataFrame
        Report table with ``user_id``, ``type``, ``value`` and ``month``
        columns. The default is bound when this cell runs, so re-run the cell
        after reloading the data.
    metrics : iterable of str
        Report types to summarise.

    Returns
    -------
    DataFrame
        A ``month`` column (ascending) plus, for every metric, ``<metric>``
        (mean ``value`` that month, NaN when there were no reports) and
        ``num_<metric>`` (integer report count, 0 when there were none).
    """
    user_rows = subj_measures[subj_measures['user_id'] == user_id]

    unique_months = user_rows['month'].unique()
    # Sort so the row position always follows calendar order, even if the
    # caller passes a frame that is not sorted by date.
    single_user_df = pd.DataFrame({'month': unique_months}).set_index('month').sort_index()

    for metric in metrics:
        # Select 'value' before aggregating so non-numeric columns are never
        # averaged (recent pandas versions raise on that).
        monthly_values = user_rows.loc[user_rows['type'] == metric].groupby('month')['value']
        single_user_df[metric] = monthly_values.mean()
        single_user_df['num_' + metric] = monthly_values.count()
        # Months without this metric come back as NaN after alignment; they are
        # genuine zeros. Cast back to int so the dtype does not depend on
        # whether any month happened to be missing.
        single_user_df['num_' + metric] = single_user_df['num_' + metric].fillna(0).astype(int)

    return single_user_df.reset_index()


def progress_plot(user_id, metrics=['mood', 'sleep'], subj_measures=subj_measures):
    """Plot a user's monthly mean scores above a stacked bar chart of report counts.

    Parameters
    ----------
    user_id
        User whose reports are plotted.
    metrics : list of str
        Report types to draw; each one needs ``<metric>`` and ``num_<metric>``
        columns in the monthly summary.
    subj_measures : DataFrame
        Report table handed on to ``single_user_by_month``. The default is
        bound when this cell runs.

    The figure is drawn as a side effect; nothing is returned.
    """
    # Forward the caller's table and metric list; previously the
    # subj_measures argument was accepted but silently ignored.
    by_month = single_user_by_month(user_id, subj_measures=subj_measures, metrics=metrics)
    month_start = by_month['month'].dt.to_timestamp()

    f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(10, 6), gridspec_kw={'height_ratios': [3, 1]})

    for metric in metrics:
        # Axes.plot handles datetime x-values directly; plot_date is deprecated.
        ax1.plot(month_start, by_month[metric], '-o', label=metric)

    ax1.set_yticks(np.arange(0, 4.5, 0.5))
    ax1.set_ylabel("Score")
    ax1.legend()

    # Running top of the stacked bars. Built from the row count with a float
    # dtype so it does not depend on a leftover loop variable and never hits an
    # int/float in-place casting error.
    stacked = np.zeros(len(by_month), dtype=float)
    for metric in metrics:
        counts = by_month['num_' + metric].to_numpy(dtype=float)
        ax2.bar(month_start, counts, width=7.0, bottom=stacked)
        stacked = stacked + counts
    ax2.tick_params(labelrotation=25)
    ax2.set_ylabel("Total Reports")

    ax1.set_title("Monthly Trend")


progress_plot(2012, metrics=['mood', 'sleep'])

In [None]:
def monthly_trend(user_id, metric='mood', by_month=None):
    """Return the least-squares slope of a metric across a user's monthly rows.

    Parameters
    ----------
    user_id
        User to summarise; only used when ``by_month`` is not supplied.
    metric : str
        Column of ``by_month`` to regress.
    by_month : DataFrame, optional
        Precomputed monthly summary. When omitted it is built with
        ``single_user_by_month(user_id)``.

    Returns
    -------
    float
        Slope of ``metric`` against the row index of ``by_month``, skipping
        rows where the metric is NaN. NaN when fewer than two rows have a
        value, since a slope is undefined there.
    """
    if by_month is None:
        # Use the requested user (this used to be hard-coded to user 2025).
        by_month = single_user_by_month(user_id)

    observed = by_month[metric].dropna()
    if len(observed) < 2:
        return float('nan')

    # NOTE(review): x is the row index, not the calendar month, so a user who
    # skipped months is treated as if the months were consecutive -- confirm
    # this is the intended definition of the trend.
    result = linregress(observed.index.to_numpy(), observed.to_numpy())

    return result.slope

# Per-user slope of every report type across that user's monthly summary.
trend_metrics = report_types.categories.values

# Start from NaN rather than 0.0 so a slot that is never filled cannot be
# mistaken for a genuinely flat trend.
for metric in trend_metrics:
    user_df['trend_' + metric] = np.nan

for user_id in unique_users:
    # Build the monthly summary once per user and reuse it for every metric.
    by_month = single_user_by_month(user_id)
    for metric in trend_metrics:
        try:
            slope = monthly_trend(user_id, metric=metric, by_month=by_month)
        except Exception:
            # Best effort: a regression that cannot be computed (for example
            # too few months with data) is recorded as NaN. Catching Exception
            # instead of a bare except lets KeyboardInterrupt through.
            slope = np.nan
        user_df.loc[user_id, 'trend_' + metric] = slope

user_df

In [None]:
# Morning sleep reports per day between a user's first and last report.
# Users whose reports all fall on one day have num_days == 0; mask them out
# instead of dividing by zero (the resulting inf/NaN never landed in a bin).
tracked_days = user_df['num_days'].where(user_df['num_days'] > 0)
morning_sleep_rate = user_df['morning_reports_sleep'] / tracked_days

fig, ax = plt.subplots()
ax.hist(morning_sleep_rate.dropna(), bins=np.arange(0, 0.4, 0.005))
ax.set_xlabel("Morning sleep reports per day")
ax.set_ylabel("Number of users")
ax.set_title("Morning sleep reporting rate per user");

In [None]:
# Distribution of report times over the local day, in 15-minute bins.
# Reuses the 'time_of_day' column computed at load time instead of rebuilding
# the same hour + minute/60 expression here.
fig, ax = plt.subplots(figsize=(16, 12))
ax.hist(subj_measures['time_of_day'], bins=np.arange(0, 24.25, 0.25))
ax.set_xticks(np.arange(0, 24, 4))
ax.set_xlabel("Time of day (hours, US/Eastern)")
ax.set_ylabel("Number of reports")
ax.set_title("Report submission times");

In [None]:
# Mean reported value per local hour of day for three of the report types.
# Selecting [['value']] before mean() keeps each result a DataFrame with a
# 'value' column while never averaging the non-numeric columns, which recent
# pandas versions reject.
hourly_metrics = ['mood', 'sleep', 'anticipatoryStress']
by_hour = {
    metric_name: subj_measures.loc[subj_measures['type'] == metric_name]
    .groupby('hour')[['value']]
    .mean()
    for metric_name in hourly_metrics
}
# Keep the original per-metric names available for any later cells.
by_hour_mood = by_hour['mood']
by_hour_sleep = by_hour['sleep']
by_hour_anticip = by_hour['anticipatoryStress']

fig, ax = plt.subplots()
for metric_name, hourly in by_hour.items():
    ax.plot(hourly.index, hourly['value'], label=metric_name)
ax.set_xlabel("Hour of day (US/Eastern)")
ax.set_ylabel("Mean reported value")
ax.set_title("Mean report value by hour of day")
ax.legend();

In [None]:
# Display the report-type category names declared on the report_types dtype.
report_types.categories.values