In [None]:
import pandas as pd
import sqlalchemy
from matplotlib import pyplot as plt
import datetime
import copy

In [None]:
# Build the MySQL connection URL. The 'read_default_file' query option points the
# driver at ~/.my.cnf (presumably where the credentials live — confirm), so no
# username/password appears in the notebook.
# SQLAlchemy 1.4+ deprecates calling URL(...) directly in favour of URL.create();
# fall back to the constructor only on older versions that lack URL.create.
_make_url = getattr(sqlalchemy.engine.url.URL, 'create', sqlalchemy.engine.url.URL)
db = _make_url(drivername='mysql',
               host='127.0.0.1',
               database='sherman_1',
               query={'read_default_file': '~/.my.cnf', 'charset':'utf8'})
engine = sqlalchemy.create_engine(db)

In [None]:
# Load every row and column of the iOS activity-recognition table into a DataFrame.
activity_query = '''
    SELECT * FROM sherman_1.plugin_ios_activity_recognition;
'''
df = pd.read_sql(activity_query, con=engine)

In [None]:
# Merge sensor data for participants that appear under two device ids:
# rows recorded under a selected 'device_id2' are relabelled with the matching 'device_id1'.
df_id = pd.read_csv('/home/memari/jhu_centering/tables/id.csv')

# Row labels of id.csv whose 'device_id2' should be folded into 'device_id1'.
l = [17,18,61,20,22,37]
# dropna() makes sure a missing second id can never become a lookup key.
l2 = set(df_id.loc[l,'device_id2'].dropna())

# Build {'device_id2': 'device_id1'} from every id.csv row whose second id was selected;
# if a second id occurs on several rows, the last row wins.
matches = df_id[df_id['device_id2'].isin(l2)]
my_dict = dict(zip(matches['device_id2'], matches['device_id1']))

# Relabel in one pass over the column instead of writing row by row with df.loc,
# which is extremely slow on a large sensor table. Ids not in the dict are kept as-is.
df['device_id'] = df['device_id'].map(lambda device_id: my_dict.get(device_id, device_id))

In [None]:
# Turn the epoch-millisecond 'timestamp' column into one calendar date per row.
# NOTE(review): no timezone conversion is applied, so dates are presumably UTC-based — confirm.
timestamps = pd.to_datetime(df['timestamp'], unit='ms')
df['date'] = timestamps.dt.date

In [None]:
# Drop rows labelled 'supplement' and keep only the grouping keys plus the activity columns.
# Both steps are applied in a single .loc so the row filter is not lost: selecting the
# columns from `df` again would silently bring the 'supplement' rows back.
df_less = df.loc[
    df['label'] != 'supplement',
    ['device_id','date','stationary','walking','running','automotive','cycling','unknown'],
]

In [None]:
# Sum every activity column per (device, calendar date); reset_index() turns the
# group keys back into ordinary columns.
df_output = (
    df_less
    .groupby(['device_id','date'])
    .sum()
    .reset_index()
)
df_output

In [None]:
# Persist the per-device, per-day totals (written to the current working directory, without the index).
df_output.to_csv(path_or_buf="ios_activity_cleaned.csv", index=False)

# Filter to days 1-28, counted from each device's first recorded day

In [None]:
def find_first_day(device_id, df_cleaned):
    """Return the earliest ``date`` among the rows of ``df_cleaned`` for ``device_id``.

    ``df_cleaned`` must have ``device_id`` and ``date`` columns. If no row
    matches ``device_id``, ``min`` raises ``ValueError`` on the empty selection.
    """
    is_device = df_cleaned['device_id'] == device_id
    device_dates = df_cleaned.loc[is_device, 'date']
    return min(device_dates)

In [None]:
def get_1_to_28(df_cleaned, start_date=datetime.date(year=2022,month=11,day=11), max_day=28):
    """Number each eligible device's days from its first record and keep the first ``max_day``.

    A device is eligible when its earliest ``date`` in ``df_cleaned`` is on or
    after ``start_date``; all rows of other devices are dropped. For the
    remaining rows, ``day`` is 1 on the device's earliest date, 2 on the next
    calendar day, and so on. Rows with ``day > max_day`` are dropped.

    Parameters
    ----------
    df_cleaned : pandas.DataFrame
        Must contain ``device_id`` and ``date`` columns. ``date`` values must be
        comparable and their difference must expose ``.days``
        (e.g. ``datetime.date``).
    start_date : datetime.date, optional
        Earliest allowed first day for a device (default 2022-11-11).
    max_day : int, optional
        Last day number to keep (default 28).

    Returns
    -------
    pandas.DataFrame
        A new frame with an added integer ``day`` column; the input frame is
        not modified.
    """
    # One pass over the rows to find each device's earliest date, instead of
    # re-scanning the whole frame (twice) for every device.
    first_day = {}
    for device_id, date in zip(df_cleaned['device_id'], df_cleaned['date']):
        if device_id not in first_day or date < first_day[device_id]:
            first_day[device_id] = date
    first_day = {device_id: day for device_id, day in first_day.items() if day >= start_date}

    # .copy() so that adding 'day' writes to an independent frame rather than to
    # a slice of the caller's data (avoids SettingWithCopyWarning).
    kept = df_cleaned[df_cleaned['device_id'].isin(list(first_day))].copy()
    day_numbers = [
        1 + (date - first_day[device_id]).days
        for device_id, date in zip(kept['device_id'], kept['date'])
    ]
    # Explicit int64 dtype keeps the column well-typed even when no rows remain.
    kept['day'] = pd.Series(day_numbers, index=kept.index, dtype='int64')
    return kept[kept['day'] <= max_day]

In [None]:
# Restrict the daily totals to each eligible device's first 28 days and display the result.
df_cleaned_1_28 = get_1_to_28(df_cleaned=df_output)
df_cleaned_1_28

# Line plot: mean daily activity ratios across all devices

In [None]:
# Convert each device-day's activity totals into ratios of that day's overall total,
# then average the ratios across devices for every study day.
features = ['stationary','walking','running','automotive','cycling','unknown']
df_plot = df_cleaned_1_28[features + ['day']].copy()

# skipna=False mirrors plain '+' addition: a missing value makes the total missing.
total = df_plot[features].sum(axis=1, skipna=False)

# Vectorised division instead of one row-wise apply per feature.
# A device-day whose total is not positive gets ratio 0 for every activity, the
# same rule the per-group computation further down uses, so the two plots agree.
df_plot[features] = df_plot[features].div(total.where(total > 0), axis=0).fillna(0)

df_plot = df_plot.groupby('day').mean().reset_index()
df_plot


In [None]:
# One line per activity: mean ratio of that activity on each study day.
fig, ax = plt.subplots(figsize=(8, 6), dpi=300)
# Use the day numbers actually present in df_plot as x; a hard-coded 1..28 range
# raises a length-mismatch error as soon as any day has no data.
days = df_plot['day'].tolist()
for feature in ['stationary','walking','running','automotive','cycling','unknown']:
    ax.plot(days, df_plot[feature].tolist(), label=feature)
ax.set_xlim(1, 28)
ticks = list(range(1, 29))
ax.set_xticks(ticks)
ax.set_xticklabels(ticks)
ax.set_title("Ratio of Daily Physical Activities")
ax.set_xlabel("Days After Join")
ax.set_ylabel('Ratio')
ax.legend()
plt.show()

# Line plots by study group: mean daily activity ratios

In [None]:
# Device -> study-id lookup: one row per study_id, without the 'id' column.
df_id = (
    pd.read_csv("/data/meditation/majid/tables/id.csv")
    .drop_duplicates('study_id')
    .drop('id', axis = 1)
)
# Positional rename: assumes exactly two columns remain, device id first — TODO confirm.
df_id.columns = ['device_id','Study ID']

# Survey export; file rows 1 and 2 (0-based, directly after the header line) are skipped.
survey = pd.read_csv('/data/meditation/majid/surveys/0.csv', skiprows=[1,2])
# Left merge keeps every device even when the survey has no Group for its study id.
df_group = df_id.merge(survey[['Study ID','Group']], on = 'Study ID', how = 'left')
df_group

In [None]:
# Same ratio computation as the overall plot, but averaged per (study group, day).
features = ['stationary','walking','running','automotive','cycling','unknown']
df_plot = df_cleaned_1_28.copy()
# Attach each device's study group; devices without a group get NaN and are
# dropped by the groupby below.
df_plot = df_plot.merge(df_group[['device_id','Group']], on = 'device_id', how = 'left')
df_plot = df_plot[features + ['day','Group']]

# skipna=False mirrors plain '+' addition: a missing value makes the total missing.
total = df_plot[features].sum(axis=1, skipna=False)

# Vectorised replacement for the per-feature row-wise apply: divide by the daily
# total, and give ratio 0 to device-days whose total is not positive.
df_plot[features] = df_plot[features].div(total.where(total > 0), axis=0).fillna(0)

df_plot = df_plot.groupby(['Group','day']).mean().reset_index()
df_plot


In [None]:
# One figure per activity, with one line per study group.
ticks = list(range(1, 29))
for feature in ['stationary','walking','automotive']:
    fig, ax = plt.subplots(figsize=(8, 6), dpi=300)
    for group in ['Active Control', 'Experimental Condition', 'Passive Control']:
        df_target = df_plot[df_plot['Group']==group]
        # Plot against the days this group actually has; a hard-coded 1..28 range
        # raises a length-mismatch error when a group is missing any day.
        ax.plot(df_target['day'].tolist(), df_target[feature].tolist(), label = group)
    ax.set_xlim(1, 28)
    ax.set_xticks(ticks)
    ax.set_xticklabels(ticks)
    ax.set_title("Ratio of Daily Physical Activities - "+feature)
    ax.set_xlabel("Days After Join")
    ax.set_ylabel('Ratio')
    ax.legend()
    plt.show()