In [5]:
import pandas as pd
from pymongo import MongoClient
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
import json
from bson import ObjectId
from pprint import pprint
import seaborn as sns
import pickle
import ast

In [6]:
# Read the MongoDB login from a local JSON file so no secrets are written in the notebook.
with open('credentials.json') as f:
    data = json.load(f)

# Both keys are required; a missing key raises KeyError here.
username, password = data['username'], data['password']

In [7]:
# Connect to the MongoDB server on 127.0.0.1.
# The credentials are passed as keyword options instead of being interpolated
# into the URI: inside a connection string, reserved characters in a username
# or password (e.g. '@', ':', '/', '%') must be percent-escaped, otherwise the
# URI is mis-parsed or rejected. Keyword options are taken verbatim.
client = MongoClient('mongodb://127.0.0.1', username=username, password=password)
# Handle to the `rais` database.
db = client.rais

In [8]:
# Handle to the `sema` collection; no documents are fetched in this cell.
# The documents are loaded by the filtered query in the next cell. Loading the
# whole collection here as well would only produce a frame that is overwritten
# before it is ever used, at the cost of an extra full read of the collection.
collection = db.sema

In [9]:
# Load only the documents whose MOOD and PLACE answers are both usable, i.e.
# neither the "<no-response>" placeholder nor None.
answered_filter = { '$and': [
    {"data.MOOD": { "$ne": "<no-response>" }},
    {"data.MOOD": { "$ne": None }},
    {"data.PLACE": { "$ne": "<no-response>" }},
    {"data.PLACE": { "$ne": None }},
] }
df = pd.DataFrame(list(collection.find(answered_filter)))

In [10]:
# Expand the `data` column (presumably one dict per row) into separate columns
# prefixed with "data.", aligned on the existing row index, next to `user_id`.
data_columns = pd.DataFrame(df['data'].tolist(), index=df.index).add_prefix('data.')
df = df[['user_id']].join(data_columns)

In [11]:
# Reduce every response to (user_id, calendar day, mood), ordered by day.
# The creation timestamp is parsed a single time, and only the columns that end
# up in the result are built (a separate time-of-day column would be discarded
# by the projection below, so it is not computed).
created_ts = pd.to_datetime(df['data.CREATED_TS'])
df = pd.DataFrame({
    'user_id': df['user_id'],
    'date': created_ts.dt.date,
    'data.MOOD': df['data.MOOD'],
})
df = df.sort_values(by='date', ascending=True)
# Convert the plain date objects into midnight timestamps (datetime64) so the
# column can be compared against date strings in later cells.
df['date'] = pd.to_datetime(df['date'].astype("str"), format='%Y-%m-%d')
df

Unnamed: 0,user_id,date,data.MOOD
2897,621e362467b776a2404ad513,2021-04-22,JOY
4075,621e362467b776a2404ad513,2021-04-23,NEUTRAL
3186,621e362467b776a2404ad513,2021-04-23,NEUTRAL
3669,621e362467b776a2404ad513,2021-04-23,SURPRISE
3295,621e362467b776a2404ad513,2021-04-24,ANGER
...,...,...,...
138,621e2eaf67b776a2406b14ac,2022-01-16,SAD
649,621e346f67b776a24081744f,2022-01-17,TENSE/ANXIOUS
1113,621e30f467b776a240f22944,2022-01-17,NEUTRAL
4946,621e2f9167b776a240011ccb,2022-01-17,TENSE/ANXIOUS


In [12]:
# Keep only the responses submitted during the two experiment rounds.
# `between` includes both end dates, matching a >= / <= comparison.
in_round1 = df['date'].between("2021-05-24", "2021-07-26")
in_round2 = df['date'].between("2021-11-15", "2022-01-17")
df1 = df.loc[in_round1]  # Round1
df2 = df.loc[in_round2]  # Round2
# Stack the rounds and renumber the rows from 0.
df = pd.concat([df1, df2], ignore_index=True)
df

Unnamed: 0,user_id,date,data.MOOD
0,621e362467b776a2404ad513,2021-05-24,HAPPY
1,621e34ca67b776a240be3b69,2021-05-24,TIRED
2,621e32d967b776a240627414,2021-05-24,RESTED/RELAXED
3,621e30e467b776a240e817c7,2021-05-24,TENSE/ANXIOUS
4,621e36f967b776a240e5e7c9,2021-05-24,TIRED
...,...,...,...
4942,621e2eaf67b776a2406b14ac,2022-01-16,SAD
4943,621e346f67b776a24081744f,2022-01-17,TENSE/ANXIOUS
4944,621e30f467b776a240f22944,2022-01-17,NEUTRAL
4945,621e2f9167b776a240011ccb,2022-01-17,TENSE/ANXIOUS


In [13]:
# One-hot encode the mood labels: one 0/1 indicator column per distinct mood.
# The dtype is pinned because the get_dummies default changed to bool in
# pandas 2.0; uint8 is the earlier default, so the result is a 0/1 table
# regardless of the installed pandas version.
sema = pd.get_dummies(df['data.MOOD'], dtype=np.uint8)
sema

Unnamed: 0,ALERT,HAPPY,NEUTRAL,RESTED/RELAXED,SAD,TENSE/ANXIOUS,TIRED
0,0,1,0,0,0,0,0
1,0,0,0,0,0,0,1
2,0,0,0,1,0,0,0
3,0,0,0,0,0,1,0
4,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...
4942,0,0,0,0,1,0,0
4943,0,0,0,0,0,1,0
4944,0,0,1,0,0,0,0
4945,0,0,0,0,0,1,0


In [14]:
# Put the indicator columns next to the user_id/date columns; the textual mood
# column is left out because the indicators replace it.
sema_stress = pd.concat([df.drop(columns=['data.MOOD']), sema], axis=1)

In [15]:
# Display the combined frame: user_id, date and one indicator column per mood.
sema_stress

Unnamed: 0,user_id,date,ALERT,HAPPY,NEUTRAL,RESTED/RELAXED,SAD,TENSE/ANXIOUS,TIRED
0,621e362467b776a2404ad513,2021-05-24,0,1,0,0,0,0,0
1,621e34ca67b776a240be3b69,2021-05-24,0,0,0,0,0,0,1
2,621e32d967b776a240627414,2021-05-24,0,0,0,1,0,0,0
3,621e30e467b776a240e817c7,2021-05-24,0,0,0,0,0,1,0
4,621e36f967b776a240e5e7c9,2021-05-24,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...
4942,621e2eaf67b776a2406b14ac,2022-01-16,0,0,0,0,1,0,0
4943,621e346f67b776a24081744f,2022-01-17,0,0,0,0,0,1,0
4944,621e30f467b776a240f22944,2022-01-17,0,0,1,0,0,0,0
4945,621e2f9167b776a240011ccb,2022-01-17,0,0,0,0,0,1,0


In [16]:
# Persist the prepared frame as a pickle file under data/.
output_path = 'data/sema_stress_read_from_the_base_experiment_dates.pkl'
sema_stress.to_pickle(output_path)