### Data preparation

In [72]:
import pandas as pd
import os

# Name of the mood log file; it is only stored here, not read in this cell.
mood_log = "mood_log.csv"

# Load the four generated logs from the parent directory, one frame per source,
# in the order sleep, exercise, schedule, mood.
sleep_df, exercise_df, schedule_df, mood_df = map(
    pd.read_csv,
    (
        "../Generated_Sleep_Log.csv",
        "../Generated_Exercise_Log.csv",
        "../Generated_Schedule_Log.csv",
        "../Generated_Mood_Log.csv",
    ),
)

# Preview the first rows of the sleep log.
sleep_df.head()

Unnamed: 0,Date,Sleep Start,Wake-Up Time,Duration (Hours)
0,2025-01-01,03:00:00,06:00:00,3.0
1,2025-01-02,02:00:00,09:00:00,7.0
2,2025-01-03,23:00:00,09:00:00,10.0
3,2025-01-04,23:00:00,07:00:00,8.0
4,2025-01-05,03:00:00,09:00:00,6.0


In [73]:
# Parse the "Date" column of every log into pandas datetimes so all four frames
# carry the same dtype in the column that is used as the merge key further down.
for log_frame in (sleep_df, exercise_df, schedule_df, mood_df):
    log_frame["Date"] = pd.to_datetime(log_frame["Date"])

In [74]:
# The exercise log can hold more than one entry per day. Merging it as-is would
# produce redundant rows (the same date and sleep times repeated once per
# workout), so collapse it to one row per date with the total calories burned.
exercise_daily = exercise_df.groupby("Date", as_index=False)["Calories Burned"].sum()

# The schedule log has the same several-entries-per-day shape: add up the
# expected stress of all tasks on a date, then cap the daily total.
MAX_DAILY_STRESS = 10  # upper bound applied to the summed daily stress level
schedule_stress_daily = schedule_df.groupby("Date", as_index=False)["Expected Stress Level"].sum()

# Series.clip caps every value at the bound in a single vectorised call.
schedule_stress_daily["Adjusted Stress Level"] = schedule_stress_daily["Expected Stress Level"].clip(upper=MAX_DAILY_STRESS)

# Keep only the merge key and the capped value for the final table.
cleaned_schedule_df = schedule_stress_daily[["Date", "Adjusted Stress Level"]]
cleaned_schedule_df.head()




Unnamed: 0,Date,Adjusted Stress Level
0,2025-01-01,2
1,2025-01-02,2
2,2025-01-03,1
3,2025-01-04,8
4,2025-01-05,4


In [75]:
# Start from the sleep log and left-join the per-day exercise totals, the capped
# stress levels and the mood log on "Date". A left join keeps every sleep row
# even when one of the other sources has no entry for that day.
merged_df = (
    sleep_df
    .merge(exercise_daily, how="left", on="Date")
    .merge(cleaned_schedule_df, how="left", on="Date")
    .merge(mood_df, how="left", on="Date")
)

merged_df.head()

Unnamed: 0,Date,Sleep Start,Wake-Up Time,Duration (Hours),Calories Burned,Adjusted Stress Level,Mood Score
0,2025-01-01,03:00:00,06:00:00,3.0,41.4,2,6
1,2025-01-02,02:00:00,09:00:00,7.0,64.0,2,4
2,2025-01-03,23:00:00,09:00:00,10.0,216.0,1,3
3,2025-01-04,23:00:00,07:00:00,8.0,68.0,8,6
4,2025-01-05,03:00:00,09:00:00,6.0,85.5,4,5


In [76]:
# Replace every missing value in the merged table with 0 (a date absent from a
# left-joined log shows up as NaN in that log's columns).
# NOTE(review): the fill applies to all columns, so a day without a mood entry
# would get "Mood Score" 0 — confirm that is intended.
merged_df = merged_df.fillna(0)
merged_df.head()

Unnamed: 0,Date,Sleep Start,Wake-Up Time,Duration (Hours),Calories Burned,Adjusted Stress Level,Mood Score
0,2025-01-01,03:00:00,06:00:00,3.0,41.4,2,6
1,2025-01-02,02:00:00,09:00:00,7.0,64.0,2,4
2,2025-01-03,23:00:00,09:00:00,10.0,216.0,1,3
3,2025-01-04,23:00:00,07:00:00,8.0,68.0,8,6
4,2025-01-05,03:00:00,09:00:00,6.0,85.5,4,5


In [77]:
# Write the merged table to disk without the index column, then preview the
# first five rows of what was saved.
merged_df.to_csv(path_or_buf="merged_mood_data.csv", index=False)
merged_df.head(n=5)

Unnamed: 0,Date,Sleep Start,Wake-Up Time,Duration (Hours),Calories Burned,Adjusted Stress Level,Mood Score
0,2025-01-01,03:00:00,06:00:00,3.0,41.4,2,6
1,2025-01-02,02:00:00,09:00:00,7.0,64.0,2,4
2,2025-01-03,23:00:00,09:00:00,10.0,216.0,1,3
3,2025-01-04,23:00:00,07:00:00,8.0,68.0,8,6
4,2025-01-05,03:00:00,09:00:00,6.0,85.5,4,5


### Preprocessing info

In [78]:
# Reload the merged table from disk. read_csv does not parse dates by default,
# so "Date" is converted back to datetimes right after loading.
df = pd.read_csv("merged_mood_data.csv").assign(
    Date=lambda frame: pd.to_datetime(frame["Date"])
)

# Column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   Date                   31 non-null     datetime64[ns]
 1   Sleep Start            31 non-null     object        
 2   Wake-Up Time           31 non-null     object        
 3   Duration (Hours)       31 non-null     float64       
 4   Calories Burned        31 non-null     float64       
 5   Adjusted Stress Level  31 non-null     int64         
 6   Mood Score             31 non-null     int64         
dtypes: datetime64[ns](1), float64(2), int64(2), object(2)
memory usage: 1.8+ KB


In [79]:
# Count the missing values in each column and print the totals.
missing_per_column = df.isna().sum()
print(missing_per_column)

Date                     0
Sleep Start              0
Wake-Up Time             0
Duration (Hours)         0
Calories Burned          0
Adjusted Stress Level    0
Mood Score               0
dtype: int64


In [80]:
# Keep only the rows that have no missing value in any column.
df = df.dropna()