In [19]:
import os

# Get the current working directory and go up one level to reach root
DATA_DIR = os.path.join(os.getcwd(), '..', 'data')
print(f"Data directory: {DATA_DIR}")

# list contents of the data directory
print(os.listdir(DATA_DIR))

import pandas as pd
import json
from datetime import datetime, timedelta

# Add this to help Python find the module
import sys
sys.path.append(os.path.join(os.getcwd(), '..'))

from health_data import fetch_garmin_health_data

Data directory: /Users/maudhelenhovland/Desktop/AffectiveLamp/data_processing/../data
['emotion_dataset.csv', 'garmin_health_data.json', 'merged_data.csv', 'emotion_data.json', 'combined_emotion_data.csv', 'valence_dataset.csv', 'garmin_data.csv', 'heart_rate_emotion_dataset.csv', 'arousal_dataset.csv']


In [20]:
app_data = pd.read_csv('/Users/maudhelenhovland/Desktop/AffectiveLamp/my-va-app/data/emotion_data.csv')
print("Successfully loaded app data")
display(app_data.head())
print("\nApp data shape:", app_data.shape)

# #Drop the hue sat and bri columns
# app_data = app_data.drop(columns=['hue', 'saturation', 'brightness'])
# display(app_data.head())
# print("\nApp data shape:", app_data.shape)

Successfully loaded app data


Unnamed: 0,timestamp,valence,arousal,emotion,hue,saturation,brightness
0,2025-03-27T17:18:00Z,0.537143,0.451429,Happy,21845,254,254.0
1,2025-03-27T17:20:00Z,0.565714,0.245714,Happy,21845,254,254.0
2,2025-03-27T17:20:00Z,0.645714,0.474286,Happy,21845,254,254.0
3,2025-03-27T17:30:00Z,0.466878,0.884322,Excited,21845,254,254.0
4,2025-03-27T17:32:00Z,0.754286,0.377143,Happy,21845,254,254.0



App data shape: (45, 7)


In [21]:
try:
    with open(DATA_DIR+'/emotion_data.json', 'r') as f:
        manual_data = json.load(f)
    manual_df = pd.DataFrame(manual_data)
    print("Successfully loaded manual emotion data")
    display(manual_df.head())
    print("\nManual data shape:", manual_df.shape)
except Exception as e:
    print(f"Error loading manual data: {e}")
    manual_df = pd.DataFrame()


Successfully loaded manual emotion data


Unnamed: 0,timestamp,emotion,valence,arousal
0,2025-02-24T19:05:00Z,Happy,0.98,0.55
1,2025-02-24T18:12:00Z,Happy,0.99,0.5
2,2025-02-24T17:32:00Z,Happy,0.89,0.54
3,2025-02-25T19:05:00Z,Excited,0.67,0.76
4,2025-02-25T16:53:00Z,Neutral,-0.01,-0.04



Manual data shape: (57, 4)


In [22]:
def round_down_to_even_minutes(timestamp):
    """Round down timestamp minutes to nearest even number and format consistently"""
    rounded_minute = timestamp.minute - (timestamp.minute % 2)
    rounded_time = timestamp.replace(minute=rounded_minute, second=0, microsecond=0)
    # Format to match "2025-03-27T17:18:00.000Z" format
    return rounded_time.strftime("%Y-%m-%dT%H:%M:00Z")

# Convert timestamp to datetime if it isn't already
manual_df['timestamp'] = pd.to_datetime(manual_df['timestamp'])

# Apply rounding function and format
manual_df['timestamp'] = manual_df['timestamp'].apply(round_down_to_even_minutes)

# Display the result
display(manual_df.head())

Unnamed: 0,timestamp,emotion,valence,arousal
0,2025-02-24T19:04:00Z,Happy,0.98,0.55
1,2025-02-24T18:12:00Z,Happy,0.99,0.5
2,2025-02-24T17:32:00Z,Happy,0.89,0.54
3,2025-02-25T19:04:00Z,Excited,0.67,0.76
4,2025-02-25T16:52:00Z,Neutral,-0.01,-0.04


# HEALTH DATA FROM GARMIN

In [23]:
# fetch data from the data/ folder called new_garmin_data.csv
health_data = pd.read_csv('/Users/maudhelenhovland/Desktop/AffectiveLamp/data/new_garmin_data.csv')
print("Successfully loaded health data")
display(health_data.head())
print("\nHealth data shape:", health_data.shape)


FileNotFoundError: [Errno 2] No such file or directory: '/Users/maudhelenhovland/Desktop/AffectiveLamp/data/new_garmin_data.csv'

In [8]:
#Drop columns with negative values
# First, let's identify columns that might have negative values
numeric_columns = ['stress', 'respiration', 'heart_rate', 'spo2']

# Print original shape
print("Original shape:", health_data.shape)

# Drop rows where any numeric column has negative values
for column in numeric_columns:
    # Print number of negative values in each column before dropping
    neg_count = (health_data[column] < 0).sum()
    if neg_count > 0:
        print(f"Found {neg_count} negative values in {column}")
        health_data = health_data[health_data[column] >= 0]

# Print final shape
print("\nShape after removing negative values:", health_data.shape)

# Display the cleaned data
display(health_data.head())

Original shape: (22005, 8)
Found 4446 negative values in stress
Found 598 negative values in respiration

Shape after removing negative values: (16931, 8)


Unnamed: 0,timestamp,heart_rate,stress,respiration,body_battery,spo2,hrv,sleep_score
1,2025-04-01T22:02:00Z,64.0,31.0,18.0,,94.0,,50.0
2,2025-04-01T22:04:00Z,63.0,31.0,17.0,,94.0,,50.0
3,2025-04-01T22:06:00Z,64.0,28.0,16.0,,94.0,,50.0
4,2025-04-01T22:08:00Z,65.0,28.0,17.0,,94.0,,50.0
5,2025-04-01T22:10:00Z,65.0,28.0,17.0,,94.0,,50.0


In [9]:
# Check missing values
print("\nMissing values in health data:")
display(health_data.isnull().sum())

# Check for duplicate timestamps
duplicate_timestamps = health_data[health_data.duplicated(subset=['timestamp'])]



Missing values in health data:


timestamp           0
heart_rate          3
stress              0
respiration         0
body_battery     2422
spo2             4273
hrv             16931
sleep_score      1909
dtype: int64

# Mergin automatic and manual emotion data

## MAKE ALL TIMESTAMPS ALIKE

In [10]:
# Concatenate app_data and manual_df
combined_df = pd.concat([app_data, manual_df], ignore_index=True)

# Sort by timestamp
combined_df = combined_df.sort_values('timestamp')

# Reset index after sorting
combined_df = combined_df.reset_index(drop=True)

# Display the result
display(combined_df.head())
print("\nCombined data shape:", combined_df.shape)

Unnamed: 0,timestamp,valence,arousal,emotion,hue,saturation,brightness
0,2025-02-24T17:32:00Z,0.89,0.54,Happy,,,
1,2025-02-24T18:12:00Z,0.99,0.5,Happy,,,
2,2025-02-24T19:04:00Z,0.98,0.55,Happy,,,
3,2025-02-25T12:12:00Z,0.03,-0.04,Neutral,,,
4,2025-02-25T12:36:00Z,-0.07,-0.04,Upset,,,



Combined data shape: (99, 7)


In [11]:
combined_df.to_csv('/Users/maudhelenhovland/Desktop/AffectiveLamp/data/combined_emotion_data.csv', index=False)

# WHOLE DATASET

In [15]:
# Check datatypes in combined_df
print("\nDatatypes in combined_df:")
display(combined_df.dtypes)

# Check for duplicate timestamps in combined_df
duplicate_timestamps = combined_df[combined_df.duplicated(subset=['timestamp'])]

print(display(combined_df.head()))


Datatypes in combined_df:


timestamp      object
valence       float64
arousal       float64
emotion        object
hue           float64
saturation    float64
brightness    float64
dtype: object

Unnamed: 0,timestamp,valence,arousal,emotion,hue,saturation,brightness
0,2025-02-24T17:32:00Z,0.89,0.54,Happy,,,
1,2025-02-24T18:12:00Z,0.99,0.5,Happy,,,
2,2025-02-24T19:04:00Z,0.98,0.55,Happy,,,
3,2025-02-25T12:12:00Z,0.03,-0.04,Neutral,,,
4,2025-02-25T12:36:00Z,-0.07,-0.04,Upset,,,


None


In [16]:
#Check datatypes in health_data
print("\nDatatypes in health_data:")
display(health_data.dtypes)

#Check for duplicate timestamps in health_data
duplicate_timestamps = health_data[health_data.duplicated(subset=['timestamp'])]

print(display(health_data.head()))



Datatypes in health_data:


timestamp        object
heart_rate      float64
stress          float64
respiration     float64
body_battery    float64
spo2            float64
hrv             float64
sleep_score     float64
dtype: object

Unnamed: 0,timestamp,heart_rate,stress,respiration,body_battery,spo2,hrv,sleep_score
1,2025-04-01T22:02:00Z,64.0,31.0,18.0,,94.0,,50.0
2,2025-04-01T22:04:00Z,63.0,31.0,17.0,,94.0,,50.0
3,2025-04-01T22:06:00Z,64.0,28.0,16.0,,94.0,,50.0
4,2025-04-01T22:08:00Z,65.0,28.0,17.0,,94.0,,50.0
5,2025-04-01T22:10:00Z,65.0,28.0,17.0,,94.0,,50.0


None


In [17]:
#merge on timestamps with the health_data and app_and_manual_data
merged_data = pd.merge(health_data, combined_df, on='timestamp', how='left')
display(merged_data.head())

Unnamed: 0,timestamp,heart_rate,stress,respiration,body_battery,spo2,hrv,sleep_score,valence,arousal,emotion,hue,saturation,brightness
0,2025-04-01T22:02:00Z,64.0,31.0,18.0,,94.0,,50.0,,,,,,
1,2025-04-01T22:04:00Z,63.0,31.0,17.0,,94.0,,50.0,,,,,,
2,2025-04-01T22:06:00Z,64.0,28.0,16.0,,94.0,,50.0,,,,,,
3,2025-04-01T22:08:00Z,65.0,28.0,17.0,,94.0,,50.0,,,,,,
4,2025-04-01T22:10:00Z,65.0,28.0,17.0,,94.0,,50.0,,,,,,


In [18]:
#Save as a csv file
merged_data.to_csv(DATA_DIR+'/merged_data.csv', index=False)