In [None]:
# ============================================================
# Smart Stress Monitoring System
# Data Preprocessing using Google Sheets + Google Colab
# Sensors: GSR and EMG
# ============================================================

# ----------- Install Required Libraries (Run Once) -----------
!pip install gspread oauth2client pandas scikit-learn matplotlib seaborn

# -------------------- Import Libraries ----------------------
import gspread
from google.colab import auth
from google.auth import default

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# ---------------- Google Sheets Authentication ----------------
# Run the Colab authentication flow for the current user, then build the
# gspread client from the resulting default credentials.
auth.authenticate_user()
# default() returns a 2-tuple; only the first element (the credentials) is
# used. Presumably the discarded second element is the project ID -- confirm
# against the google.auth documentation.
creds, _ = default()
# `gc` is the authorized client used below to open the spreadsheet.
gc = gspread.authorize(creds)

# ------------------ Load Data from Google Sheets ----------------
# NOTE: Replace with your actual Google Sheet URL when running
sheet_url = "https://docs.google.com/spreadsheets/d/1Eto7AfjxMDvJVw5Pq9YjRK_5VEatQUxU2SloIqug4eY/edit?gid=0#gid=0"
# Only the first worksheet (tab) of the spreadsheet is read.
worksheet = gc.open_by_url(sheet_url).sheet1

# Fetch all records and convert to DataFrame
data = worksheet.get_all_records()
df = pd.DataFrame(data)

# Fail early with a readable message when the sheet is empty or its header
# row does not match what the rest of the pipeline indexes by name.
required_columns = ('Timestamp', 'GSR', 'EMG')
missing_columns = [name for name in required_columns if name not in df.columns]
if missing_columns:
    raise KeyError(
        f"Google Sheet is missing required column(s): {missing_columns}"
    )

# Convert Timestamp column to datetime format
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Display initial data. A bare `df.head()` is only rendered when it is the
# last expression of a notebook cell, so it is printed explicitly here.
print("Initial Data:")
print(df.head())

# -------------------- Data Cleaning ---------------------------
# Convert GSR and EMG columns to numeric values. errors='coerce' turns any
# cell that cannot be parsed as a number into NaN instead of raising.
df['GSR'] = pd.to_numeric(df['GSR'], errors='coerce')
df['EMG'] = pd.to_numeric(df['EMG'], errors='coerce')

# Handle missing values: fill from the previous row first, then from the next
# row so that gaps at the very start are covered too. This applies to every
# column of the DataFrame. ffill()/bfill() replace the deprecated
# fillna(method=...) spelling and behave the same.
df = df.ffill().bfill()

# -------------------- Data Normalization ----------------------
def _zscore(values: pd.Series) -> pd.Series:
    """Return *values* shifted to zero mean and scaled by its standard deviation.

    The standard deviation is pandas' default (sample, ddof=1). A constant
    series has zero spread; dividing by it would turn every value into NaN
    (0 / 0), so in that case the scale falls back to 1 and the result is all
    zeros.
    """
    spread = values.std()
    if spread == 0:
        spread = 1.0
    return (values - values.mean()) / spread


# Standardize GSR and EMG values (Z-score normalization)
df['GSR'] = _zscore(df['GSR'])
df['EMG'] = _zscore(df['EMG'])

# -------------------- Feature Engineering ---------------------
# Rolling-window mean and standard deviation of each signal, computed over
# the last `window_size` rows.
window_size = 10

# Build each rolling view once and reuse it for both statistics.
gsr_window = df['GSR'].rolling(window=window_size)
emg_window = df['EMG'].rolling(window=window_size)

# Both Mean columns are added before both STD columns.
df['GSR_Mean'] = gsr_window.mean()
df['EMG_Mean'] = emg_window.mean()
df['GSR_STD'] = gsr_window.std()
df['EMG_STD'] = emg_window.std()

# The rolling statistics are NaN until a full window is available. Drop, in
# place, every row that contains a NaN in any column.
df.dropna(inplace=True)

# -------------------- Final Feature Set -----------------------
# Columns passed on as features for anomaly detection and classification:
# the two normalized signals plus their rolling mean and standard deviation.
feature_columns = [
    'GSR', 'EMG',
    'GSR_Mean', 'EMG_Mean',
    'GSR_STD', 'EMG_STD',
]
X = df[feature_columns]

print("\nPreprocessed Feature Data:")
# Left as the final bare expression so the notebook renders it as cell output.
X.head()
