DataSet:
https://physionet.org/content/sleep-accel/1.0.0/

  Walch, Olivia. "Motion and heart rate from a wrist-worn wearable and labeled sleep from polysomnography" (version 1.0.0). PhysioNet (2019). https://doi.org/10.13026/hmhs-py35.

  Olivia Walch, Yitong Huang, Daniel Forger, Cathy Goldstein, Sleep stage prediction with raw acceleration and photoplethysmography heart rate data derived from a consumer wearable device, Sleep, Volume 42, Issue 12, December 2019, zsz180, https://doi.org/10.1093/sleep/zsz180

  Goldberger, A., L. Amaral, L. Glass, J. Hausdorff, P. C. Ivanov, R. Mark, J. E. Mietus, G. B. Moody, C. K. Peng, and H. E. Stanley. "PhysioBank, PhysioToolkit, and PhysioNet: Components of a new research resource for complex physiologic signals. Circulation [Online]. 101 (23), pp. e215–e220." (2000).



In [None]:
pip install gdown



In [None]:
import os
import numpy as np
import pandas as pd
import glob
import tensorflow as tf
import gdown

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [None]:
# Run this code locally; working with Google Drive folder permissions is painful
import os
import pandas as pd

def convert_to_csv(input_folder, output_folder, column_names=None, delimiter=','):
    """Convert every headerless ``.txt`` file in a folder into a CSV with a header row.

    Each ``<name>.txt`` directly inside *input_folder* is parsed with pandas and
    written to *output_folder* as ``<name>.csv``. Files without a ``.txt``
    suffix are ignored.

    Args:
        input_folder: Directory scanned (non-recursively) for ``.txt`` files.
        output_folder: Directory receiving the ``.csv`` files; created
            (including parents) when it does not exist yet.
        column_names: Header names assigned to the input columns. Defaults to
            ``['timestamp', 'heart rate']``. NOTE(review): ``preprocess_data``
            in this file reads a column called ``'time'``, so pass matching
            names when the output feeds that step.
        delimiter: Field separator used by the input files. Defaults to ``','``.

    Raises:
        FileNotFoundError: If *input_folder* does not exist (or either path is
            an empty string).
    """
    txt_suffix = ".txt"
    if column_names is None:
        column_names = ['timestamp', 'heart rate']

    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if not filename.endswith(txt_suffix):
            continue

        # Swap only the trailing extension; str.replace would also rewrite a
        # ".txt" that happens to appear in the middle of the name.
        csv_name = filename[:-len(txt_suffix)] + ".csv"
        input_file_path = os.path.join(input_folder, filename)
        output_file_path = os.path.join(output_folder, csv_name)

        # The source files carry no header row, so the names are supplied here
        data = pd.read_csv(input_file_path, header=None, names=column_names, delimiter=delimiter)

        data.to_csv(output_file_path, index=False)

if __name__ == "__main__":
    # Input folders holding the raw .txt files (fill these in before running)
    heart_rate_folder = ''
    motion_folder = ''
    labeled_sleep_folder = ''

    # Output folders that will receive the converted .csv files
    output_heart_rate_folder = ''
    output_motion_folder = ''
    output_labeled_sleep_folder = ''

    conversion_jobs = [
        ('heart rate', heart_rate_folder, output_heart_rate_folder),
        ('motion', motion_folder, output_motion_folder),
        ('labeled sleep', labeled_sleep_folder, output_labeled_sleep_folder),
    ]

    converted_count = 0
    for job_name, source_folder, target_folder in conversion_jobs:
        # An empty path makes os.makedirs/os.listdir raise FileNotFoundError,
        # so an unconfigured pair is reported and skipped instead of crashing.
        if not source_folder or not target_folder:
            print(f"Skipping {job_name}: input/output folder is not set.")
            continue
        convert_to_csv(source_folder, target_folder)
        converted_count += 1

    if converted_count == len(conversion_jobs):
        print("Conversion completed.")
    else:
        print(f"Conversion completed for {converted_count} of {len(conversion_jobs)} folders.")



FileNotFoundError: ignored

In [None]:
# Run this code locally; working with Google Drive folder permissions is painful
# Bring the data into the same format as the labels; the end still needs a manual trim, but that's fine
def preprocess_data(input_file, output_file, data_type, interval=30):
    """Resample one CSV recording onto a regular time grid and save the result.

    The input must contain a ``time`` column, interpreted as seconds. Rows
    with negative time are discarded, duplicate timestamps are dropped
    (first occurrence kept), the remaining columns are averaged per
    *interval*-second bin, empty bins are forward-filled, and ``time`` is
    rewritten as whole seconds elapsed since the first bin.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write.
        data_type: Label used only in the completion message.
        interval: Bin width in seconds. Defaults to 30.

    Raises:
        KeyError: If the input has no ``time`` column.
    """
    data = pd.read_csv(input_file)

    # Keep non-negative times only; .copy() makes the column assignment below
    # act on an independent frame rather than on a slice of the original.
    data = data[data['time'] >= 0].copy()

    # A datetime index is required for resampling
    data['time'] = pd.to_datetime(data['time'], unit='s')
    data = data.drop_duplicates('time').set_index('time')

    # A Timedelta rule avoids the pandas-version-dependent 'S'/'s' string aliases
    data_resampled = data.resample(pd.Timedelta(seconds=interval)).mean()

    # Bins without samples inherit the previous available value
    data_resampled = data_resampled.ffill()

    data_resampled = data_resampled.reset_index()

    # total_seconds() keeps counting past 24 h; .dt.seconds would wrap to 0
    elapsed = data_resampled['time'] - data_resampled['time'].min()
    data_resampled['time'] = elapsed.dt.total_seconds().astype(int)

    data_resampled.to_csv(output_file, index=False)

    print(f"{data_type} data preprocessing completed for {input_file}.")

def preprocess_folder(input_folder, output_folder, data_type):
    """Run ``preprocess_data`` on every ``.csv`` file directly inside a folder.

    Args:
        input_folder: Directory whose ``.csv`` files are processed.
        output_folder: Directory receiving ``preprocessed_<name>`` files;
            created when it does not exist.
        data_type: Label forwarded to ``preprocess_data`` for its message.
    """
    output_missing = not os.path.exists(output_folder)
    if output_missing:
        os.makedirs(output_folder)

    for entry in os.listdir(input_folder):
        # Anything that is not a CSV file is left untouched
        if not entry.endswith(".csv"):
            continue

        source_path = os.path.join(input_folder, entry)
        target_path = os.path.join(output_folder, "preprocessed_" + entry)

        # Every file is resampled onto a fixed 30-second grid
        preprocess_data(source_path, target_path, data_type, interval=30)

# Locations of the converted inputs
heart_rate_input_folder = './converted_data/heart_rate'
motion_input_folder = './converted_data/motion'
labels_input_file = './converted_data/labels.csv'

# Locations for the resampled outputs
preprocessed_heart_rate_output_folder = './preprocessed_data/heart_rate'
preprocessed_motion_output_folder = './preprocessed_data/motion'

# Heart rate first, then motion: (input folder, output folder, label for messages)
_preprocessing_runs = (
    (heart_rate_input_folder, preprocessed_heart_rate_output_folder, 'Heart Rate'),
    (motion_input_folder, preprocessed_motion_output_folder, 'Motion'),
)
for _run_input, _run_output, _run_label in _preprocessing_runs:
    preprocess_folder(_run_input, _run_output, _run_label)

print("All preprocessing completed.")

In [None]:
# Combine heart rate, motion and label data per subject ID
# Specify the folder paths
heart_rate_folder = './preprocessed_data/heart_rate'
motion_folder = './preprocessed_data/motion'
labels_folder = './preprocessed_data/labels'

# Output folder for combined CSV files
combined_folder = './combined_data'

# Prefix that preprocess_folder puts in front of every file it writes
_PREPROCESSED_PREFIX = 'preprocessed_'


def _subject_id_from(filename):
    """Return the leading '_'-separated token of *filename*, ignoring the 'preprocessed_' prefix."""
    if filename.startswith(_PREPROCESSED_PREFIX):
        filename = filename[len(_PREPROCESSED_PREFIX):]
    return filename.split('_')[0]


def _resolve_subject_file(folder, basename):
    """Return the path of *basename* in *folder*, accepting a 'preprocessed_'-prefixed variant.

    When neither variant exists the plain path is returned, so the subsequent
    read raises FileNotFoundError for the expected file name.
    """
    plain_path = os.path.join(folder, basename)
    prefixed_path = os.path.join(folder, _PREPROCESSED_PREFIX + basename)
    if not os.path.exists(plain_path) and os.path.exists(prefixed_path):
        return prefixed_path
    return plain_path


# Create the output folder if it doesn't exist
os.makedirs(combined_folder, exist_ok=True)

# Loop through the files in the heart rate folder
for filename in os.listdir(heart_rate_folder):
    if not filename.endswith('.csv'):
        continue

    # Without stripping the prefix, every 'preprocessed_*' file would yield
    # the same bogus subject ID "preprocessed".
    subject_id = _subject_id_from(filename)

    # Load heart rate data
    heart_rate_data = pd.read_csv(os.path.join(heart_rate_folder, filename))

    # Load motion data
    motion_filename = f'{subject_id}_acceleration.csv'
    motion_data = pd.read_csv(_resolve_subject_file(motion_folder, motion_filename))

    # Load labels data
    labels_filename = f'{subject_id}_labeled_sleep.csv'
    labels_data = pd.read_csv(_resolve_subject_file(labels_folder, labels_filename))

    # Outer joins on 'time' keep rows that are present in only one of the sources
    combined_data = pd.merge(heart_rate_data, motion_data, on='time', how='outer')
    combined_data = pd.merge(combined_data, labels_data, on='time', how='outer')

    # Save the combined data to a new CSV file
    combined_filename = f'{subject_id}_combined.csv'
    combined_filepath = os.path.join(combined_folder, combined_filename)
    combined_data.to_csv(combined_filepath, index=False)

print("Combining completed.")

In [None]:
# model
# LSTM Model
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout


# Read the final combined CSV file and discard rows with missing values
final_combined_file = './final_combined.csv'
data = pd.read_csv(final_combined_file).dropna()

# Treat -1 as 0
data['sleep_motion'] = data['sleep_motion'].replace({-1: 0})

# The 'sleep_motion' column is the target; every other column is a feature
labels = data['sleep_motion']
features = data.drop(columns=['sleep_motion'])

# Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Standardize using statistics from the training split only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Insert a length-1 timestep axis: (samples, features) -> (samples, 1, features)
X_train = X_train[:, np.newaxis, :]
X_test = X_test[:, np.newaxis, :]

# Three stacked LSTM layers, each followed by light dropout, then a softmax head
model = Sequential([
    LSTM(128, activation='relu', input_shape=X_train.shape[1:], return_sequences=True),
    Dropout(0.1),
    LSTM(64, activation='relu', return_sequences=True),
    Dropout(0.1),
    LSTM(32, activation='relu'),
    Dropout(0.1),
    Dense(6, activation='softmax'),  # Assuming 6 sleep stages (from 0 to 5)
])

# Integer class labels, hence the sparse categorical loss
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train; the held-out split doubles as the validation set
model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# evaluate() returns [loss, accuracy] for the metrics configured above
accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy[1] * 100:.2f}%")

model.save('sleep_model.h5')
