## Iterative Denoising of Physiological Data ##

This script denoises the GSR and the respiration/breath data set of each participant and shows a plot of the raw and the denoised data together with the resulting SCR/SCL or respiration rate values. The user is then asked to evaluate the denoising result: with input "y" (yes), the denoising is registered as successful and the algorithm continues with the next data set. With any other input, e.g. "n" (no), the denoising algorithm is applied again to the already denoised data and the result is plotted for the user to re-evaluate. The maximum number of iterations per data set is 10. The denoising algorithms used here were chosen by trial and error. The physiological measures **scr**, **scl** and **respiration rate** of each processed file are logged in "physiological_measures.csv"; the iterations of denoising are logged in "denoising_log.csv".

Denoising for **GSR** data:
- Apply moving average
    - using numpy.convolve with window size=10, mode='valid'
- Apply moving average again

Denoising for **respiration/breath** data:
- Apply neurokit2 'biosppy' cleaning method

<span style='color: red;'>**Evaluating all 1,240 data sets (62 participants × 10 scenes × 2 biosensors) takes some time.**</span>

**The data is not part of the git repository - you need your own copy of the data (expected under `../data`) to be able to run this notebook.**



In [None]:
import os
import json
import numpy as np
import neurokit2 as nk
import matplotlib.pyplot as plt
import csv

# Root folder that is searched recursively for the recorded JSON log files.
data_path = "../data"
# Participants whose sensor channels were switched during recording, so that
# the breath and GSR fields are also switched in their log files.
switch_ids = {
    '1111', '7878', '4545', '5656', '7575', '8585', '9898',
    '7373', '3929', '9191', '2343',
    '99991', '99992', '99993', '99994', '99995',
    '99996', '99997', '99998', '99999',
    '88881',
}

# get all JSON files from folders and subfolders
def get_json_files(path):
    """Collect the paths of all ``.json`` files below *path*.

    The folder tree is traversed with ``os.walk``, so files in nested
    subfolders are included. Only names ending in lower-case ``.json`` match.

    Args:
        path: Root folder to search.

    Returns:
        A list of file paths, each built by joining the containing folder
        (as reported by ``os.walk``) with the file name. The list is empty
        when nothing matches.
    """
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(path)
        for name in names
        if name.endswith(".json")
    ]

# load and filter data, start at "StartScene" special event
def load_filtered_data(file_path):
    """Load a JSON log file and keep only the samples from "StartScene" on.

    The file must contain an object with an ``"Items"`` list. All items
    before the first one whose ``"specialEvent"`` equals ``"StartScene"``
    are dropped; that item and everything after it is returned.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A new list with the items starting at the first "StartScene" item,
        or an empty list if no item carries that event.

    Raises:
        KeyError: If the JSON object has no ``"Items"`` entry.
    """
    # 'utf-8-sig' reads plain UTF-8 and also strips a leading byte order
    # mark, which would otherwise make json.load fail. Passing the encoding
    # explicitly also avoids depending on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        data = json.load(f)

    items = data["Items"]
    for index, item in enumerate(items):
        # .get(): an item without a "specialEvent" field is simply not the
        # start marker instead of aborting the whole file with a KeyError.
        if item.get("specialEvent") == "StartScene":
            return items[index:]
    return []

# denoise GSR data
def denoise_gsr(gsr_data):
    """Smooth a GSR signal with two successive moving-average passes.

    Each pass convolves the signal with a uniform 10-sample kernel using
    ``mode='valid'``. For a signal at least as long as the kernel, one pass
    therefore shortens it by 9 samples, so the result is 18 samples shorter
    than the input.

    Args:
        gsr_data: One-dimensional sequence of GSR samples.

    Returns:
        The smoothed signal as a NumPy array.
    """
    span = 10
    box_kernel = np.ones(span) / span
    smoothed = gsr_data
    for _ in range(2):
        smoothed = np.convolve(smoothed, box_kernel, mode='valid')
    return smoothed

# denoise breath data
def denoise_breath(breath_data):
    """Clean a respiration signal with NeuroKit2's 'biosppy' method.

    Args:
        breath_data: Sequence of respiration/breath samples.

    Returns:
        Whatever ``nk.rsp_clean`` returns for the given samples when called
        with ``sampling_rate=10`` and ``method='biosppy'``.
    """
    return nk.rsp_clean(breath_data, method='biosppy', sampling_rate=10)

# visualize data and check feedback
def visualize_and_get_feedback(original_data, denoised_data, sensor_type, scr_scl=None, resp_rate=None):
    """Plot raw vs. denoised data and ask the user whether denoising worked.

    Args:
        original_data: Raw samples, plotted with the label "Original".
        denoised_data: Denoised samples, plotted with the label "Denoised".
        sensor_type: Name of the sensor; shown as the plot title.
        scr_scl: Optional ``(scr, scl)`` pair printed on the figure.
        resp_rate: Optional respiration rate printed on the figure.

    Returns:
        The user's answer to the prompt, stripped of surrounding whitespace
        and lower-cased. The answer is not validated here.
    """
    fig = plt.figure()
    plt.plot(original_data, label="Original")
    plt.plot(denoised_data, label="Denoised")
    plt.title(sensor_type)
    plt.legend()

    if scr_scl:
        plt.figtext(0.1, 0.9, f"SCR: {scr_scl[0]}, SCL: {scr_scl[1]}")
    # Compare against None: a respiration rate of 0 is a value worth showing
    # and must not be mistaken for "no rate given".
    if resp_rate is not None:
        plt.figtext(0.1, 0.8, f"Respiration Rate: {resp_rate}")

    plt.show()
    feedback = input("Was denoising successful? (y/n): ")
    # Close only after the answer was given so the plot stays visible while
    # the user decides, but figures do not pile up over many data sets.
    plt.close(fig)
    return feedback.strip().lower()

# logging iterations to CSV
def log_results(file, sensor, iteration_results, log_file, max_iterations=10):
    """Append the review answers for one file/sensor pair to the log CSV.

    The written row is ``[file, sensor, answer_1, ..., answer_k]`` padded
    with ``0`` up to *max_iterations* answer columns. A list that already
    has *max_iterations* or more answers is written without padding.

    Args:
        file: Path of the processed data file.
        sensor: Sensor label, e.g. "gsr" or "breath".
        iteration_results: List with the user's answer for every iteration.
        log_file: Path of the CSV file to append to.
        max_iterations: Number of answer columns to pad to.
    """
    padding = [0] * (max_iterations - len(iteration_results))
    row = [file, sensor] + iteration_results + padding

    # Create the output folder on demand so a missing folder cannot discard
    # answers the user has already given.
    parent = os.path.dirname(log_file)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(log_file, mode='a', newline='') as f:
        csv.writer(f).writerow(row)

#logging results to CSV
def write_results(file, participantid, scr, scl, resp_rate, result_file):
    """Append the physiological measures of one file to the result CSV.

    The written row is ``[file, participantid, scl, scr, resp_rate]``.
    NOTE: the ``scl`` argument is stored *before* the ``scr`` argument, i.e.
    in the opposite order to the parameter list. The order is kept as is so
    that the column layout of already written result files does not change.

    Args:
        file: Path of the processed data file.
        participantid: Identifier of the participant.
        scr: Value written to the fourth column.
        scl: Value written to the third column.
        resp_rate: Respiration rate, written to the fifth column.
        result_file: Path of the CSV file to append to.
    """
    row = [file, participantid, scl, scr, resp_rate]

    # Create the output folder on demand so a missing folder cannot discard
    # measures from a review the user has already completed.
    parent = os.path.dirname(result_file)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(result_file, mode='a', newline='') as f:
        csv.writer(f).writerow(row)

# process all files
def _parse_channel(samples, key, scale=1.0):
    """Return one logged sensor channel as a list of floats.

    The values are stored as strings that may use a decimal comma, so the
    comma is replaced by a dot before conversion. Every value is multiplied
    by *scale*.
    """
    return [float(sample[key].replace(",", ".")) * scale for sample in samples]


def iterative_denoising(data_path):
    """Interactively denoise the GSR and breath data of every JSON log file.

    For each file found below *data_path* the GSR signal and then the breath
    signal are denoised, plotted and shown to the user. Each signal is
    denoised again (on top of the previous result) until the user answers
    "y" or 10 iterations have been done. The answers are appended to
    "IterativeDenoising/denoising_log.csv" and the measures of the last
    iteration to "IterativeDenoising/physiological_measures.csv".

    Files are processed in sorted path order so that repeated runs visit
    them in the same sequence. Files that contain no samples from the
    "StartScene" event onwards are reported and skipped without writing
    anything to either CSV file.

    Args:
        data_path: Root folder that is searched recursively for JSON files.
    """
    log_file = "IterativeDenoising/denoising_log.csv"
    result_file = "IterativeDenoising/physiological_measures.csv"
    max_iterations = 10

    for file in sorted(get_json_files(data_path)):
        print(f"Processing file: {file}")
        data = load_filtered_data(file)
        if not data:
            # An empty signal would make the denoising step raise and abort
            # the whole batch, so such files are skipped instead.
            print(f"Skipping file (no samples from 'StartScene' onwards): {file}")
            continue

        # The participant id is the name of the folder that directly
        # contains the file.
        participant_id = os.path.basename(os.path.dirname(file))
        if participant_id in switch_ids:
            # Channels were switched for this participant: GSR is stored in
            # the breath field and vice versa. The factor 0.0015 converts
            # the values to the GSR range.
            gsr_data = _parse_channel(data, "breathData", scale=0.0015)
            breath_data = _parse_channel(data, "gsrData")
        else:
            gsr_data = _parse_channel(data, "gsrData")
            breath_data = _parse_channel(data, "breathData")

        # GSR data processing: every rejected iteration denoises the
        # previous result once more.
        gsr_iterations = []
        denoised_gsr = gsr_data
        for _ in range(max_iterations):
            denoised_gsr = denoise_gsr(denoised_gsr)
            eda_signals, eda_info = nk.eda_process(denoised_gsr, sampling_rate=10)
            # SCL: mean of the tonic component; SCR: number of SCR onsets.
            scl = eda_signals['EDA_Tonic'].mean()
            scr = len(eda_info['SCR_Onsets'])
            print("scl: " + str(scl) + " , scr: " + str(scr))
            feedback = visualize_and_get_feedback(gsr_data, denoised_gsr, "GSR", scr_scl=(scr, scl))
            gsr_iterations.append(feedback)
            if feedback == 'y':
                break
        log_results(file, "gsr", gsr_iterations, log_file)

        # Breath data processing, same review scheme as for GSR.
        breath_iterations = []
        denoised_breath = breath_data
        for _ in range(max_iterations):
            denoised_breath = denoise_breath(denoised_breath)
            rsp_signals, rsp_info = nk.rsp_process(denoised_breath, sampling_rate=10)
            resp_rate = rsp_signals['RSP_Rate'].mean()
            print("resp_rate" + str(resp_rate))
            feedback = visualize_and_get_feedback(breath_data, denoised_breath, "Breath", resp_rate=resp_rate)
            breath_iterations.append(feedback)
            if feedback == 'y':
                break
        log_results(file, "breath", breath_iterations, log_file)

        # NOTE: write_results declares its parameters as (scr, scl) but
        # writes its scl argument before its scr argument. Passing
        # (scl, scr) here therefore stores the SCR count before the mean
        # SCL in the result file. Change both places together or not at all.
        write_results(file, participant_id, scl, scr, resp_rate, result_file)

# Start the interactive review of every JSON log file found below data_path.
iterative_denoising(data_path)
