In [5]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from opensignalsreader import OpenSignalsReader
import os

In [11]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os

def read_opensignals_data(filename):
    """Read OpenSignals data file and return timestamps and signals"""
    # Skip header lines
    header_lines = 0
    with open(filename, 'r') as f:
        for line in f:
            if line.startswith('#'):
                header_lines += 1
            else:
                break
    
    # Read data using pandas
    data = pd.read_csv(filename, skiprows=header_lines, delimiter='\t',
                      names=['nSeq', 'I1', 'I2', 'O1', 'O2', 'ECG', 'EDA'])
    
    # Reorder columns to match actual data
    data = data.rename(columns={'O2': 'ECG_temp', 'ECG': 'EDA_temp'})
    data['ECG'] = data['ECG_temp']
    data['EDA'] = data['EDA_temp']
    data = data.drop(['ECG_temp', 'EDA_temp'], axis=1)
    
    # Create sequential index for time calculation
    data = data.reset_index(drop=True)
    data['time_sec'] = data.index / 1000
    
    return data

def split_and_save_data(data, output_folder, participant):
    """Split data according to time ranges and save to CSV files"""
    conditions = {
        (60, 359): ('empty', 'bright'),    # 1min - 5min59s
        (360, 599): ('empty', 'dark'),     # 6min - 9min59s
        (1260, 1619): ('full', 'bright'),  # 21min - 26min59s
        (1620, 1800): ('full', 'dark')     # 27min - 30min
    }
    
    # Print overall statistics
    total_duration_sec = len(data) / 1000
    print(f"\nTotal samples: {len(data)}")
    print(f"Total duration: {total_duration_sec:.1f} seconds ({total_duration_sec/60:.1f} minutes)")
    
    for (start_sec, end_sec), (condition1, condition2) in conditions.items():
        # Calculate sample indices for the time range
        start_idx = int(start_sec * 1000)
        end_idx = int(end_sec * 1000)
        
        # Get data segment
        segment = data.iloc[start_idx:end_idx]
        
        print(f"\n{condition1}-{condition2}:")
        print(f"Time range {start_sec}-{end_sec} seconds")
        print(f"Number of samples: {len(segment)}")
        
        if len(segment) == 0:
            print(f"Warning: No data found for {participant} between {start_sec}-{end_sec} seconds")
            continue
            
        # Create output filenames
        output_filename_ecg = f"{output_folder}/{participant}_{condition1}-{condition2}_ecg.csv"
        output_filename_eda = f"{output_folder}/{participant}_{condition1}-{condition2}_eda.csv"
        
        # Save to CSV
        np.savetxt(output_filename_ecg, segment['ECG'].values, delimiter=',', header='ECG')
        np.savetxt(output_filename_eda, segment['EDA'].values, delimiter=',', header='EDA')
        
        print(f"Wrote files {output_filename_ecg} and {output_filename_eda}")

def main():
    # Parameters
    source_data_folder = '/Users/firefly/Desktop/Team_Messung/data/sourcedata'
    raw_data_folder = '/Users/firefly/Desktop/Team_Messung/data/rawdata'
    participants = ['sub-01', 'sub-02']
    
    # Create output directory if it doesn't exist
    os.makedirs(raw_data_folder, exist_ok=True)
    
    # Process each participant's data
    for participant in participants:
        # Read the long file
        filename = f"{source_data_folder}/{participant}_long.txt"
        
        try:
            # Read data
            data = read_opensignals_data(filename)
            
            # Debug information
            print(f"\nProcessing {filename}")
            print(f"Data shape: {data.shape}")
            print("\nFirst few rows:")
            print(data.head())
            
            # Split and save data
            split_and_save_data(data, raw_data_folder, participant)
            
        except FileNotFoundError:
            print(f"File not found: {filename}")
            continue
        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")
            continue

if __name__ == "__main__":
    main()


Processing /Users/firefly/Desktop/Team_Messung/data/sourcedata/sub-01_long.txt
Data shape: (1894650, 7)

First few rows:
   nSeq  I1  I2  O1  EDA  ECG  time_sec
0     0   0   0   0  203  587     0.000
1     0   0   1   1  203  589     0.001
2     0   0   1   1  203  590     0.002
3     0   0   1   1  203  593     0.003
4     0   0   1   1  203  597     0.004

Total samples: 1894650
Total duration: 1894.7 seconds (31.6 minutes)

empty-bright:
Time range 60-359 seconds
Number of samples: 299000
Wrote files /Users/firefly/Desktop/Team_Messung/data/rawdata/sub-01_empty-bright_ecg.csv and /Users/firefly/Desktop/Team_Messung/data/rawdata/sub-01_empty-bright_eda.csv

empty-dark:
Time range 360-599 seconds
Number of samples: 239000
Wrote files /Users/firefly/Desktop/Team_Messung/data/rawdata/sub-01_empty-dark_ecg.csv and /Users/firefly/Desktop/Team_Messung/data/rawdata/sub-01_empty-dark_eda.csv

full-bright:
Time range 1260-1619 seconds
Number of samples: 359000
Wrote files /Users/firefly/Des