In [5]:
import pandas as pd
import os
from datetime import datetime

def transform_emg_data(input_csv, output_csv):
    """Transforms a CSV file by reformatting timestamps to ISO 8601 format with milliseconds."""
    # Load the CSV file
    df = pd.read_csv(input_csv)

    # Extract column names (excluding timestamp)
    channels = df.columns[1:]

    # Prepare an empty list for the transformed data
    transformed_data = []

    # Define possible timestamp formats
    timestamp_formats = [
        "%d/%m/%Y %H:%M:%S.%f",  # 19/12/2023 00:17:57.870000
        "%d/%m/%Y %H:%M:%S",     # 19/12/2023 00:17:57
        "%d/%m/%Y %H:%M",        # 19/12/2023 00:17
        "%Y-%m-%d %H:%M:%S.%f",  # 2023-12-19 00:17:57.870000
        "%Y-%m-%d %H:%M:%S",     # 2023-12-19 00:17:57
        "%Y-%m-%d %H:%M"         # 2023-12-19 00:17
    ]

    for row_idx, row in df.iterrows():
        timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces

        parsed_time = None
        for fmt in timestamp_formats:
            try:
                parsed_time = datetime.strptime(timestamp_str, fmt)
                break
            except ValueError:
                continue

        if parsed_time is None:
            print(f"Skipping row {row_idx}: Unrecognized timestamp format → {timestamp_str}")
            continue  # Skip problematic rows

        # Convert timestamp to ISO 8601 format with milliseconds
        formatted_time = parsed_time.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

        # Iterate through each channel
        for channel in channels:
            transformed_data.append([channel, formatted_time, row[channel], ""])  # Empty label

    # Create a new DataFrame
    transformed_df = pd.DataFrame(transformed_data, columns=["series", "timestamp", "value", "label"])

    # Save the transformed data
    transformed_df.to_csv(output_csv, index=False)
    print(f"Processed: {input_csv} → {output_csv}")

def process_all_csv_files(input_folder, output_folder):
    """Processes all CSV files in input_folder and saves them in output_folder."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)  # Ensure output folder exists

    # Loop through all CSV files in the input folder
    for filename in os.listdir(input_folder):
        if filename.endswith(".csv"):  # Process only CSV files
            input_csv = os.path.join(input_folder, filename)
            output_csv = os.path.join(output_folder, filename.replace(".csv", "_prcsd.csv"))
            
            print(f"Processing file: {filename}")
            transform_emg_data(input_csv, output_csv)

# Folder paths
input_folder = r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan"
output_folder = r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data"

# Process all CSV files in the folder
process_all_csv_files(input_folder, output_folder)


Processing file: Blinking.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\Blinking.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\Blinking_prcsd.csv
Processing file: blinks.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\blinks.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\blinks_prcsd.csv
Processing file: eye gaze left right 1.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\eye gaze left right 1.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\eye gaze left right 1_prcsd.csv
Processing file: eye gaze left right 2.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\eye gaze left right 2.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\eye gaze left right 2_prcsd.csv
Processing file: Eye Gazing.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\Eye Gazing.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\Eye Gazing_prcsd.csv
Processing file: eye movements up down.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\eye movements up down.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\eye movements up down_prcsd.csv
Processing file: noise.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\noise.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\noise_prcsd.csv
Processing file: raise eybrows quick + garbage.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\raise eybrows quick + garbage.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\raise eybrows quick + garbage_prcsd.csv
Processing file: raise eyebrows and hold.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\raise eyebrows and hold.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\raise eyebrows and hold_prcsd.csv
Processing file: reading outloud.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\reading outloud.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\reading outloud_prcsd.csv
Processing file: reading.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\reading.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\reading_prcsd.csv
Processing file: smiling.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\smiling.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\smiling_prcsd.csv
Processing file: squinting.csv


  timestamp_str = str(row[0]).strip()  # Ensure timestamp is a string and remove any spaces


Processed: C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\squinting.csv → C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\processed_data\squinting_prcsd.csv


In [None]:
import pandas as pd
import os

def convert_long_to_wide(input_csv, output_csv):
    """
    Converts a long-format CSV file back to wide format with labels applied across all channels.
    
    Parameters:
        input_csv (str): Path to the processed labeled CSV file.
        output_csv (str): Path to save the reconstructed wide-format CSV file.
    """

    # Load the processed labeled CSV file
    df = pd.read_csv(input_csv)

    # Pivot the data back to wide format (timestamps as rows, channels as columns)
    df_wide = df.pivot(index="timestamp", columns="series", values="value").reset_index()

    # Extract labels only from 'channel_1' and apply to all rows with the same timestamp
    df_labels = df[df["series"] == "channel_1"][["timestamp", "label"]]

    # Merge labels back into the wide-format dataframe
    df_wide = df_wide.merge(df_labels, on="timestamp", how="left")

    # Manually define correct column order (channel_1, channel_2, ..., channel_16, label)
    channel_order = [f"channel_{i}" for i in range(1, 17)]  # Assuming 16 channels
    column_order = ["timestamp"] + channel_order + ["label"]

    # Reorder the columns manually
    df_wide = df_wide[column_order]

    # Save to CSV (keeping original channel names)
    df_wide.to_csv(output_csv, index=False)

    print(f"Transformation complete. Output saved to {output_csv}")


# Example usage
if __name__ == "__main__":
    input_csv = r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\before_format\eye gaze left right 1_prcsd-labeled.csv"
    output_csv = r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\eye gaze left right 1_labeled_af.csv"
    
    convert_long_to_wide(input_csv, output_csv)


In [1]:
import pandas as pd
import os

def convert_long_to_wide(input_csv, output_csv):
    """
    Converts a long-format CSV file back to wide format with labels applied across all channels.
    Handles duplicate (timestamp, series) pairs by averaging values.

    Parameters:
        input_csv (str): Path to the processed labeled CSV file.
        output_csv (str): Path to save the reconstructed wide-format CSV file.
    """

    # Load the processed labeled CSV file
    df = pd.read_csv(input_csv)

    # Remove duplicates by averaging duplicate values per (timestamp, series)
    #df = df.groupby(["timestamp", "series"], as_index=False).agg({"value": "mean", "label": "first"})

    # Pivot the data back to wide format (timestamps as rows, channels as columns)
    df_wide = df.pivot(index="timestamp", columns="series", values="value").reset_index()

    # Extract labels only from 'channel_2' and apply to all rows with the same timestamp
    df_labels = df[df["series"] == "channel_8"][["timestamp", "label"]]

    # Merge labels back into the wide-format dataframe
    df_wide = df_wide.merge(df_labels, on="timestamp", how="left")

    # Manually define correct column order (ensuring channels are ordered numerically)
    channel_order = sorted([col for col in df_wide.columns if col.startswith("channel_")], key=lambda x: int(x.split("_")[1]))
    column_order = ["timestamp"] + channel_order + ["label"]

    # Reorder the columns manually
    df_wide = df_wide[column_order]

    # Save to CSV (keeping original channel names)
    df_wide.to_csv(output_csv, index=False)

    print(f"Transformation complete. Output saved to {output_csv}")

# Example usage
if __name__ == "__main__":
    input_csv = r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\before_format\eye movements up down_prcsd-labeled.csv"
    output_csv = r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\eye movements up down-labeled-af.csv"
    convert_long_to_wide(input_csv, output_csv)


  from pandas.core import (


Transformation complete. Output saved to C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\eye movements up down-labeled-af.csv


In [8]:
import pandas as pd
import os

def convert_long_to_wide(input_csv, output_csv):
    """
    Converts a long-format CSV file back to wide format with labels applied across all channels.
    Handles duplicate (timestamp, series) pairs by averaging values and trims dataset after last labeled timestamp.
    Also counts duplicates before and after the cut.

    Parameters:
        input_csv (str): Path to the processed labeled CSV file.
        output_csv (str): Path to save the reconstructed wide-format CSV file.
    """

    # Load the processed labeled CSV file
    df = pd.read_csv(input_csv)

    # Count total duplicate (timestamp, series) pairs before processing
    total_duplicates = df.duplicated(subset=["timestamp", "series"], keep=False).sum()
    print(f"Total duplicate (timestamp, series) pairs before processing: {total_duplicates}")

    # Remove duplicates by averaging duplicate values per (timestamp, series)
    df = df.groupby(["timestamp", "series"], as_index=False).agg({"value": "mean", "label": "first"})

    # Pivot the data back to wide format (timestamps as rows, channels as columns)
    df_wide = df.pivot(index="timestamp", columns="series", values="value").reset_index()

    # Extract labels only from 'channel_1' and apply to all rows with the same timestamp
    df_labels = df[df["series"] == "channel_1"][["timestamp", "label"]]

    # Merge labels back into the wide-format dataframe
    df_wide = df_wide.merge(df_labels, on="timestamp", how="left")

    # Manually define correct column order (ensuring channels are ordered numerically)
    channel_order = sorted([col for col in df_wide.columns if col.startswith("channel_")], key=lambda x: int(x.split("_")[1]))
    column_order = ["timestamp"] + channel_order + ["label"]

    # Reorder the columns manually
    df_wide = df_wide[column_order]

    # Trim dataset after the last timestamp that contains a label
    last_labeled_timestamp = df_wide[df_wide["label"].notna()]["timestamp"].max()
    df_wide = df_wide[df_wide["timestamp"] <= last_labeled_timestamp]

    # Count duplicate (timestamp, series) pairs after trimming
    duplicates_after_cut = df_wide.duplicated(subset=["timestamp"], keep=False).sum()
    print(f"Duplicate (timestamp, series) pairs after cut: {duplicates_after_cut}")

    # Save to CSV (keeping original channel names)
    df_wide.to_csv(output_csv, index=False)

    print(f"Transformation complete. Output saved to {output_csv}")

# Example usage
if __name__ == "__main__":
    input_csv = r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\before_format\raise eybrows quick + garbage_prcsd-labeled.csv"
    output_csv = r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\raise eybrows quick + garbage_labeled_af.csv"
    
    convert_long_to_wide(input_csv, output_csv)


  df = pd.read_csv(input_csv)


Total duplicate (timestamp, series) pairs before processing: 61696
Duplicate (timestamp, series) pairs after cut: 0
Transformation complete. Output saved to C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\raise eybrows quick + garbage_labeled_af.csv
