In [177]:
import numpy as np
import pandas as pd
import pyxdf
import logging
import time
import pprint
import ast
import pytz
from datetime import datetime, timedelta

from pison_cloud.pison.reaction.cloud.v1 import reaction_pb2, reaction_pb2_grpc
from pison_cloud.pison.common.cloud.v1 import common_pb2
from google.protobuf.timestamp_pb2 import Timestamp
from google.protobuf.json_format import MessageToDict

from ml_util.query.microservices import (
    PisonGrpc,
    ResponseConverter,
    get_users,
    get_all_metadata,
    get_reaction_tests,
    get_plan_data,
    get_baseline_data
)
from ml_util.query.utils import Env

from ml_util.pison_ready.readiness import get_score as get_readiness_score
from ml_util.pison_ready.agility import get_score as get_agility_score
from ml_util.pison_ready.focus import get_score as get_focus_score
from ml_util.pison_ready.pvtb import get_score as get_pvtb_score


import pandas as pd
from datetime import datetime, timedelta
import pyxdf

def extract_eda_streams(xdf_file_path):
    """
    Load every EDA stream from an XDF recording into one long-format DataFrame.

    Each stream's hardware clock is re-based so that its first sample lands on
    the recording start time stored in the XDF file header; later samples keep
    their original spacing relative to that first sample.
    NOTE(review): this treats the first sample of every EDA stream as having
    been recorded exactly at the header datetime - confirm against the
    recording setup.

    Parameters:
    xdf_file_path (str): Path to the .xdf file to load.

    Returns:
    pd.DataFrame: One row per sample with columns ['Value', 'SourceID', 'Timestamp'],
    where 'Timestamp' is timezone-aware (offset taken from the file header). Only the
    first channel of each sample is kept. If the file holds no EDA samples, an empty
    DataFrame with columns ['Timestamp', 'Value', 'SourceID'] is returned.
    """
    streams, fileheader = pyxdf.load_xdf(xdf_file_path)

    # Recording start time as written in the file header (includes a UTC offset)
    reference_time = datetime.strptime(
        fileheader['info']['datetime'][0], '%Y-%m-%dT%H:%M:%S%z'
    )

    eda_frames = []
    for stream in streams:
        if stream['info']['type'][0] != 'EDA':
            continue

        hardware_clock = stream['time_stamps']
        if len(hardware_clock) == 0:
            # A stream that was declared but never delivered samples has no
            # first timestamp to anchor on, so there is nothing to convert.
            continue

        # Seconds elapsed since the stream's first sample, converted in one
        # vectorized step instead of one Python call per row.
        elapsed = pd.to_timedelta(
            pd.Series(hardware_clock, dtype='float64') - float(hardware_clock[0]),
            unit='s',
        )

        eda_frames.append(pd.DataFrame({
            'Value': [sample[0] for sample in stream['time_series']],  # single-channel EDA assumed
            'SourceID': stream['info']['source_id'][0],
            'Timestamp': pd.Timestamp(reference_time) + elapsed,
        }))

    if eda_frames:
        return pd.concat(eda_frames, ignore_index=True)
    return pd.DataFrame(columns=['Timestamp', 'Value', 'SourceID'])

from datetime import datetime, timedelta
import pandas as pd

def get_stim_timestamps(df, log_file='skipped_sessions.txt', *, countdown_seconds=0, min_timestamps=45):
    """
    Build a per-trial table of stimulus times and the onset moment matched to each stimulus.

    For the first row of `df`, every stimulus in 'plan.stimuli' is placed on the wall clock
    (test start + countdown + 'timeInSeconds'). Each stimulus is then matched to the first
    entry of 'onset_moments' (in list order) that falls strictly after the stimulus time
    minus 80 ms and strictly before the next stimulus time. The last stimulus uses a window
    that closes one day later.

    Parameters:
    df (pd.DataFrame): DataFrame containing columns ['plan.stimuli', 'created_at', 'onset_moments', 'session_id'].
        Only the first row is read. 'plan.stimuli' is a list of dicts with 'timeInSeconds' and
        optionally 'noGo'; 'onset_moments' is a list of ISO-8601 strings.
    log_file (str): File that receives one appended line whenever a session is skipped.
    countdown_seconds (float): Offset added to the test start before placing stimuli. Defaults to 0,
        i.e. no countdown adjustment.
    min_timestamps (int): Sessions with fewer onset moments than this are skipped. Defaults to 45.

    Returns:
    pd.DataFrame or None: One row per stimulus with columns 'trial', 'timeInSeconds', 'stim time',
    'stim time offset (-80ms)', 'ISI (seconds)' (gap to the following stimulus, NaN on the last row),
    'start_of_test', 'onset_timestamp', 'delta_onset_stim (seconds)' (NaN when nothing matched),
    'false start' (True when the delta is below 0.08 s), 'noGo' and 'session_id'.
    Returns None when the session is skipped for having too few onset moments.
    """
    # Local import: the surrounding file only imports datetime and timedelta.
    from datetime import timezone

    def _assume_utc(moment):
        # Attach UTC to a naive datetime; leave aware datetimes untouched.
        return moment if moment.tzinfo is not None else moment.replace(tzinfo=timezone.utc)

    stimuli = df['plan.stimuli'].iloc[0]
    start_time = df['created_at'].iloc[0]
    onset_strings = df['onset_moments'].iloc[0]
    session_id = df['session_id'].iloc[0]

    # Normalise the test start to a plain datetime. A trailing 'Z' is rewritten the
    # same way as for the onset strings because older fromisoformat versions reject it.
    if isinstance(start_time, str):
        start_time = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
    elif isinstance(start_time, pd.Timestamp):
        start_time = start_time.to_pydatetime()

    if len(onset_strings) < min_timestamps:
        with open(log_file, 'a') as f:
            f.write(f"Session {session_id} skipped: Less than {min_timestamps} timestamps.\n")
        return None

    onset_times = [datetime.fromisoformat(ts.replace('Z', '+00:00')) for ts in onset_strings]

    # Naive and aware datetimes cannot be compared. When the inputs disagree,
    # treat the naive ones as UTC. NOTE(review): UTC is an assumption - confirm
    # that 'created_at' / 'onset_moments' are UTC when they carry no offset.
    if len({moment.tzinfo is None for moment in [start_time, *onset_times]}) > 1:
        start_time = _assume_utc(start_time)
        onset_times = [_assume_utc(ts) for ts in onset_times]

    adjusted_start_time = start_time + timedelta(seconds=countdown_seconds)

    time_in_seconds = [entry['timeInSeconds'] for entry in stimuli]
    no_go = [entry.get('noGo', False) for entry in stimuli]
    stim_times = [adjusted_start_time + timedelta(seconds=seconds) for seconds in time_in_seconds]
    stim_offsets = [stim_time - timedelta(milliseconds=80) for stim_time in stim_times]

    # Gap from each stimulus to the next one; the last stimulus has no successor.
    isi_seconds = [
        (later - earlier).total_seconds()
        for earlier, later in zip(stim_times, stim_times[1:])
    ]
    if stim_times:
        isi_seconds.append(None)

    # First onset moment (in list order) inside each stimulus window.
    mapped_timestamps = []
    for idx, (stim_time, window_open) in enumerate(zip(stim_times, stim_offsets)):
        if idx + 1 < len(stim_times):
            window_close = stim_times[idx + 1]
        else:
            window_close = stim_time + timedelta(days=1)
        mapped_timestamps.append(
            next((ts for ts in onset_times if window_open < ts < window_close), None)
        )

    # Seconds from stimulus to matched onset; NaN keeps the column numeric.
    delta_onset_stim = [
        (mapped - stim_time).total_seconds() if mapped is not None else float('nan')
        for mapped, stim_time in zip(mapped_timestamps, stim_times)
    ]

    # An onset less than 80 ms after the stimulus counts as a false start.
    false_starts = [bool(pd.notna(delta) and delta < 0.08) for delta in delta_onset_stim]

    return pd.DataFrame({
        'trial': list(range(1, len(time_in_seconds) + 1)),
        'timeInSeconds': time_in_seconds,
        'stim time': stim_times,
        'stim time offset (-80ms)': stim_offsets,
        'ISI (seconds)': isi_seconds,
        'start_of_test': start_time,
        'onset_timestamp': mapped_timestamps,
        'delta_onset_stim (seconds)': delta_onset_stim,
        'false start': false_starts,
        'noGo': no_go,
        'session_id': session_id
    })

import pandas as pd

def merge_eda_with_stim(single_test_df, eda_data_df):
    """
    Attach the nearest EDA sample from each of two sensors to every stimulus row.

    Both inputs are converted to US/Eastern on internal copies, so the caller's
    DataFrames are left unchanged. For each sensor ('MD-V5-0000319' and
    'MD-V5-0000395') the sample whose 'Timestamp' is closest to 'stim time'
    (earlier or later) is selected with merge_asof.

    Parameters:
    single_test_df (pd.DataFrame): Per-trial data with a 'stim time' column; values are
        interpreted as UTC.
    eda_data_df (pd.DataFrame): EDA samples with timezone-aware 'Timestamp', plus 'Value'
        and 'SourceID' columns.

    Returns:
    pd.DataFrame: The stimulus rows sorted by 'stim time' (now in US/Eastern) with
    'Timestamp_319', 'EDA_Value_319', 'Timestamp_395' and 'EDA_Value_395' added.
    The unsuffixed 'SourceID' column comes from the 319 sensor match only.
    """
    # Work on copies so the caller's frames keep their original timezones.
    stim_df = single_test_df.assign(**{
        'stim time': pd.to_datetime(single_test_df['stim time'], utc=True).dt.tz_convert('US/Eastern')
    }).sort_values('stim time')
    eda_df = eda_data_df.assign(
        Timestamp=pd.to_datetime(eda_data_df['Timestamp']).dt.tz_convert('US/Eastern')
    )

    def _nearest_samples(source_id, suffix):
        # Closest sample (before or after) of one sensor for every stimulus.
        sensor_samples = eda_df[eda_df['SourceID'] == source_id].sort_values('Timestamp')
        return pd.merge_asof(stim_df, sensor_samples,
                             left_on='stim time', right_on='Timestamp',
                             direction='nearest', suffixes=('', suffix))

    result_319 = _nearest_samples('MD-V5-0000319', '_319')
    result_395 = _nearest_samples('MD-V5-0000395', '_395')

    # Bring the second sensor's match alongside the first; the shared column
    # names 'Timestamp' and 'Value' receive the per-sensor suffixes here.
    final_result = result_319.merge(
        result_395[['stim time', 'Timestamp', 'Value']],
        on='stim time',
        suffixes=('_319', '_395'),
        how='left'
    ).rename(columns={'Value_319': 'EDA_Value_319', 'Value_395': 'EDA_Value_395'})

    return final_result


def align_hrv_to_test(single_test_df, hrv_df):
    """
    Copy the HRV record closest in time to each trial's onset into the trial table.

    HRV 'Date' values are parsed as naive datetimes, localized to America/New_York
    and converted to UTC. For every trial, only HRV records on the same UTC calendar
    date as 'start_of_test' are considered, and the one nearest to 'onset_timestamp'
    is copied into 'HRV_<column>' columns. Trials with no onset timestamp, or with no
    HRV record on that date, keep None in those columns.

    Parameters:
    single_test_df (pd.DataFrame): Trial table with 'start_of_test' and 'onset_timestamp'.
        It is modified in place (datetime conversion plus the added HRV_ columns) and returned.
    hrv_df (pd.DataFrame): HRV records with a 'Date' column; left unmodified.

    Returns:
    pd.DataFrame: `single_test_df` with one 'HRV_<column>' column per column of `hrv_df`.
    """
    # Work on a copy so the caller's HRV frame is not re-typed or extended.
    hrv = hrv_df.assign(Date=pd.to_datetime(hrv_df['Date']))
    hrv['Date_UTC'] = hrv['Date'].dt.tz_localize('America/New_York').dt.tz_convert('UTC')
    # Calendar date of every HRV record, computed once instead of once per trial.
    hrv_dates = hrv['Date_UTC'].dt.date

    single_test_df['start_of_test'] = pd.to_datetime(single_test_df['start_of_test'])
    single_test_df['onset_timestamp'] = pd.to_datetime(single_test_df['onset_timestamp'])

    # Every HRV column except the helper 'Date_UTC' is carried over ('Date' included).
    hrv_columns = hrv.columns.drop(['Date_UTC'])
    for col in hrv_columns:
        single_test_df[f'HRV_{col}'] = None

    for index, test_row in single_test_df.iterrows():
        onset = test_row['onset_timestamp']
        if pd.isna(onset):
            # Distance to a missing onset is undefined, so no record is "nearest".
            continue

        same_date_hrv = hrv[hrv_dates == test_row['start_of_test'].date()]
        if same_date_hrv.empty:
            continue

        # Position of the record with the smallest absolute time difference.
        nearest_pos = (same_date_hrv['Date_UTC'] - onset).abs().argmin()
        for col in hrv_columns:
            single_test_df.at[index, f'HRV_{col}'] = same_date_hrv[col].values[nearest_pos]

    return single_test_df



In [135]:
# HRV export (CSV) from Wellatory, read from the working directory
hrv_data = pd.read_csv('HRV.csv')

In [None]:
# XDF recording that holds the EDA data
# NOTE(review): file_path is an empty placeholder - set it to the .xdf path before running
file_path = '' 
eda_streams_dataframes = extract_eda_streams(file_path)
# Display the combined EDA samples
eda_streams_dataframes

In [None]:
# Query window for reaction tests: 1 Aug 2024 up to 30 Aug 2024 (midnight).
# `datetime` is the class imported at the top of the file; no `dt` alias is imported.
start_date = datetime(2024, 8, 1, 0, 0, 0)
end_date = datetime(2024, 8, 30, 0, 0, 0)
env = Env.STAGING

# Users of the staging environment, without their 'created_at' column
user_df = get_users(env)
user_df = user_df.drop(columns='created_at')

# Reaction tests recorded inside the query window
test_df = get_reaction_tests(env, start_date, end_date)

In [None]:
# Show the mikoshilab@pison.com sessions; use this when no session_id is known
# and the test has to be matched via timestamps instead
test_df[test_df['user_id']=='zJX3F0VX7EXwIHLN5Vca0wog2yf2']

In [163]:
# Look up a test in test_df by a fragment of its 'session_id'.
partial_id = "210d0605"  # Replace this with your actual partial ID string

# Keep the rows whose 'session_id' contains the fragment (missing ids never match)
matching_rows = test_df[test_df['session_id'].str.contains(partial_id, na=False)]

# A single hit collapses to that one row; any other number of hits is kept
# as a DataFrame so the matches can be inspected
specific_row = matching_rows.iloc[0] if len(matching_rows) == 1 else matching_rows

session_id = specific_row['session_id']

In [None]:
# Display session_id to check that the lookup above found one
session_id

In [171]:
# Select the reaction test to analyse by its 'id'
current_test = test_df[test_df['id'] == 'd77613d1-d55b-5d4b-a7ab-27085be8cb83']

In [174]:
# Build the per-trial stimulus table and attach the nearest EDA samples.
# NOTE(review): get_stim_timestamps returns None for a skipped session, which
# merge_eda_with_stim cannot handle - check the skip log if this cell fails.
single_test = merge_eda_with_stim(get_stim_timestamps(current_test), eda_streams_dataframes)

In [176]:
# Add the closest HRV record to every trial
test_data = align_hrv_to_test(single_test, hrv_data)

In [None]:
# Export the aligned table.
# NOTE(review): the output path is an empty placeholder - fill it in before running
test_data.to_csv('')