In [2]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import pytz
import os
from pathlib import Path

In [3]:
data_dir = ['S'+ str(i) for i in range(1, 11)]
data_mt1_dir = [data_path + "/Midterm 1" for data_path in data_dir]
data_mt2_dir = [data_path + "/Midterm 2" for data_path in data_dir]
data_final_dir = [data_path + "/Final" for data_path in data_dir]

# For now let's only have eda and hr
file_list = ['EDA', 'HR']

In [4]:
def convert_unix_to_readable(unix_timestamp, timezone='America/Chicago'):
    """
    Convert a Unix timestamp to a human-readable datetime in the specified timezone
    
    Parameters:
    unix_timestamp (int or float): Unix timestamp in seconds
    timezone (str): Timezone name (default: 'America/Chicago' for CT/CDT)
    
    Returns:
    str: Human-readable datetime string
    """
    # Convert Unix timestamp to UTC datetime
    utc_time = datetime.utcfromtimestamp(unix_timestamp)
    
    # Make the datetime timezone-aware (UTC)
    utc_time = pytz.utc.localize(utc_time)
    
    # Convert to Central Time
    central_tz = pytz.timezone(timezone)
    central_time = utc_time.astimezone(central_tz)
    
    # Format the datetime in a human-readable format
    readable_time = central_time.strftime('%H:%M:%S')
    
    return readable_time

# Example usage
unix_timestamp = 1539435366  # This is September 30, 2021, 7:00:00 PM CDT
readable_datetime = convert_unix_to_readable(unix_timestamp)
print(f"Unix timestamp {unix_timestamp} converts to: {readable_datetime}")

# # If your timestamp is already in CT/CDT (not UTC)
# # You would need to adjust the conversion differently
# def convert_local_unix_to_readable(unix_timestamp, timezone='America/Chicago'):
#     """Convert a Unix timestamp that's already in local time to readable format"""
#     local_tz = pytz.timezone(timezone)
#     local_time = datetime.fromtimestamp(unix_timestamp, local_tz)
#     return local_time.strftime('%Y-%m-%d %H:%M:%S')

# # Example with timestamp already in CT/CDT
# local_readable = convert_local_unix_to_readable(unix_timestamp)
# print(f"If timestamp is already in CT/CDT: {local_readable}")

Unix timestamp 1539435366 converts to: 07:56:06


  utc_time = datetime.utcfromtimestamp(unix_timestamp)


In [5]:
def convert_time(df, freq, start):
    start = pd.to_datetime(start, format="%H:%M:%S")
    df['timestamp'] = df.index.map(lambda seconds: (start + timedelta(seconds=seconds / freq)).strftime("%H:%M:%S"))
    return df

def add_period(df, start_time, end_time):
    # Create selections
    conditions = [
        df['timestamp'] < start_time,
        (df['timestamp'] >= start_time) & (df['timestamp'] <= end_time),
        df['timestamp'] > end_time
    ]
    choices = ['pre-test', 'in-test', 'post-test']
    
    df['period'] = np.select(conditions, choices, default='unknown')
    
    return df

def process(dir, category, midterm=True):
    '''
    Parameters:
        dir: The working directory of the data
        category: the biosign name that aligns with the file names
        midterm: whether this is a midterm
    '''
    # Set the start and end time
    start = "09:00:00"
    end = "10:30:00" if midterm else "12:00:00"

    # Initialize the dataset
    df = pd.read_csv(dir+f"/{category.upper()}.csv")
    df_start = convert_unix_to_readable(float(df.columns[0]))
    df_freq = int(df.iloc[0, 0])
    df = df.iloc[1:, :].reset_index(drop=True)
    df.columns = [category]

    # Add timestamp column
    df = convert_time(df, df_freq, df_start)

    # Add period column
    df = add_period(df, start, end)

    return df

In [6]:
mt1_list = [[process(student, file) for file in file_list] for student in data_mt1_dir]
mt2_list = [[process(student, file) for file in file_list] for student in data_mt2_dir]
final_list = [[process(student, file, False) for file in file_list] for student in data_final_dir]

  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time = datetime.utcfromtimestamp(unix_timestamp)
  utc_time

In [7]:
for i in range(1, 1+len(data_dir)):
    mt1_path = f"S{i}_processed/Midterm 1/"
    mt2_path = f"S{i}_processed/Midterm 2/"
    final_path = f"S{i}_processed/Final/"

    Path(mt1_path).mkdir(parents=True, exist_ok=True)
    Path(mt2_path).mkdir(parents=True, exist_ok=True)
    Path(final_path).mkdir(parents=True, exist_ok=True)
    
    for j in range(len(file_list)):
        mt1_list[i-1][j].to_csv(mt1_path+f"/{file_list[j]}.csv", index=False)
        mt2_list[i-1][j].to_csv(mt2_path+f"/{file_list[j]}.csv", index=False)
        final_list[i-1][j].to_csv(final_path+f"/{file_list[j]}.csv", index=False)