In [1]:
import csv
import pandas as pd
import os
import sys
import glob
import numpy as np
from datetime import datetime, timedelta

In [2]:
def open_sleeplog(filename, subj_id):
    """
    Look up one participant's entry in a sleep-log CSV file.

    Parameters:
    filename: Path of the sleep-log CSV file; it must contain an 'ID' column.
    subj_id: Value to look for in the 'ID' column.

    Returns:
    DataFrame: The rows whose 'ID' equals subj_id (empty when nothing matches).
    """
    all_logs = pd.read_csv(filename)
    is_requested_subject = all_logs['ID'] == subj_id
    return all_logs.loc[is_requested_subject]



In [3]:

def csv_to_dataframe(path, pattern):
    """
    Walk a directory tree and load the 'timestamp' column of the matching CSV file.

    A file matches when its name starts with `pattern` and ends with '.csv'.
    Empty CSV files are skipped. When several files match, the values taken
    from the last one visited by os.walk are returned.

    Parameters:
    path: Directory to search (its subdirectories are searched too).
    pattern: Prefix that the CSV file name must start with.

    Returns:
    tuple: (timestamp, subj_id), where timestamp is the 'timestamp' column
    (a pandas Series) of the matching file and subj_id is the slice
    file_path[11:23] of that file's path.

    Raises:
    FileNotFoundError: If no non-empty matching CSV file exists under `path`.
    KeyError: If a matching file has no 'timestamp' column.
    """
    timestamp = None
    subj_id = None
    found = False

    for root, _, files in os.walk(path):
        for file_name in files:
            if not (file_name.startswith(pattern) and file_name.endswith('.csv')):
                continue

            file_path = os.path.join(root, file_name)
            try:
                csv_data = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                # Nothing to read in this file; keep looking for another match.
                continue

            timestamp = csv_data['timestamp']
            # NOTE(review): the ID is cut out of the full path with fixed
            # character offsets, so the result depends on the length of the
            # directory prefix -- confirm the offsets match the actual layout.
            subj_id = file_path[11:23]
            found = True

    if not found:
        # Previously this case fell through to the return statement and
        # failed with an UnboundLocalError; report the real problem instead.
        raise FileNotFoundError(
            f"No non-empty CSV file starting with {pattern!r} was found under {path!r}."
        )

    return timestamp, subj_id


In [4]:

def get_dates(user_identifier):
    """
    Collect the distinct calendar dates found in a participant's timestamp file.

    The timestamps come from
    csv_to_dataframe('csv_files_new_timestamp', user_identifier) and are
    parsed with the format '%Y-%m-%dT%H:%M:%S%z'.

    Parameters:
    user_identifier: File-name prefix handed on to csv_to_dataframe.

    Returns:
    DataFrame: A single 'date' column holding each distinct date once, in
    order of first appearance, with a fresh 0..n-1 index.
    """
    timestamps, _ = csv_to_dataframe('csv_files_new_timestamp', user_identifier)
    frame = pd.DataFrame(timestamps)

    # Parse the raw strings, then keep only the calendar-date part.
    parsed = pd.to_datetime(frame['timestamp'], format='%Y-%m-%dT%H:%M:%S%z')
    day_only = parsed.dt.date.rename('date')

    unique_days = day_only.drop_duplicates().reset_index(drop=True)
    return pd.DataFrame(unique_days)


In [5]:

def extract_sleep_wake_columns(sleeplog):
    """
    Split a sleep-log table into its sleep-onset and wake-up parts, row by row.

    Column 0 is left out. Columns at positions 1, 3, 5, ... are treated as
    sleep onsets and columns at positions 2, 4, 6, ... as wake-ups.

    Parameters:
    sleeplog: Anything pd.DataFrame() accepts, laid out as described above.

    Returns:
    tuple: (sleep, wake), two lists with one pandas Series per input row;
    sleep[i] holds the onset columns of row i and wake[i] its wake-up columns.
    """
    log_frame = pd.DataFrame(sleeplog)

    onset_frame = log_frame[log_frame.columns[1::2]].copy()
    wake_frame = log_frame[log_frame.columns[2::2]].copy()

    row_positions = range(len(wake_frame))
    sleep = [onset_frame.iloc[pos] for pos in row_positions]
    wake = [wake_frame.iloc[pos] for pos in row_positions]

    return sleep, wake
    


In [6]:

def save_file():
    """
    Append one row for the current participant to 'new_sleeplog.csv'.

    The row is built from notebook-level variables rather than arguments:
    subj_id, df_dates, df_sleep and df_wakeup must already be defined when
    this function is called. The row starts with subj_id and is followed,
    for every position ii, by df_dates[ii], df_sleep.iloc[0, ii] and
    df_wakeup.iloc[0, ii].
    """
    data_line = [subj_id]
    for ii in range(np.size(df_dates)):
        # NOTE(review): df_dates[ii] is a key lookup. Elsewhere in this
        # notebook df_dates is a one-column DataFrame, for which this selects
        # a column rather than a row -- confirm what type is expected here.
        data_line.extend([
            df_dates[ii],
            df_sleep.iloc[0, ii],
            df_wakeup.iloc[0, ii],
        ])

    # newline='' is how the csv module expects its files to be opened (it
    # avoids extra blank lines on some platforms); the with-block closes the
    # file even when writerow raises.
    with open('new_sleeplog.csv', 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(data_line)
    

In [7]:
def read_all_sleeplogs(path):
    """
    Load the CSV file with all sleep logs and keep only its first column (the IDs).

    Parameters:
    path: Path to the CSV file that holds all sleep logs.

    Returns:
    Series: The first column of the file, one entry per sleep-log row.
    """
    all_sleeplogs = pd.read_csv(path)
    first_column = all_sleeplogs.iloc[:, 0]
    return first_column

In [8]:
# Load the first column of the sleep-log file once; the cells below iterate over df_IDs.
# NOTE(review): this is an absolute, machine-specific path, while later cells open
# 'all_sleeplogs.csv' relative to the working directory -- confirm both point to the same file.
df_IDs = read_all_sleeplogs('/Users/nathalia.esper/Documents/Ongoing_Projects/Kaggle/all_sleeplogs.csv')

In [48]:
# New code
# The output file should look like this (one onset/wakeup pair per night):
# onset_N1, wakeup_N1, ...
# 2018-12-22T14:40, 2018-12-23T06:30

def find_identifier_in_csv(csv_file_path, user_identifier):
    """
    Return the first CSV row whose first field equals the given identifier.

    Parameters:
    csv_file_path: Path of the CSV file to scan.
    user_identifier: Identifier to look for; it is compared as str(user_identifier).

    Returns:
    list: The matching row as a list of strings, or None when no row matches
    or the file does not exist (in which case an error message is printed).
    """
    try:
        with open(csv_file_path, 'r', newline='') as handle:
            matching_rows = (
                record
                for record in csv.reader(handle)
                if record and record[0] == str(user_identifier)
            )
            # Only the first hit is needed; None signals "not found".
            return next(matching_rows, None)
    except FileNotFoundError:
        print(f"Error: The file '{csv_file_path}' was not found.")
    return None
    
    
csv_file_path = 'all_sleeplogs.csv'

# For every participant: combine the recording dates with the clock times of
# the sleep log and append one row (ID, onset_N1, wakeup_N1, ...) to
# 'new_sleeplog_v3.csv'.
for user_identifier in df_IDs:
    print(user_identifier)

    row = find_identifier_in_csv(csv_file_path, user_identifier)
    if row is None:
        # Without a sleep-log row there is nothing to combine with the dates.
        print(f"No sleep log found for '{user_identifier}'; skipping.")
        continue

    # Opening the csv file containing the timestamps
    df, subj_id = csv_to_dataframe('csv_files_new_timestamp', user_identifier)
    # Transform df to a dataframe
    df = pd.DataFrame(df)
    # Ensure that the 'timestamp' column is in a proper datetime format
    # (values that do not match the format become NaT)
    df['new_timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%dT%H:%M:%S%z', errors='coerce')
    # NOTE(review): utc=True converts every timestamp to UTC before the date is
    # taken, so the resulting calendar date can differ from the local date
    # written in the original string -- confirm this is intended.
    df['new_timestamp'] = pd.to_datetime(df['new_timestamp'], format='%Y-%m-%d', utc=True)
    df['new_timestamp'] = df['new_timestamp'].dt.strftime('%Y-%m-%d')
    # Drop unparseable timestamps, remove duplicated dates and reset the index
    dates = df['new_timestamp'].dropna().drop_duplicates().reset_index(drop=True)
    # Transform dates to a dataframe
    df_dates = pd.DataFrame(dates)

    # One-row frame holding the sleep-log fields: column 0 is the ID,
    # odd positions are sleep onsets and even positions are wake-ups.
    df_row = pd.DataFrame(row).transpose()
    wakeup_columns = df_row.columns[2::2]
    sleep_columns = df_row.columns[1::2]
    df_wakeup = df_row[wakeup_columns].copy()
    df_sleep = df_row[sleep_columns].copy()

    data_line = [user_identifier]
    # A separate loop variable is used so the outer loop is not shadowed.
    for night in range(np.size(df_dates)):
        sleep_time = df_sleep.iloc[0, night]
        print(sleep_time)

        # NOTE(review): this is a lexicographic string comparison, not a time
        # comparison. Un-padded times such as '3:00:00' or '5:45:00' compare
        # greater than "23:59:59" and take the next-day branch, while
        # '0:54:00', '1:50:00' and '22:35:00' compare smaller and keep the
        # current date. Confirm this is the intended rule.
        if sleep_time <= "23:59:59":
            print("S-1")
            # .iloc[night, 0] yields the date string itself; .iloc[night]
            # would yield a one-element Series and end up in the CSV as its repr.
            sleep_date_temp = df_dates.iloc[night, 0]
        else:
            try:
                print("S-2")
                sleep_date_temp = df_dates.iloc[night + 1, 0]
            except IndexError:
                # Last recorded date: there is no following day to use.
                print("S-3")
                sleep_date_temp = df_dates.iloc[night, 0]

        wake_time = df_wakeup.iloc[0, night]
        if wake_time <= "23:59:59":
            wake_date_temp = df_dates.iloc[night, 0]
        else:
            try:
                wake_date_temp = df_dates.iloc[night + 1, 0]
            except IndexError:
                wake_date_temp = df_dates.iloc[night, 0]

        sleep_temp = sleep_date_temp + 'T' + sleep_time
        wake_temp = wake_date_temp + 'T' + wake_time
        data_line.append(sleep_temp)
        data_line.append(wake_temp)

    # newline='' is how the csv module expects its files to be opened; the
    # with-block closes the file even when writerow raises.
    with open('new_sleeplog_v3.csv', 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(data_line)

NDARAB055BPR
22:35:00
S-1
22:45:00
S-1
1:50:00
S-1
5:45:00
S-2
5:30:00
S-2
1:50:00
S-1
1:50:00
S-1
3:00:00
S-2
22:52:00
S-1
1:50:00
S-1
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
1:07:00
S-1
0:54:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
0:20:00
S-2
1:50:00
S-1
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
3:00:00
S-2
S-3
NDARAB348EWR


KeyboardInterrupt: 

## Old Code

In [9]:
# Old code: adds the date as a separate field before each sleep onset
def find_identifier_in_csv(csv_file_path, user_identifier):
    """
    Scan a CSV file for the first row that starts with the given identifier.

    NOTE: an identical definition also appears in the "New code" cell of this
    notebook; whichever cell runs last provides the active definition.

    Parameters:
    csv_file_path: Path of the CSV file to scan.
    user_identifier: Identifier to look for; it is compared as str(user_identifier).

    Returns:
    list: The matching row as a list of strings, or None when no row matches
    or the file does not exist (in which case an error message is printed).
    """
    try:
        with open(csv_file_path, 'r', newline='') as handle:
            for record in csv.reader(handle):
                if not record:
                    # Blank lines are parsed as empty rows; ignore them.
                    continue
                if record[0] == str(user_identifier):
                    return record
    except FileNotFoundError:
        print(f"Error: The file '{csv_file_path}' was not found.")
    return None
    

# Example usage:
csv_file_path = 'all_sleeplogs.csv'

# For every participant: append one row (ID, then date, sleep onset and
# wake-up for each recorded date) to 'new_sleeplog_v2.csv'.
for user_identifier in df_IDs:
    print(user_identifier)

    row = find_identifier_in_csv(csv_file_path, user_identifier)
    if row is None:
        # Without a sleep-log row there is nothing to combine with the dates.
        print(f"No sleep log found for '{user_identifier}'; skipping.")
        continue

    # Opening the csv file containing the timestamps
    df, subj_id = csv_to_dataframe('csv_files_new_timestamp', user_identifier)
    # Transform df to a dataframe
    df = pd.DataFrame(df)
    # Ensure that the 'timestamp' column is in a proper datetime format
    # (values that do not match the format become NaT)
    df['new_timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%dT%H:%M:%S%z', errors='coerce')
    # NOTE(review): utc=True converts every timestamp to UTC before the date is
    # taken, so the resulting calendar date can differ from the local date
    # written in the original string -- confirm this is intended.
    df['new_timestamp'] = pd.to_datetime(df['new_timestamp'], format='%Y-%m-%d', utc=True)
    df['new_timestamp'] = df['new_timestamp'].dt.strftime('%Y-%m-%d')
    # Removing duplicated values for date and resetting index
    dates = df['new_timestamp'].drop_duplicates().reset_index(drop=True)
    # Transform dates to a dataframe
    df_dates = pd.DataFrame(dates)

    # One-row frame holding the sleep-log fields: column 0 is the ID,
    # odd positions are sleep onsets and even positions are wake-ups.
    df_row = pd.DataFrame(row).transpose()
    wakeup_columns = df_row.columns[2::2]
    sleep_columns = df_row.columns[1::2]
    df_wakeup = df_row[wakeup_columns].copy()
    df_sleep = df_row[sleep_columns].copy()

    data_line = [user_identifier]
    # A separate loop variable is used so the outer loop is not shadowed.
    for night in range(np.size(df_dates)):
        # .iloc[night, 0] yields the date value itself; .iloc[night] would
        # yield a one-element Series and end up in the CSV as its repr.
        date_tmp = df_dates.iloc[night, 0]
        data_line.append(date_tmp)
        data_line.append(df_sleep.iloc[0, night])
        data_line.append(df_wakeup.iloc[0, night])

    # newline='' is how the csv module expects its files to be opened; the
    # with-block closes the file even when writerow raises.
    with open('new_sleeplog_v2.csv', 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(data_line)
    



NDARAB055BPR
NDARAB348EWR
NDARAB696MF4


KeyboardInterrupt: 