In [0]:
import pandas as pd
import re

In [0]:
# Column names for the tab-separated log; passing names= makes pandas treat
# the first line of the file as data rather than as a header.
col_names = ['timestamp', 'entry_type', 'message']

# read the log file into a DataFrame
# NOTE(review): absolute, machine-specific path -- the cell only runs where
# this exact file exists.
df_log = pd.read_csv('/Users/nadou/Projects/there_will_be_beeps/data/raw_data/moiern/Continuous/moiern_Continuous_2022-07-08_14h22.59.036.log', sep='\t', names=col_names)

# Remove leading and trailing whitespace from every string cell; non-string
# values pass through unchanged. Series.map is applied per column instead of
# DataFrame.applymap, which is deprecated since pandas 2.1 (FutureWarning).
df_log = df_log.apply(
    lambda column: column.map(lambda x: x.strip() if isinstance(x, str) else x)
)

In [32]:
# Build one row per trial from the log:
#   timestamp  -- timestamp of the trial's 'New trial' line
#   rep_number -- integer parsed from 'rep=<n>' in that line
#   responses  -- 1 if the trial contains a 'Keypress: space' message, else 0
# Trials without a 'Sound probe started' message, or with rep_number < 2,
# are left out.

# group by 'Created sequence:' -- every matching row starts a new group.
# na=False makes rows with a missing message count as "no match" instead of
# propagating NaN into the running count.
sequence_starts = df_log.message.str.contains('Created sequence:', na=False)
sequence_groups = df_log.groupby(sequence_starts.cumsum())

# initialize an empty list to hold the data for the new DataFrame
data = []

# loop through each sequence group
for sequence, sequence_group in sequence_groups:
    # keep lines containing 'New trial' or 'Sound' or 'Keypress'
    # (na=False: a NaN in the mask would make boolean indexing raise)
    is_relevant = sequence_group.message.str.contains('New trial|Sound|Keypress', na=False)
    sequence_group = sequence_group.loc[is_relevant]

    # find the index of the first 'New trial' line
    is_new_trial = sequence_group.message.str.contains('New trial', na=False)
    new_trial_indices = sequence_group[is_new_trial].index

    # if no 'New trial' line is found, skip to the next sequence
    if new_trial_indices.empty:
        continue

    first_new_trial_index = new_trial_indices[0]

    # slice the DataFrame from the first 'New trial' line, so that every
    # trial group below starts with its own 'New trial' row
    sequence_group = sequence_group.loc[first_new_trial_index:]

    # find 'New trial' groups within each sequence group
    trial_groups = sequence_group.groupby(
        sequence_group.message.str.contains('New trial', na=False).cumsum()
    )

    # loop through each trial group
    for trial_name, trial_group in trial_groups:
        messages = trial_group['message'].values

        # if 'Sound probe started' is not in the group's messages, skip to the next iteration
        # (exact match against the whitespace-stripped message)
        if 'Sound probe started' not in messages:
            continue

        # get the repetition number from the 'New trial' message; a header
        # without 'rep=<n>' cannot be classified, so skip it rather than
        # crash on a None match
        rep_match = re.search(r'rep=(\d+)', messages[0])
        if rep_match is None:
            continue
        rep_number = int(rep_match.group(1))

        # if the repetition number is less than 2, skip to the next iteration
        if rep_number < 2:
            continue

        # get the timestamp from the first row of the trial group
        timestamp = trial_group['timestamp'].values[0]

        # 1 if the trial contains a 'Keypress: space' message, otherwise 0
        responses = 1 if 'Keypress: space' in messages else 0

        # create a dictionary with the data for the current trial and append it to the list
        data.append({'timestamp': timestamp, 'rep_number': rep_number, 'responses': responses})

# create a new DataFrame from the list of dictionaries; naming the columns
# keeps the schema stable even when no trial qualified
df_trials = pd.DataFrame(data, columns=['timestamp', 'rep_number', 'responses'])

# print the new DataFrame
df_trials.head()

Unnamed: 0,timestamp,rep_number,responses
0,53.2265,2,0
1,56.244,3,1
2,57.7491,4,1
3,59.7486,5,1
4,62.8399,6,1


In [25]:
# Path of the CSV file that is loaded below
csv_path = '/Users/nadou/Projects/there_will_be_beeps/data/raw_data/moiern/Continuous/moiern_Continuous_2022-07-08_14h22.59.036_1.csv'

# Columns kept from the CSV, in the order they should appear
kept_columns = [
    'sweeps.thisN', 'trials.thisN', 'Prediction', 'Frequency', 'Volume',
    'isCatchTrial', 'feedback.keys', 'feedback.rt', 'probe.started',
    'probe.stopped', 'feedback.started', 'ISI.started', 'ISI.stopped',
    'Delay', 'participant', 'Resume previous experiment', 'expName',
]

# Load the file, then keep only the rows whose 'trials.thisRepN' is greater
# than 1, restricted to the columns listed above
csv_full = pd.read_csv(csv_path)
rep_above_one = csv_full['trials.thisRepN'] > 1
df_csv = csv_full.loc[rep_above_one, kept_columns]

# preview the first rows of the filtered table
df_csv.head()

Unnamed: 0,sweeps.thisN,trials.thisN,Prediction,Frequency,Volume,isCatchTrial,feedback.keys,feedback.rt,probe.started,probe.stopped,feedback.started,ISI.started,ISI.stopped,Delay,participant,Resume previous experiment,expName
2,0.0,2.0,frequency,190.0,-4.270012,0.0,,,0.024492,,53.253687,0.226495,3.001244,2.774749,moiern,0.0,Continuous
3,0.0,3.0,frequency,213.0,-1.262547,0.0,,,0.025795,,56.272221,0.230251,1.488649,1.258398,moiern,0.0,Continuous
4,0.0,4.0,frequency,239.0,-6.904236,0.0,['space'],[-1.0566053999355063],0.023493,,57.774359,0.229741,1.989482,1.759741,moiern,0.0,Continuous
5,0.0,5.0,frequency,268.0,-6.058501,0.0,['space'],[-1.3378160999855027],0.025077,,59.775985,0.229364,3.081493,2.852128,moiern,0.0,Continuous
6,0.0,6.0,frequency,301.0,1.492714,0.0,['space'],[-2.50034169992432],0.02534,,62.866273,0.232652,1.791441,1.558789,moiern,0.0,Continuous


In [31]:
# CSV column -> name used in the per-trial table; the dict order also fixes
# the column order of the result.
# NOTE(review): 'responses' here carries the raw 'feedback.rt' values, while
# df_trials (built from the log) stores a 0/1 flag under the same name --
# confirm this is intended before comparing the two tables.
column_renames = {
    'feedback.started': 'timestamp',
    'trials.thisN': 'rep_number',
    'feedback.rt': 'responses',
}

# pick exactly those columns from df_csv and give them their new names
df_trials_csv = df_csv[list(column_renames)].rename(columns=column_renames)

# preview the first rows of the renamed table
df_trials_csv.head()

Unnamed: 0,timestamp,rep_number,responses
2,53.253687,2.0,
3,56.272221,3.0,
4,57.774359,4.0,[-1.0566053999355063]
5,59.775985,5.0,[-1.3378160999855027]
6,62.866273,6.0,[-2.50034169992432]
