In [123]:
import os
import pandas as pd
import numpy as np

In [124]:
def get_participant_model_column(full_folder_path, participant_id, model):
    """Collect one model's transcripts from a folder into a DataFrame.

    A file in ``full_folder_path`` is used when its name ends with ``.txt``,
    contains ``model`` and does not contain ``'whisper'``. The file name is
    split on ``'_'``; fields at indices 6 and 7 are joined with ``'_'`` to
    form the sentence id and the field at index 8 is the iteration label.
    The stripped file contents are the transcript.

    Parameters
    ----------
    full_folder_path : str
        Directory that holds the transcript ``.txt`` files.
    participant_id : str
        Value written to the ``participant_id`` column of every row.
    model : str
        Substring that selects the files; also the name of the transcript
        column in the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``participant_id``, ``file_name``, ``Sentence ID``,
        ``Iteration`` and ``<model>``, one row per matching file, ordered by
        file name. Empty (but with the same columns) when nothing matches.

    Raises
    ------
    IndexError
        If a matching file name has fewer than nine ``'_'``-separated fields.
    """
    columns = ['participant_id', 'file_name', 'Sentence ID', 'Iteration', model]
    records = []

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and any CSV written from it) reproducible between runs.
    for file in sorted(os.listdir(full_folder_path)):
        # NOTE(review): `model in file` is a substring test on the whole file
        # name, so a model name that also occurs elsewhere in the name would
        # match too -- confirm against the actual naming scheme.
        if 'whisper' in file or not file.endswith('.txt') or model not in file:
            continue

        ids = file.split('_')[6:9]
        if len(ids) < 3:
            # Same exception type as the bare list lookup used to raise, but
            # with enough context to find the offending file.
            raise IndexError(
                'file name %r has fewer than nine "_"-separated fields' % file)

        sentence_id = ids[0] + '_' + ids[1]
        iteration = ids[2]

        # Explicit encoding so the result does not depend on the platform
        # default. Assumes the transcripts are UTF-8 -- TODO confirm.
        full_fname = os.path.join(full_folder_path, file)
        with open(full_fname, 'r', encoding='utf-8') as f:
            transcript = f.read().strip()

        # file_name is kept for debugging.
        records.append(
            (participant_id, file, sentence_id, iteration, transcript))

    return pd.DataFrame(records, columns=columns)

In [125]:
def merging_models_per_participant(full_folder_path, participant_id):
    """Join the transcripts of all five models into one wide DataFrame.

    The 'large' frame is the starting point and keeps all of its columns
    (including ``file_name``). Each of 'base', 'medium', 'small' and 'tiny'
    then contributes only its transcript column, matched on participant,
    sentence id and iteration.

    Parameters
    ----------
    full_folder_path : str
        Directory passed through to ``get_participant_model_column``.
    participant_id : str
        Participant label passed through to ``get_participant_model_column``.

    Returns
    -------
    pandas.DataFrame
        One transcript column per model next to the key columns.
    """
    key_columns = ['participant_id', 'Sentence ID', 'Iteration']

    df_merged = get_participant_model_column(
        full_folder_path, participant_id, 'large')

    for model in ('base', 'medium', 'small', 'tiny'):
        per_model = get_participant_model_column(
            full_folder_path, participant_id, model)
        transcript_only = per_model[key_columns + [model]]

        # Inner join: a key that is missing for any one model drops the row.
        df_merged = df_merged.merge(
            transcript_only, on=key_columns, how='inner')

    return df_merged
    

In [126]:
def merge_with_targets(participant_df, target_sentences_path='target_sentences.csv'):
    """Attach the target-sentence information to a participant's transcripts.

    Parameters
    ----------
    participant_df : pandas.DataFrame
        Must contain ``participant_id``, ``Sentence ID``, ``Iteration`` and
        the transcript columns ``large``, ``base``, ``medium``, ``small`` and
        ``tiny``.
    target_sentences_path : str, optional
        CSV file with the target sentences. It must provide the columns
        ``Sentence ID``, ``probability`` and ``Expected Transcription``.
        Defaults to ``'target_sentences.csv'``, resolved against the current
        working directory.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``participant_id``, ``Sentence ID``,
        ``Iteration``, ``probability``, ``Expected Transcription``,
        ``large``, ``base``, ``medium``, ``small``, ``tiny`` in that order.
        Any other column (e.g. ``file_name``) is dropped.
    """
    target_sentence_df = pd.read_csv(target_sentences_path)

    # Left join: every transcript row is kept; rows whose sentence id is not
    # in the target file get missing values in the target columns.
    merged_df = participant_df.merge(
        target_sentence_df, on='Sentence ID', how='left')

    output_columns = ['participant_id', 'Sentence ID',
                      'Iteration', 'probability', 'Expected Transcription',
                      'large', 'base', 'medium',
                      'small', 'tiny']

    # The original ended with an argument-less .reindex(), whose only effect
    # is to return a copy; .copy() states that directly.
    return merged_df[output_columns].copy()
    

In [127]:
def full_txt_to_df(full_sauce_folder_path, participant_id, destination_path):
    """Build a participant's full transcript table and save it as CSV.

    Combines the per-model transcripts found in ``full_sauce_folder_path``
    via ``merging_models_per_participant``, adds the target-sentence columns
    via ``merge_with_targets``, and writes the result to ``destination_path``.

    Parameters
    ----------
    full_sauce_folder_path : str
        Directory that holds the transcript ``.txt`` files.
    participant_id : str
        Participant label written into the table.
    destination_path : str
        Where the CSV is written.

    Returns
    -------
    pandas.DataFrame
        The same table that was written to disk.
    """
    full_df = merge_with_targets(
        merging_models_per_participant(full_sauce_folder_path, participant_id))

    # to_csv is called with its defaults, so the row index is written as the
    # first (unnamed) column of the file.
    full_df.to_csv(destination_path)
    return full_df

In [135]:
# Build and save the transcript table for participant t13.
# NOTE(review): these absolute paths only exist on one machine; adjust them
# before running the notebook elsewhere.
full_txt_to_df(
    full_sauce_folder_path='/Users/cogsci-lasrlab1/Desktop/MTAA_recording/t13/t13_sauce',
    participant_id='t13',
    destination_path='/Users/cogsci-lasrlab1/Desktop/MTAA_recording/t13/t13_sauce/t13_sauce.csv',
)


Unnamed: 0,participant_id,Sentence ID,Iteration,probability,Expected Transcription,large,base,medium,small,tiny
0,t13,38_3,R5P1,low prob,I fixed the torn page with a piece of book.,I fixed the torn page with a piece of book.,I fixed the torn page with a piece of book.,I fixed the torn page with a piece of book.,I fixed the torn page with a piece of book.,I fixed the torn page with a piece of book.
1,t13,14_3,L2,nonword,"To play mug of war, we pulled on the grass.","To play mug of war, we pulled on the grass.",To play Mug of War we pulled on the grass.,"To play Mug of War, we pulled on the grass.",To play mug of war we pulled on the grass.,To play Mug of War we pulled on the grass.
2,t13,48_2,L1,low prob,We moved our furniture on the back of a flower.,We've moved our furniture on the back of a flo...,We've moved our furniture on the back of the f...,We've moved our furniture on the back of a flo...,We've moved our furniture on the back of a flo...,We've moved our furniture on the back of a flo...
3,t13,72_1,L1,nonword,The coy has a toy choo-choo leaf.,The koi has a toy choo-choo leaf.,The coin has a toy choo cho leaf.,The koi has a toy choo choo leaf.,The koi has a toy chuchu leaf.,The Coi has a toy to live.
4,t13,63_3,L1,nonword,I fell in the hud and have to put on a new but...,I fell in the hut and have to put on a new but...,I fell in the HUD and have to put on a new but...,I fell in the HUD and have to put on a new but...,I fell in the HUD and have to put on a new but...,I fell in the hood and have to put on a new bu...
...,...,...,...,...,...,...,...,...,...,...
679,t13,29_3,L2,nonword,The tift was inside the zebra.,The tift was inside the zebra.,The tent was inside the zebra.,The tipped was inside the zebra.,The tiff was inside the zebra.,The TFT was inside the zebra.
680,t13,75_2,L1,high prob,"To drive the car, daddy turns the wheel.","To drive the cart, daddy turns the wheel.","To drive the car, Daddy turns the wheel.","To drive the car, daddy turns the wheel.","To drive the car, Daddy turns the wheel.","To drive the car, that he turns the wheel."
681,t13,38_1,L2,nonword,I fixed the torn mage with a piece of book.,I fix the torn page with a piece of book.,I fixed the Torpate Mage with a piece of book.,I fixed the Torp Mages with a piece of book.,I fixed the tour page with a piece of book.,I fixed the tour page with the piece of book.
682,t13,16_2,R2P1,high prob,On the ceiling I saw a creepy-crawly spider.,"On the ceiling, I saw a creepy crawling spider.",On the selling I saw a creepy curling spider.,On the selling I saw a creepy crawling spider.,On the ceiling I saw a creepy curling spider.,On the selling I saw a creepy crawling spider.
