In [25]:
import os
import pandas as pd
import numpy as np

In [26]:
def get_participant_model_column(full_folder_path, participant_id, model):
    """Collect one model's transcripts from a folder into a DataFrame.

    A file in ``full_folder_path`` is used when its name does not contain
    ``'whisper'``, ends with ``'.txt'`` and contains ``model`` as a substring.
    The file name is split on ``'_'``: tokens 7 and 8 (1-based) are joined
    with ``'_'`` to form the sentence id, and token 9 is the iteration.

    Parameters
    ----------
    full_folder_path : str
        Directory holding the transcript ``.txt`` files.
    participant_id : str
        Value repeated in the ``participant_id`` column for every row.
    model : str
        Substring identifying the model in the file name; also used as the
        name of the transcript column.

    Returns
    -------
    pandas.DataFrame
        Columns ``participant_id``, ``Sentence ID``, ``Iteration`` and
        ``model``; one row per matching file, ordered by file name. The frame
        is empty (but keeps its columns) when no file matches.

    Raises
    ------
    IndexError
        If a matching file name has fewer than nine ``'_'``-separated tokens.
    """
    sentence_ids = []
    iterations = []
    transcripts = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and any CSV written from it) reproducible across runs/machines.
    for file_name in sorted(os.listdir(full_folder_path)):
        if ('whisper' in file_name
                or not file_name.endswith('.txt')
                or model not in file_name):
            continue

        # NOTE(review): `model in file_name` is a plain substring match over
        # the whole name -- confirm no other name token can contain a model
        # name.
        ids = file_name.split('_')[6:9]
        sentence_ids.append(ids[0] + '_' + ids[1])
        iterations.append(ids[2])

        # Explicit encoding so decoding does not depend on the machine's
        # locale default.
        full_fname = os.path.join(full_folder_path, file_name)
        with open(full_fname, 'r', encoding='utf-8') as f:
            transcripts.append(f.read().strip())

    return pd.DataFrame({
        'participant_id': [participant_id] * len(transcripts),
        'Sentence ID': sentence_ids,
        'Iteration': iterations,
        model: transcripts,
    })

In [28]:
def merging_models_per_participant(full_folder_path, participant_id):
    """Build one wide DataFrame holding every model's transcripts.

    Starts from the 'large' model's frame and inner-joins the frames for
    'base', 'medium', 'small' and 'tiny' (in that order) on the participant
    id, sentence id and iteration. Because each join is an inner join, a
    sentence/iteration survives only if every model produced a row for it.

    Returns a DataFrame with the three key columns followed by one
    transcript column per model.
    """
    key_columns = ['participant_id', 'Sentence ID', 'Iteration']

    combined = get_participant_model_column(
        full_folder_path, participant_id, 'large')

    for model_name in ['base', 'medium', 'small', 'tiny']:
        per_model = get_participant_model_column(
            full_folder_path, participant_id, model_name)
        # Keep only the keys plus this model's transcript column.
        per_model = per_model[key_columns + [model_name]]
        combined = combined.merge(per_model, on=key_columns, how='inner')

    return combined
    

In [76]:
def merge_with_targets(participant_df, target_sentences_path='target_sentences.csv'):
    """Attach the target-sentence metadata to a participant's transcripts.

    Parameters
    ----------
    participant_df : pandas.DataFrame
        Frame with columns ``participant_id``, ``Sentence ID``, ``Iteration``
        and one transcript column per model (``large``, ``base``, ``medium``,
        ``small``, ``tiny``).
    target_sentences_path : str, optional
        CSV file read with ``pandas.read_csv``. It must provide a
        ``Sentence ID`` column plus ``probability`` and
        ``Expected Transcription``. Defaults to ``'target_sentences.csv'``,
        resolved against the current working directory.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``participant_df`` (left join on ``Sentence ID``),
        restricted to the identifier, target and model columns in a fixed
        order. Rows without a matching target get missing values in the
        target columns.

    Raises
    ------
    KeyError
        If any of the selected columns is absent after the merge.
    """
    output_columns = [
        'participant_id', 'Sentence ID', 'Iteration',
        'probability', 'Expected Transcription',
        'large', 'base', 'medium', 'small', 'tiny',
    ]

    target_sentence_df = pd.read_csv(target_sentences_path)

    merged_df = participant_df.merge(
        target_sentence_df, on='Sentence ID', how='left')

    # Column selection already yields a new frame; the former trailing
    # `.reindex()` call (no arguments) changed nothing and was dropped.
    return merged_df[output_columns]
    

In [77]:
def full_txt_to_df(full_sauce_folder_path, participant_id, destination_path):
    """Run the full pipeline for one participant and save the result.

    Merges the per-model transcripts found in ``full_sauce_folder_path``,
    joins them with the target sentences, writes the table to
    ``destination_path`` as CSV (index included) and returns it.
    """
    full_df = merge_with_targets(
        merging_models_per_participant(full_sauce_folder_path, participant_id))

    # Persist a copy on disk; the frame is also returned for inspection.
    full_df.to_csv(destination_path)
    return full_df

In [79]:
# Build and save the transcript table for participant t1; as the cell's last
# expression, the returned frame is also displayed.
full_txt_to_df(
    full_sauce_folder_path='/Users/cogsci-lasrlab1/Desktop/MTAA_recording/t1/t1_sauce',
    participant_id='t1',
    destination_path='/Users/cogsci-lasrlab1/Desktop/MTAA_recording/t1/t1_sauce/t1_sauce.csv',
)

Unnamed: 0,participant_id,Sentence ID,Iteration,probability,Expected Transcription,large,base,medium,small,tiny
0,t1,67_3,L1,high prob,The caterpillar turned into a butterfly.,The caterpillar turned into a butterfly.,The caterpillar turned into a butterfly.,The caterpillar turned into a butterfly.,The caterpillar turned into a butterfly.,The caterpillar turned into a butterfly.
1,t1,76_1,L1,low prob,The man checked the time on his stairs.,The man checked the time on his stairs.,The man checked the time on his stairs.,The man checked the time on his stairs.,The man checked the time on his stairs.,The man checked the time on his stairs.
2,t1,29_3,L1,nonword,The tift was inside the zebra.,The tift was inside the zebra.,The tipped was inside the zebra.,The tipped was inside the zebra.,The tipped was inside the zebra.,The tip was inside the zebra.
3,t1,75_2,L2,high prob,"To drive the car, daddy turns the wheel.","To drive the car, Daddy turns the wheel.","To drive the car, daddy turns the wheel.","To drive the car, daddy turns the wheel.","To drive the car, Daddy turns the wheel.","To drive the car, Daddy turns the wheel."
4,t1,38_1,L1,nonword,I fixed the torn mage with a piece of book.,I fixed the torn mage with a piece of book.,I fix the tour in beige with a piece of book.,I fixed the torn mage with a piece of book.,I fixed the Torum Mage with a piece of book.,I fixed the tour of the beach with a piece of ...
...,...,...,...,...,...,...,...,...,...,...
661,t1,63_3,L2,nonword,I fell in the hud and have to put on a new but...,I fell in the hut and have to put on a new but...,I fell in the HUD and have to put on a new but...,I fell in the HUD and have to put on a new but...,I fell in the HUD and have to put on a new but...,I fell in the HUD and have to put on a new but...
662,t1,71_2,L1,nonword,I have a woon but I need a web.,I have a wound but I need a web.,I have a wound but I need a web.,I have a wound but I need a web.,I have a wound but I need a web.,"I have to win, but I need a web."
663,t1,9_2,L1,nonword,I put shamTROO in my pig.,I put Shamchu in my pig.,I put Sham True in my pig.,I put Shem True in my pig.,I put Sham Tru in my pig.,I put sham-true in my pig.
664,t1,98_2,L2,nonword,"There is one hathroom for the boys, and one fo...",There is one half room for the boys and one fo...,There is one bathroom for the boys and one for...,There is one hathrum for the boys and one for ...,There is one half room for the boys and one fo...,There is one bathroom for the boys and one for...
