In [1]:
import os
import pandas as pd
import numpy as np

In [16]:
def get_participant_model_column(full_folder_path, participant_id, model):

    fname_list = []
    transcript_list = []
    participant_id_list = []
    sentence_ids_list = []
    iterations_list = []
    
    for file in os.listdir(full_folder_path):
        if 'whisper' not in file and file.endswith('.txt') and model in file:

            # adding to check for F or M in t2_F or t2_M
            # after checking t2, t2_F wav files are blank
            # only t2_M files contain content we want
            f_or_m = file.split('_')[1]

            if f_or_m == "M": #looking through only the M files
            
                fname_list.append(file)
                
                full_fname = os.path.join(full_folder_path, file)
                
                
                ids = file.split('_')[6:9]
    
                sentence_id = ids[0] + '_' + ids[1]
                sentence_ids_list.append(sentence_id)
    
                iteration = ids[2]
                iterations_list.append(iteration)
                
                with open(full_fname, 'r') as f:
                    transcript = f.read()
                    transcript = transcript.strip()
                    #print(transcript)
                    transcript_list.append(transcript)
    
                participant_id_list.append(participant_id)

    result_df = pd.DataFrame({'participant_id':participant_id_list})

    result_df['file_name'] = fname_list # kept for debugging

    result_df['Sentence ID'] = sentence_ids_list

    result_df['Iteration'] = iterations_list

    result_df[model] = transcript_list

    return result_df

In [17]:
get_participant_model_column('/Users/cogsci-lasrlab1/Desktop/MTAA_recording/t2/t2_sauce', 
                             't2',
                            'large')

Unnamed: 0,participant_id,file_name,Sentence ID,Iteration,large
0,t2,t2_M_MN_SAUCE_block4_sauce_86_1_L2_large-v3.txt,86_1,L2,The stoat took lessons to play the dinosaur.
1,t2,t2_M_MN_SAUCE_block4_sauce_97_3_L2_large-v3.txt,97_3,L2,"On Christmas, mommy left cookies for Santa."
2,t2,t2_M_MN_SAUCE_block4_sauce_85_2_L1_large-v3.txt,85_2,L1,I like to play badge with my clock.
3,t2,t2_M_MN_SAUCE_block4_sauce_97_1_L1_large-v3.txt,97_1,L1,"On Christmas, mommy left cookies for ice."
4,t2,t2_M_MN_SAUCE_block4_sauce_86_3_L1_large-v3.txt,86_3,L1,The girl took lessons to play the piano.
...,...,...,...,...,...
145,t2,t2_M_MN_SAUCE_block4_sauce_90_2_L1_large-v3.txt,90_2,L1,"When reset ends, the teacher rings the bell."
146,t2,t2_M_MN_SAUCE_block4_sauce_79_2_L2_large-v3.txt,79_2,L2,There are three tadrums in their carrots.
147,t2,t2_M_MN_SAUCE_block4_sauce_93_3_L1_large-v3.txt,93_3,L1,I plead allegiance to the flag.
148,t2,t2_M_MN_SAUCE_block4_sauce_82_1_L1_large-v3.txt,82_1,L1,I want to grip in my dog.


In [18]:
def merging_models_per_participant(full_folder_path, participant_id):
    df_merged = get_participant_model_column(
    full_folder_path, participant_id, 'large')
    
    model_list = ['base', 'medium', 'small', 'tiny']
    for model in model_list:
        model_df = get_participant_model_column(
            full_folder_path, participant_id, model)
        
        df_merged = df_merged.merge(
            model_df[['participant_id', 'Sentence ID', 'Iteration', model]],
            on = ['participant_id', 'Sentence ID', 'Iteration'],
            how = 'inner')

    return df_merged
    

In [19]:
participant_df = merging_models_per_participant('/Users/cogsci-lasrlab1/Desktop/MTAA_recording/t2/t2_sauce', 
                             't2')

In [20]:
def merge_with_targets(participant_df):
    target_sentence_df = pd.read_csv('target_sentences.csv')
    
    merged_df = participant_df.merge(
        target_sentence_df, on = 'Sentence ID', how = 'left')

    merged_df = merged_df[['participant_id', 'file_name', 'Sentence ID', 
                           'Iteration', 'probability', 'Expected Transcription',
                           'large', 'base', 'medium', 
                           'small', 'tiny']].reindex()
    return merged_df
    

In [22]:
merge_with_targets(participant_df)

Unnamed: 0,participant_id,file_name,Sentence ID,Iteration,probability,Expected Transcription,large,base,medium,small,tiny
0,t2,t2_M_MN_SAUCE_block4_sauce_86_1_L2_large-v3.txt,86_1,L2,nonword,The stirl took lessons to play the dinosaur.,The stoat took lessons to play the dinosaur.,Destroy Tick-cessons to play the Titan Zone.,The stove took lessons to play the dinosaur.,Distort two classes to play the dinosaur.,The stove took lessons to play the dinosaur.
1,t2,t2_M_MN_SAUCE_block4_sauce_97_3_L2_large-v3.txt,97_3,L2,high prob,"On Christmas, mommy left cookies for santa.","On Christmas, mommy left cookies for Santa.","On Christmas, mommy left cookies for Santa.",On Christmas mommy left cookies for Santa.,"On Christmas, mommy left cookies for Santa.","On Christmas, Mommy and Life Cook is for Santa."
2,t2,t2_M_MN_SAUCE_block4_sauce_85_2_L1_large-v3.txt,85_2,L1,nonword,I like to play betch with my clock.,I like to play badge with my clock.,I like to play badge with my clock.,I like to play badge with my clock.,I like to play badge with my clock.,I'd like to play Badge with my clock.
3,t2,t2_M_MN_SAUCE_block4_sauce_97_1_L1_large-v3.txt,97_1,L1,low prob,"On Christmas, mommy left cookies for eyes.","On Christmas, mommy left cookies for ice.","On Christmas, mommy left cookies for ice.","On Christmas, Mommy left cookies for us.","On Christmas, mommy left cookies for ice.","On Christmas, mommy and laugh cookies flies."
4,t2,t2_M_MN_SAUCE_block4_sauce_86_3_L1_large-v3.txt,86_3,L1,high prob,The girl took lessons to play the piano.,The girl took lessons to play the piano.,The grow took lessons to play the piano.,The girl took lessons to play the piano.,The grout took lessons to play the piano.,The grow took lessons to play the piano.
...,...,...,...,...,...,...,...,...,...,...,...
145,t2,t2_M_MN_SAUCE_block4_sauce_90_2_L1_large-v3.txt,90_2,L1,high prob,"When recess ends, the teacher rings the bell.","When reset ends, the teacher rings the bell.","When recess ends, the teacher rings the bell.","When recess ends, the teacher rings the bell.","When reset ends, the teacher rings the bell.","When recess ends, the teacher rings the bell."
146,t2,t2_M_MN_SAUCE_block4_sauce_79_2_L2_large-v3.txt,79_2,L2,nonword,There are three tedrooms in their carrot.,There are three tadrums in their carrots.,There are three bedrooms in their carrots.,There are three tadrooms in their carrots.,There are three tad rooms in their carrots.,There are three Tatrim's in their carrots.
147,t2,t2_M_MN_SAUCE_block4_sauce_93_3_L1_large-v3.txt,93_3,L1,high prob,I pledge allegiance to the flag.,I plead allegiance to the flag.,I plead allegiance to the flag.,I plead allegiance to the flag.,I plead allegiance to the flag.,I plead allegiance to the flag.
148,t2,t2_M_MN_SAUCE_block4_sauce_82_1_L1_large-v3.txt,82_1,L1,nonword,I want to breep in my dog.,I want to grip in my dog.,I want to breathe in my dog.,I want to burp in my dog.,I want to burp in my dog.,I want to breathe in my dog.


In [23]:
def full_txt_to_df(full_sauce_folder_path, participant_id, destination_path):
    participant_df = merging_models_per_participant(
        full_sauce_folder_path, participant_id)

    full_df = merge_with_targets(participant_df)
    
    full_df.to_csv(destination_path)

    return full_df

In [24]:
full_txt_to_df('/Users/cogsci-lasrlab1/Desktop/MTAA_recording/t2/t2_sauce', 
               't2', 
               '/Users/cogsci-lasrlab1/Desktop/MTAA_recording/t2/t2_sauce/t2_sauce_corrected.csv')


Unnamed: 0,participant_id,file_name,Sentence ID,Iteration,probability,Expected Transcription,large,base,medium,small,tiny
0,t2,t2_M_MN_SAUCE_block4_sauce_86_1_L2_large-v3.txt,86_1,L2,nonword,The stirl took lessons to play the dinosaur.,The stoat took lessons to play the dinosaur.,Destroy Tick-cessons to play the Titan Zone.,The stove took lessons to play the dinosaur.,Distort two classes to play the dinosaur.,The stove took lessons to play the dinosaur.
1,t2,t2_M_MN_SAUCE_block4_sauce_97_3_L2_large-v3.txt,97_3,L2,high prob,"On Christmas, mommy left cookies for santa.","On Christmas, mommy left cookies for Santa.","On Christmas, mommy left cookies for Santa.",On Christmas mommy left cookies for Santa.,"On Christmas, mommy left cookies for Santa.","On Christmas, Mommy and Life Cook is for Santa."
2,t2,t2_M_MN_SAUCE_block4_sauce_85_2_L1_large-v3.txt,85_2,L1,nonword,I like to play betch with my clock.,I like to play badge with my clock.,I like to play badge with my clock.,I like to play badge with my clock.,I like to play badge with my clock.,I'd like to play Badge with my clock.
3,t2,t2_M_MN_SAUCE_block4_sauce_97_1_L1_large-v3.txt,97_1,L1,low prob,"On Christmas, mommy left cookies for eyes.","On Christmas, mommy left cookies for ice.","On Christmas, mommy left cookies for ice.","On Christmas, Mommy left cookies for us.","On Christmas, mommy left cookies for ice.","On Christmas, mommy and laugh cookies flies."
4,t2,t2_M_MN_SAUCE_block4_sauce_86_3_L1_large-v3.txt,86_3,L1,high prob,The girl took lessons to play the piano.,The girl took lessons to play the piano.,The grow took lessons to play the piano.,The girl took lessons to play the piano.,The grout took lessons to play the piano.,The grow took lessons to play the piano.
...,...,...,...,...,...,...,...,...,...,...,...
145,t2,t2_M_MN_SAUCE_block4_sauce_90_2_L1_large-v3.txt,90_2,L1,high prob,"When recess ends, the teacher rings the bell.","When reset ends, the teacher rings the bell.","When recess ends, the teacher rings the bell.","When recess ends, the teacher rings the bell.","When reset ends, the teacher rings the bell.","When recess ends, the teacher rings the bell."
146,t2,t2_M_MN_SAUCE_block4_sauce_79_2_L2_large-v3.txt,79_2,L2,nonword,There are three tedrooms in their carrot.,There are three tadrums in their carrots.,There are three bedrooms in their carrots.,There are three tadrooms in their carrots.,There are three tad rooms in their carrots.,There are three Tatrim's in their carrots.
147,t2,t2_M_MN_SAUCE_block4_sauce_93_3_L1_large-v3.txt,93_3,L1,high prob,I pledge allegiance to the flag.,I plead allegiance to the flag.,I plead allegiance to the flag.,I plead allegiance to the flag.,I plead allegiance to the flag.,I plead allegiance to the flag.
148,t2,t2_M_MN_SAUCE_block4_sauce_82_1_L1_large-v3.txt,82_1,L1,nonword,I want to breep in my dog.,I want to grip in my dog.,I want to breathe in my dog.,I want to burp in my dog.,I want to burp in my dog.,I want to breathe in my dog.
