# Calculating result stats

This notebook computes summary statistics for the survey results, first for the ABI part and then for the ARCTIC part.

In [48]:
import re
from os import listdir, walk
from os.path import join, expanduser, exists, basename, isdir, splitext, abspath, split
import pandas as pd
import numpy as np

In [2]:
# Full survey responses covering both the ABI and ARCTIC parts (tab-separated file)
ac_results = pd.read_csv('results.tsv', delimiter='\t')

## ABI Stats

In [3]:
# Positional slice: keep every row and the first 23 columns (positions 0-22) for the ABI analysis.
# NOTE(review): assumes the ABI questions occupy exactly those positions in results.tsv — confirm if the survey layout changes.
abi_results = ac_results.iloc[:, :23]

In [4]:
# Show only the first response as a quick check of the ABI column layout
abi_results.head(n=1)

Unnamed: 0,Timestamp,Do you consider yourself a native speaker of English?,Please select the first option.,Group #1: Is clip X most similar to A or B?,Group #2: Is clip X most similar to A or B?,Group #3: Is clip X most similar to A or B?,Group #4: Is clip X most similar to A or B?,Group #5: Is clip X most similar to A or B?,Group #6: Is clip X most similar to A or B?,Group #7: Is clip X most similar to A or B?,...,Pair #1: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity],Pair #2: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity],Pair #3: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity],Pair #4: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity],Pair #5: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity],Pair #6: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity],Pair #7: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity],Pair #8: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity],Pair #9: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity],Pair #10: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity]
0,25/12/2018 13:30:17,No,GLA M: https://kennywlino.github.io/msc_thesis...,A,B,A,A,A,B,B,...,Definitely the same speaker 7,4,-4,4,5,5,Definitely a different speaker -7,-1,1,5


### Perceived Accent

In [10]:
# Shell escape: print the directory the notebook kernel is currently running in
!pwd

/Users/kennylino/Documents/em_lct/UoM/thesis/experiments/results


In [32]:
# Root directory of the ABI test material, located under the current user's home directory.
ABI_ROOT = join(expanduser("~"), "Documents/em_lct/UoM/thesis/experiments", "abi")
# List the directory with the already-imported `listdir` instead of a shell `ls`:
# no unquoted shell interpolation (which breaks on paths containing spaces), no
# ANSI colour codes in the saved output, and it works on non-POSIX systems too.
# A missing directory raises FileNotFoundError here, before the later walk over
# ABI_ROOT could silently find nothing.
sorted(listdir(ABI_ROOT))

female  male


In [105]:
# Dictionary mapping a speaker ID to that speaker's list of correct answers for the PA task
ABI_PA_ans = {}

In [97]:
def create_ans_list_PA(file):
    """Build the list of correct answers for one PA (perceived accent) test file.

    The file is read in groups of four lines and only the first line of each
    ABX group is inspected. The ID that follows ``ABI-1/`` on that line
    (ex. sse_001, ean_001) is compared with the speaker ID, which is the first
    three characters of the file name, lower-cased.

    Parameters
    ----------
    file : str
        Path to the PA test file. Its base name must start with the
        three-letter speaker ID (ex. ``gla_m_test_PA.txt`` -> ``gla``).

    Returns
    -------
    list of str
        One entry per group, in file order: 'A' when the ID on the group's
        first line contains the speaker ID, otherwise 'B'
        (ex. ['B', 'A', 'A', ...]).

    Raises
    ------
    ValueError
        If the first line of a group contains no ``ABI-1/<id>`` component.
    """
    speaker_ID = basename(file)[:3].lower()
    # Compiled once, outside the loop. Raw string: '\w' in a plain literal is an
    # invalid escape sequence that newer Python versions warn about.
    clip_id_re = re.compile(r'ABI-1/(\w+)')
    correct_ans = []
    with open(file, 'r') as fh:
        for line_num, line in enumerate(fh):
            if line_num % 4 != 0:  # only checks 1st line in each ABX group
                continue
            match = clip_id_re.search(line)
            if match is None:
                # Fail with the location instead of an AttributeError on None.
                raise ValueError(
                    "{}: line {} has no 'ABI-1/<id>' component: {!r}".format(
                        file, line_num + 1, line))
            line_ID = match.group(1)  # ex. sse_001, ean_001
            correct_ans.append('A' if speaker_ID in line_ID else 'B')
    return correct_ans

In [106]:
# Walk the ABI tree and store, per speaker, the PA answer list parsed from every
# file ending in 'test_PA.txt' that sits in a 'test_PA' or 'test_SI' directory.
for folder, _subdirs, filenames in walk(ABI_ROOT):
    if basename(folder) not in ('test_PA', 'test_SI'):
        continue
    for filename in filenames:
        if not filename.endswith('test_PA.txt'):
            continue
        # first five characters of the file name, upper-cased (ex. GLA_M, EAN_F)
        speaker_key = filename[:5].upper()
        ABI_PA_ans[speaker_key] = create_ans_list_PA(join(folder, filename))

In [107]:
# Inspect the collected PA answers (speaker ID -> list of 'A'/'B')
print(ABI_PA_ans)

{'GLA_M': ['B', 'A', 'B', 'B', 'B', 'A', 'A', 'A', 'A', 'A'], 'LAN_M': ['A', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'A', 'A'], 'GLA_F': ['B', 'B', 'B', 'A', 'B', 'B', 'A', 'A', 'B', 'B'], 'EAN_F': ['B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A', 'A'], 'LAN_F': ['B', 'B', 'A', 'B', 'B', 'A', 'B', 'B', 'B', 'B']}


In [None]:
def create_ans_list_SI(file):
    """Build the list of correct answers for one SI test file.

    The file is read in groups of three lines and only the first line of each
    group is inspected. The ID that follows ``ABI-1/`` on that line
    (ex. sse_001, ean_001) is compared with the speaker ID, which is the first
    three characters of the file name, lower-cased.

    NOTE(review): the survey scale runs from -7 (definitely a different
    speaker) to 7 (definitely the same speaker). The mapping below is kept
    exactly as drafted ('-7' when the first line's ID contains the speaker ID,
    '7' otherwise) — confirm the polarity against the layout of the SI files.

    Parameters
    ----------
    file : str
        Path to the SI test file. Its base name must start with the
        three-letter speaker ID (ex. ``gla_m_test_SI.txt`` -> ``gla``).

    Returns
    -------
    list of str
        One entry per group, in file order (ex. ['-7', '7', '7', ...]).

    Raises
    ------
    ValueError
        If the first line of a group contains no ``ABI-1/<id>`` component.
    """
    speaker_ID = basename(file)[:3].lower()
    # Compiled once, outside the loop; raw string so '\w' is not an invalid escape.
    clip_id_re = re.compile(r'ABI-1/(\w+)')
    # Created once, before the loop, so answers accumulate across groups.
    correct_ans = []
    with open(file, 'r') as fh:
        for line_num, line in enumerate(fh):
            if line_num % 3 != 0:  # only checks 1st line in each group
                continue
            match = clip_id_re.search(line)
            if match is None:
                # Fail with the location instead of an AttributeError on None.
                raise ValueError(
                    "{}: line {} has no 'ABI-1/<id>' component: {!r}".format(
                        file, line_num + 1, line))
            line_ID = match.group(1)  # ex. sse_001, ean_001
            correct_ans.append('-7' if speaker_ID in line_ID else '7')
    return correct_ans

## ARCTIC Stats

In [7]:
# ARCTIC view of the responses: every column except those at positions 2-22
arc_results = ac_results.drop(columns=ac_results.columns[2:23])

In [8]:
# Show only the first response as a quick check of the ARCTIC column layout
arc_results.head(n=1)

Unnamed: 0,Timestamp,Do you consider yourself a native speaker of English?,Please select the first option..1,Group #1: Is clip X most similar to A or B?.1,Group #2: Is clip X most similar to A or B?.1,Group #3: Is clip X most similar to A or B?.1,Group #4: Is clip X most similar to A or B?.1,Group #5: Is clip X most similar to A or B?.1,Group #6: Is clip X most similar to A or B?.1,Group #7: Is clip X most similar to A or B?.1,...,Pair #1: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1,Pair #2: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1,Pair #3: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1,Pair #4: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1,Pair #5: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1,Pair #6: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1,Pair #7: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1,Pair #8: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1,Pair #9: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1,Pair #10: Rate the pair of audio clips on a scale of -7 (definitely a different speaker) to 7 (definitely the same speaker). [Similarity].1
0,25/12/2018 13:30:17,No,SPANISH F: https://kennywlino.github.io/msc_th...,B,A,B,B,B,B,A,...,Definitely the same speaker 7,-4,6,-1,6,5,6,Definitely a different speaker -7,4,-3
