# Data analysis

## Setup and imports

In [None]:
# Colab / Google Sheets setup: imports first, then the interactive sign-in.
from google.auth import default
from google.colab import auth
import gspread
import requests

# Sign the Colab user in, then reuse the resulting default credentials for gspread.
auth.authenticate_user()
creds, _ = default()
# NOTE: `gc` is the gspread client used by later cells (the name shadows the stdlib `gc` module).
gc = gspread.authorize(creds)

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem.
drive.mount('/content/drive')
# Work from the project folder: later cells open files by relative path
# ('sents/...', '*.csv', 'conditions_data/...').
%cd /content/drive/MyDrive/'9.66 proj'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/9.66 proj


In [None]:
import pandas as pd
import numpy as np
import pickle

# Read every cell of the first worksheet of the responses spreadsheet.
sh = gc.open('9.66 data collection (Responses)')
worksheet = sh.sheet1
values_list = worksheet.get_all_values()
# No column names are passed, so columns are labelled 0..N-1 and the sheet's
# header row stays in the frame as row 0 (see the head() output of the next cell).
# Later cells rely on this layout (row[1] = participant id, answers from column 2).
responses_df = pd.DataFrame(values_list)

# responses_df.head()

In [None]:
# Recode the answers in place: every cell equal to 'Yes' becomes 1 and every
# cell equal to 'No' becomes 0. All other cells (header text, timestamps,
# participant ids) are left untouched.
responses_df.mask(responses_df == 'Yes', 1, inplace=True)
responses_df.mask(responses_df == 'No', 0, inplace=True)
responses_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,52,53,54,55,56,57,58,59,60,61
0,Timestamp,Please enter the participant ID you have been ...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...,Did you hear the following sentence in the aud...
1,12/10/2022 17:16:38,testing,0,1,1,0,1,1,0,1,...,0,0,1,0,0,1,1,0,0,1
2,12/10/2022 23:12:24,18,0,1,0,1,1,0,1,1,...,0,1,1,0,1,1,0,0,1,1
3,12/11/2022 0:09:44,11,0,0,0,1,0,1,0,1,...,0,1,1,0,0,1,1,1,1,0
4,12/12/2022 21:07:27,12,0,1,1,0,1,1,0,1,...,0,0,0,1,0,1,1,0,1,1


In [None]:
# Load the pickled sentence list.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
with open('sents/sents_list.pckl','rb') as infile:
  all_sents = pickle.load(infile)
# Map each sentence to the audio file named after its position in all_sents.
# If a sentence occurs more than once in all_sents, the last position wins.
sent_to_file = {sent:f'sents/{i}.wav' for i, sent in enumerate(all_sents)}

In [None]:
# Item tables for the three experiment types, one CSV each.
# Presumably active/passive, past/future and time/location manipulations,
# going by the file names -- TODO confirm.
exp_ap = pd.read_csv('active_passive.csv')
exp_pf = pd.read_csv('past_future.csv')
exp_tl = pd.read_csv('time_location.csv')

In [None]:
# Label every row of each item table with a short code for the table it came from.
for _frame, _tag in ((exp_ap, 'ap'), (exp_pf, 'pf'), (exp_tl, 'tl')):
  _frame['type'] = _tag

In [None]:
# Filler items are the sentences at positions 50..73 of all_sents; their
# content ids run 38 below the sentence index (i.e. 12..35).
fillers_all = pd.DataFrame(
  [[sent_idx - 38, sent_idx, all_sents[sent_idx], 'filler'] for sent_idx in range(50, 74)],
  columns=['content_id', 'sentence_id', 'sentence', 'type'],
)

In [None]:
# Stack all item tables into one lookup table keyed by sentence text.
# The row index is not reset, so index labels repeat across the source tables.
# Columns that fillers_all does not define (it only has content_id, sentence_id,
# sentence and type) are filled with NaN for the filler rows.
exp_all = pd.concat((exp_ap, exp_tl, exp_pf, fillers_all))
exp_all.head()

Unnamed: 0.1,Unnamed: 0,content_id,sentence_id,sentence,plausible,rev,act,type,time
0,0.0,0,0,热狗把男孩吃了。,0.0,0.0,把,ap,
1,1.0,0,1,男孩把热狗吃了。,1.0,1.0,把,ap,
2,2.0,0,2,热狗被男孩吃了。,1.0,0.0,被,ap,
3,3.0,0,3,男孩被热狗吃了。,0.0,1.0,被,ap,
4,4.0,0,4,热狗让男孩吃了。,1.0,0.0,让,ap,


## Analysis

We want accuracy rates for the categories listed below.
For each category, record the number of "yes" answers, the number of correct answers, and (maybe) the corresponding rates:
- filler items
  - filler accuracy rate
  - distractor accuracy rate
- experiment items
  - plausible
    - plausible -> plausible
    - plausible -> implausible
  - implausible
    - implausible -> plausible
    - implausible -> implausible

In [None]:
def process_block(audio_list, selection_list, answers):
  """Split one block's answers into filler, distractor and experiment frames.

  Args:
    audio_list: sentences played in this block's audio.
    selection_list: sentences offered as yes/no options, in presentation
      order. The same sentence may occur more than once.
    answers: the participant's answers, positionally aligned with
      `selection_list` (`answers[i]` belongs to `selection_list[i]`). The
      grading below compares them against 1 and 0.

  Returns:
    Tuple `(filler_resp, distractor_resp, exp_resp)` of DataFrames. Each holds
    the `exp_all` columns for its selection sentences plus `answer`.
      - filler_resp: rows typed 'filler' whose content_id is among the
        content ids of the audio items.
      - distractor_resp: rows whose content_id is not among the audio
        content ids.
      - exp_resp: non-filler rows whose content_id is among the audio
        content ids, with `is_correct` added and the audio item's
        plausibility merged in (`plausible_selection` / `plausible_audio`).
  """
  audio_df = exp_all[exp_all['sentence'].isin(audio_list)].copy()

  # One row per presented option (not per distinct sentence), so build the
  # frame from the selection list and merge the item info in.
  selection_df = pd.DataFrame(selection_list, columns=['sentence'])
  # Remember each option's position so that answers are attached by position.
  # Matching on sentence text would overwrite the answer of an earlier
  # occurrence whenever a sentence is presented twice in the block.
  selection_df['_position'] = range(len(selection_df))
  selection_df = pd.merge(selection_df, exp_all, how='left', on='sentence')
  selection_df['answer'] = [answers[pos] for pos in selection_df['_position']]
  selection_df = selection_df.drop(columns='_position')

  content_in_audio = selection_df['content_id'].isin(audio_df['content_id'])
  is_filler = selection_df['type'] == 'filler'

  filler_resp = selection_df[content_in_audio & is_filler].copy()
  distractor_resp = selection_df[~content_in_audio].copy()
  exp_resp = selection_df[content_in_audio & ~is_filler].copy()

  # Correct = "yes" to a sentence that was literally in the audio, or "no" to
  # one that was not (even though its content was).
  sentence_heard = exp_resp['sentence'].isin(audio_list)
  said_yes = exp_resp['answer'] == 1
  said_no = exp_resp['answer'] == 0
  exp_resp['is_correct'] = (sentence_heard & said_yes) | (~sentence_heard & said_no)

  # Attach the plausibility of the audio item sharing the content_id.
  # NOTE(review): if audio_df holds several rows for one content_id, this merge
  # yields one output row per pairing -- confirm that is intended.
  exp_resp = pd.merge(exp_resp, audio_df[['content_id','plausible']], how='left', on='content_id', suffixes=('_selection','_audio'))

  return filler_resp, distractor_resp, exp_resp

In [None]:
def calc_apr(key, val, stats):
  """Compute accuracy / precision / recall for a set of graded responses.

  Args:
    key: label used to build the names of the entries written to `stats`.
    val: DataFrame with an `answer` column (compared against 1 = yes, 0 = no)
      and a boolean `is_correct` column. If it also has `plausible_audio` and
      `plausible_selection` columns, plausibility counts are recorded too.
    stats: dict that receives the results; it is modified in place.

  Returns:
    False when `val` is empty (nothing is written to `stats`); otherwise None.

  Entries written (with `{key}` substituted):
    n_{key}, {key}_correct, {key}_yes, {key}_accuracy,
    {key}_np (answers of 1, the precision denominator),
    {key}_nr (correct 1s plus incorrect 0s, the recall denominator),
    {key}_precision, {key}_recall (both 0 when their denominator is 0),
    and optionally {key}_plpl / _impl / _plim / _imim.
  """
  n = len(val)
  if n == 0:
    return False
  stats[f'n_{key}'] = n
  correct = val['is_correct'].value_counts().get(True,0)
  stats[f'{key}_correct'] = correct
  stats[f'{key}_yes'] = val['answer'].value_counts().get(1,0)
  stats[f'{key}_accuracy'] = correct/n

  # Confusion counts: a correct "yes" is a true positive, an incorrect "yes" a
  # false positive, an incorrect "no" a false negative.
  correct_counts = val[val['is_correct']==True]['answer'].value_counts()
  true_pos = correct_counts.get(1,0)
  incorrect_counts = val[val['is_correct']==False]['answer'].value_counts()
  false_pos = incorrect_counts.get(1,0)
  false_neg = incorrect_counts.get(0,0)

  predicted_pos = true_pos + false_pos
  actual_pos = true_pos + false_neg
  # Explicit zero checks: numpy integers do not raise ZeroDivisionError, so a
  # try/except would let NaN through for a zero denominator.
  precision = true_pos / predicted_pos if predicted_pos else 0
  recall = true_pos / actual_pos if actual_pos else 0
  stats[f'{key}_np'] = predicted_pos
  stats[f'{key}_nr'] = actual_pos
  stats[f'{key}_precision'] = precision
  stats[f'{key}_recall'] = recall

  # Plausibility breakdown only applies to frames that carry both columns
  # (filler and distractor frames do not). These are row counts per
  # audio/selection plausibility pairing, independent of the answer given.
  if 'plausible_audio' in val.columns and 'plausible_selection' in val.columns:
    plausible_audio = val[val['plausible_audio'] == 1]
    implausible_audio = val[val['plausible_audio'] == 0]
    stats[f'{key}_plpl'] = len(plausible_audio[plausible_audio['plausible_selection'] == 1]) # plausible audio, plausible option
    stats[f'{key}_impl'] = len(implausible_audio[implausible_audio['plausible_selection'] == 1]) # implausible audio, plausible option
    stats[f'{key}_plim'] = len(plausible_audio[plausible_audio['plausible_selection'] == 0]) # plausible audio, implausible option
    stats[f'{key}_imim'] = len(implausible_audio[implausible_audio['plausible_selection'] == 0]) # implausible audio, implausible option
  

In [None]:
def grade_responses(filler_resp, distractor_resp, exp_resp):
  """Grade aggregated responses and return a flat dict of statistics.

  The three frames may come from a single block, one participant, or a set of
  participants. `filler_resp` and `distractor_resp` are modified in place: an
  `is_correct` column is (re)written on each. `exp_resp` must already carry
  `is_correct`, `type` and `plausible_audio`.

  Statistics are produced by `calc_apr` for: fillers, distractors, all
  experiment items, each experiment type ('ap', 'tl', 'pf'), and each type
  split by the plausibility of the audio item (0 / 1).
  """
  def _mark_correct(frame, expected_answer):
    # A row is correct exactly when its answer equals the expected one.
    frame['is_correct'] = [False] * len(frame)
    frame.loc[frame['answer'] == expected_answer, 'is_correct'] = True

  # Fillers should be answered "yes" (1); distractors should be answered "no" (0).
  _mark_correct(filler_resp, 1)
  _mark_correct(distractor_resp, 0)

  stats = {}
  overall = (('filler', filler_resp), ('distractor', distractor_resp), ('exp', exp_resp))
  for label, frame in overall:
    calc_apr(label, frame, stats)

  for item_type in ('ap', 'tl', 'pf'):
    type_label = f'exp_{item_type}'
    type_rows = exp_resp[exp_resp['type'] == item_type]
    calc_apr(type_label, type_rows, stats)
    # Break each type down by whether the audio item was plausible.
    for audio_plausible in (0, 1):
      subset = type_rows[type_rows['plausible_audio'] == audio_plausible]
      calc_apr(f'{type_label}_{audio_plausible}', subset, stats)

  return stats

In [None]:
def process_participant(row, pid=None, n_blocks=6, block_size=10):
  """Collect one participant's responses across all blocks.

  Args:
    row: one response row of `responses_df`. Position 1 holds the participant
      id and the answers start at position 2.
    pid: participant id used in the condition file names. When omitted it is
      read from `row[1]` and left-padded with zeros to two characters. A value
      passed explicitly is used verbatim (option for testing).
    n_blocks: number of blocks to process (default 6).
    block_size: number of answers per block (default 10).

  Returns:
    Tuple `(filler_resp, distractor_resp, exp_resp)`: the per-block frames
    from `process_block`, concatenated over all blocks.
  """
  if pid is None:
    pid = row[1].zfill(2)

  # NOTE: pickle.load can execute arbitrary code; only load condition files
  # produced by this project.
  with open(f'conditions_data/audio_{pid}.pckl','rb') as infile:
    audio_list = pickle.load(infile)

  with open(f'conditions_data/selection_{pid}.pckl','rb') as infile:
    selection_list = pickle.load(infile)

  # Columns 0 and 1 are the timestamp and the participant id.
  first_answer_col = 2

  filler_parts, distractor_parts, exp_parts = [], [], []
  for block in range(n_blocks):
    start = first_answer_col + block_size * block
    block_answers = list(row[start:start + block_size])
    block_filler, block_distractor, block_exp = process_block(
      audio_list[block], selection_list[block], block_answers)
    filler_parts.append(block_filler)
    distractor_parts.append(block_distractor)
    exp_parts.append(block_exp)

  def _stack(parts):
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(parts) if parts else pd.DataFrame()

  return _stack(filler_parts), _stack(distractor_parts), _stack(exp_parts)

In [None]:
def process_all_participants(responses_df, num_testing=1):
  """Aggregate every participant's responses and grade them together.

  Args:
    responses_df: responses frame with one submission per row.
    num_testing: rows whose index label is <= this value are skipped. With the
      default of 1 that is label 0 and label 1, which in this notebook's sheet
      dump are the header row and one test submission.

  Returns:
    The statistics dict produced by `grade_responses` over all remaining rows.
  """
  filler_parts, distractor_parts, exp_parts = [], [], []

  for index, row in responses_df.iterrows():
    if index <= num_testing:
      continue
    participant_filler, participant_distractor, participant_exp = process_participant(row)
    filler_parts.append(participant_filler)
    distractor_parts.append(participant_distractor)
    exp_parts.append(participant_exp)

  def _stack(parts):
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(parts) if parts else pd.DataFrame()

  stats = grade_responses(_stack(filler_parts), _stack(distractor_parts), _stack(exp_parts))
  return stats


In [None]:
# Single-participant check (process_participant returns the three response frames, not stats):
# filler_resp, distractor_resp, exp_resp = process_participant(responses_df.iloc[1],pid=1)
# Grade all real submissions (header row and test submission are skipped by default).
stats = process_all_participants(responses_df)
stats

{'n_filler': 264,
 'filler_correct': 226,
 'filler_yes': 226,
 'filler_accuracy': 0.8560606060606061,
 'filler_np': 226,
 'filler_nr': 264,
 'filler_precision': 1.0,
 'filler_recall': 0.8560606060606061,
 'n_distractor': 185,
 'distractor_correct': 166,
 'distractor_yes': 19,
 'distractor_accuracy': 0.8972972972972973,
 'distractor_np': 19,
 'distractor_nr': 0,
 'distractor_precision': 0.0,
 'distractor_recall': 0,
 'n_exp': 219,
 'exp_correct': 153,
 'exp_yes': 105,
 'exp_accuracy': 0.6986301369863014,
 'exp_np': 105,
 'exp_nr': 77,
 'exp_precision': 0.5523809523809524,
 'exp_recall': 0.7532467532467533,
 'exp_plpl': 39,
 'exp_impl': 79,
 'exp_plim': 59,
 'exp_imim': 42,
 'n_exp_ap': 128,
 'exp_ap_correct': 86,
 'exp_ap_yes': 54,
 'exp_ap_accuracy': 0.671875,
 'exp_ap_np': 54,
 'exp_ap_nr': 28,
 'exp_ap_precision': 0.37037037037037035,
 'exp_ap_recall': 0.7142857142857143,
 'exp_ap_plpl': 13,
 'exp_ap_impl': 58,
 'exp_ap_plim': 36,
 'exp_ap_imim': 21,
 'n_exp_ap_0': 79,
 'exp_ap_0_cor