In [None]:
import pandas as pd
import glob 
import numpy as np

In [None]:
# Gather the recall and behavioural result CSVs. Both lists are sorted by
# filename because later cells pair their entries by position.
recall_glob = sorted(glob.glob('full_results/recall*.csv'))
beh_glob = sorted(glob.glob('full_results/beh*.csv'))

In [None]:
def _read_all(paths):
    """Read every CSV in `paths` as cp1250 text and return the frames in the same order."""
    return [pd.read_csv(path, encoding='cp1250') for path in paths]


recall = _read_all(recall_glob)
beh = _read_all(beh_glob)

In [None]:
# Discard every row whose trial_type ends in '_training'; only the remaining
# trials are analysed below.
experimental_only = []
for frame in beh:
    training_mask = frame['trial_type'].str.endswith('_training')
    experimental_only.append(frame[~training_mask])
beh = experimental_only

In [None]:
def _trials_with_prefix(frames, prefix):
    """Return, for each frame, only the rows whose trial_type starts with `prefix`."""
    return [frame[frame['trial_type'].str.startswith(prefix)] for frame in frames]


semantic = _trials_with_prefix(beh, 'semantic_')
perceptual = _trials_with_prefix(beh, 'perceptual_')

In [None]:
def _split_halves(frames):
    """Cut each frame at row count // 2 and return (first halves, second halves).

    With an odd number of rows the extra row lands in the second half.
    """
    first_halves, second_halves = [], []
    for frame in frames:
        midpoint = len(frame) // 2
        first_halves.append(frame[:midpoint])
        second_halves.append(frame[midpoint:])
    return first_halves, second_halves


first_sem, second_sem = _split_halves(semantic)
first_perc, second_perc = _split_halves(perceptual)

In [None]:
# An answer_rt of -1 is rewritten to NaN in every half-frame, so the mean, std
# and median computed from these frames leave those trials out.
for half_frames in (first_sem, second_sem, first_perc, second_perc):
    for frame in half_frames:
        sentinel_rows = frame["answer_rt"] == -1
        frame.loc[sentinel_rows, 'answer_rt'] = np.nan

In [None]:
# Mean answer_rt of every half-frame, one list per (task, half) combination.
mean_fs, mean_ss, mean_fp, mean_sp = (
    [frame["answer_rt"].mean() for frame in halves]
    for halves in (first_sem, second_sem, first_perc, second_perc)
)

In [None]:
def _rt_std(frame):
    """Standard deviation of the frame's answer_rt column (pandas default, NaN skipped)."""
    return frame["answer_rt"].std()


sd_fs = list(map(_rt_std, first_sem))
sd_ss = list(map(_rt_std, second_sem))

sd_fp = list(map(_rt_std, first_perc))
sd_sp = list(map(_rt_std, second_perc))

In [None]:
# RTV = |mean - rt| / std: how far a trial's answer_rt lies from the mean of
# its own half-frame, expressed in that half-frame's standard deviations.
rtv_inputs = (
    (first_sem, mean_fs, sd_fs),
    (second_sem, mean_ss, sd_ss),
    (first_perc, mean_fp, sd_fp),
    (second_perc, mean_sp, sd_sp),
)
for frames, means, stds in rtv_inputs:
    for frame, mean_rt, std_rt in zip(frames, means, stds):
        frame['RTV'] = [abs((mean_rt - rt) / std_rt) for rt in frame['answer_rt']]

In [None]:
def _median_rtv(frames):
    """Median of the RTV column for each frame, in order."""
    return [frame["RTV"].median() for frame in frames]


median_fs = _median_rtv(first_sem)
median_ss = _median_rtv(second_sem)

median_fp = _median_rtv(first_perc)
median_sp = _median_rtv(second_perc)

In [None]:
# STATE coding: 0 - offline, 1 - online.
#
# A trial is labelled offline (0) when its RTV is above the median RTV of its
# own half-frame and online (1) otherwise. Every set of halves is paired with
# the medians computed from that same set: the perceptual halves use
# median_fp / median_sp (they were previously split against the semantic
# medians median_fs / median_ss).
#
# NOTE: a NaN RTV compares False against the median, so such trials are
# labelled 1 (online).
state_inputs = (
    (first_sem, median_fs),
    (second_sem, median_ss),
    (first_perc, median_fp),
    (second_perc, median_sp),
)
for frames, medians in state_inputs:
    for frame, median_rtv in zip(frames, medians):
        frame['STATE'] = [0 if rtv > median_rtv else 1 for rtv in frame['RTV']]

In [None]:
# Print, frame by frame, how many perceptual trials ended up in each STATE for
# the first and the second half.
for first_half, second_half in zip(first_perc, second_perc):
    print(f"first: {first_half['STATE'].value_counts()}")
    print(f"sec: {second_half['STATE'].value_counts()}")
    print()

In [None]:
# Stitch the two halves of every frame back together (first half on top), now
# carrying the RTV and STATE columns.
merged_perc = [
    pd.concat([first_half, second_half])
    for first_half, second_half in zip(first_perc, second_perc)
]

merged_sem = [
    pd.concat([first_half, second_half])
    for first_half, second_half in zip(first_sem, second_sem)
]


In [None]:
# Keep only the recall rows whose in_task flag equals True and expose their
# accuracy column as 'recall_acc'.
# rename() returns a new frame here instead of being applied with inplace=True
# to a filtered selection, which avoids mutating an object pandas may treat as
# a copy of the source frame (the SettingWithCopy pattern).
rec_in_task = [
    frame.loc[frame['in_task'] == True].rename(columns={'acc': 'recall_acc'})
    for frame in recall
]

In [None]:
# Attach the recall rows to the task trials of the frame at the same list
# position, matching rows on the 'sentence' column (inner join, the pd.merge
# default).
# The recall frame is bound to `recall_frame`, not `recall`: reusing `recall`
# as the loop variable replaced the global list of recall frames with a single
# DataFrame, which broke the cell that builds rec_in_task when it was re-run.
m_sem_rec = [
    pd.merge(task_frame, recall_frame, on='sentence')
    for task_frame, recall_frame in zip(merged_sem, rec_in_task)
]

m_perc_rec = [
    pd.merge(task_frame, recall_frame, on='sentence')
    for task_frame, recall_frame in zip(merged_perc, rec_in_task)
]

In [None]:
# Columns removed from every merged frame before the accuracy computation.
columns_to_drop = ['trial_number', 'in_task']

sem_cleared = []
for df in m_sem_rec:
    df = df.drop(columns=columns_to_drop)
    sem_cleared.append(df)

# Kept as an explicit loop over `df`, rebound to the trimmed frame: the name
# stays bound after the loop and later cells could rely on that leftover value.
per_cleared = []
for df in m_perc_rec:
    df = df.drop(columns=columns_to_drop)
    per_cleared.append(df)

In [None]:
def _recall_accuracy(frame):
    """Return [online, offline, overall] recall accuracy for one merged frame.

    Accuracy is the share of rows with recall_acc == 1. Online rows are those
    with STATE == 1, offline rows those with STATE == 0; "overall" pools both
    groups. A group without any rows yields NaN instead of raising
    ZeroDivisionError.
    """
    online = frame.loc[frame['STATE'] == 1]
    offline = frame.loc[frame['STATE'] == 0]

    online_count, offline_count = len(online), len(offline)
    online_correct = int((online['recall_acc'] == 1).sum())
    offline_correct = int((offline['recall_acc'] == 1).sum())

    def _share(correct, total):
        # Guard against an empty group (e.g. no online trials survived the merge).
        return correct / total if total else np.nan

    return [
        _share(online_correct, online_count),
        _share(offline_correct, offline_count),
        _share(online_correct + offline_correct, online_count + offline_count),
    ]


# One [online, offline, overall] triple per frame and task.
sem_results = [_recall_accuracy(frame) for frame in sem_cleared]
perc_results = [_recall_accuracy(frame) for frame in per_cleared]

# Re-order each pair of triples into the column layout of the result table.
final_results = [
    [sem[2], perc[2], sem[0], sem[1], perc[0], perc[1]]
    for sem, perc in zip(sem_results, perc_results)
]

final_results_df = pd.DataFrame(
    final_results,
    columns=['semantic', 'perceptual', 'semantic_online', 'semantic_offline', 'perceptual_online', 'perceptual_offline'],
)

# Online-minus-offline accuracy difference per task.
final_results_df["delta_SEM"] = final_results_df['semantic_online'] - final_results_df['semantic_offline']
final_results_df["delta_PERC"] = final_results_df['perceptual_online'] - final_results_df['perceptual_offline']

In [None]:
# Write the accuracy table (row index included) to final_results.csv in the
# current working directory.
final_results_df.to_csv(path_or_buf='final_results.csv')

In [None]:
import matplotlib.pyplot as plt

# One box per column of final_results_df, side by side on a shared y axis
# limited to 0-1. The number of panels is taken from final_results_df itself;
# it used to come from `df`, a loop variable left over from an earlier cell
# whose column count has nothing to do with this table.
final_results_df.plot(
    kind='box',
    subplots=True,
    layout=(1, len(final_results_df.columns)),
    figsize=(12, 6),
    sharey=True,
    title="Recall accuracy comparison",
    ylim=(0, 1),
)
plt.tight_layout()
plt.show()

In [None]:
import seaborn as sns

# Pool answer_rt over all semantic frames.
# -1 is mapped to NaN explicitly: the earlier cell that does this replacement
# works in place on slices of these frames, so whether `semantic` itself still
# holds -1 depends on pandas' view/copy behaviour. Replacing here keeps the
# sentinel out of the histogram and of the summary statistics in either case.
reaction_times_semantic = pd.concat(
    [frame['answer_rt'].replace(-1, np.nan) for frame in semantic],
    ignore_index=True,
)

sns.histplot(reaction_times_semantic, kde=True)
plt.xlabel('Reaction Time')
plt.ylabel('Frequency')
plt.title('Distribution of Reaction Times in Semantic')
plt.xlim(0.3, 4.2)
plt.ylim(0, 210)
plt.show()


In [None]:
# Pool answer_rt over all perceptual frames.
# -1 is mapped to NaN explicitly: the earlier cell that does this replacement
# works in place on slices of these frames, so whether `perceptual` itself
# still holds -1 depends on pandas' view/copy behaviour. Replacing here keeps
# the sentinel out of the histogram and of the summary statistics in either case.
reaction_times_perceptual = pd.concat(
    [frame['answer_rt'].replace(-1, np.nan) for frame in perceptual],
    ignore_index=True,
)

sns.histplot(reaction_times_perceptual, kde=True)
plt.xlabel('Reaction Time')
plt.ylabel('Frequency')
plt.title('Distribution of Reaction Times in Perceptual')
plt.xlim(0.3, 4.2)
plt.ylim(0, 210)
plt.show()


In [None]:
def _rt_summary(reaction_times):
    """Return (mean, std, min, max) of a pooled reaction-time Series."""
    return (
        reaction_times.mean(),
        reaction_times.std(),
        reaction_times.min(),
        reaction_times.max(),
    )


(
    semantic_mean_reaction_time,
    semantic_std_reaction_time,
    semantic_min_reaction_time,
    semantic_max_reaction_time,
) = _rt_summary(reaction_times_semantic)

(
    perceptual_mean_reaction_time,
    perceptual_std_reaction_time,
    perceptual_min_reaction_time,
    perceptual_max_reaction_time,
) = _rt_summary(reaction_times_perceptual)


In [None]:
# Report the pooled reaction-time statistics of both tasks. The perceptual
# maximum is read from perceptual_max_reaction_time; it used to be the literal
# 3.666825 pasted into the f-string, which would not follow the data.
print(
    f"semantic_mean_reaction_time: {semantic_mean_reaction_time}\n"
    f"semantic_std_reaction_time: {semantic_std_reaction_time}\n"
    f"semantic_min_reaction_time: {semantic_min_reaction_time}\n"
    f"semantic_max_reaction_time: {semantic_max_reaction_time}\n"
    f"\n"
    f"perceptual_mean_reaction_time: {perceptual_mean_reaction_time}\n"
    f"perceptual_std_reaction_time: {perceptual_std_reaction_time}\n"
    f"perceptual_min_reaction_time: {perceptual_min_reaction_time}\n"
    f"perceptual_max_reaction_time: {perceptual_max_reaction_time}\n"
)

In [None]:
# Stack every semantic and perceptual frame into one table and compare the
# answer_rt distributions of the two STATE groups.
dfs = merged_sem + merged_perc
combined_df = pd.concat(dfs, ignore_index=True)

reaction_times_state_0 = combined_df.loc[combined_df['STATE'] == 0, 'answer_rt']
reaction_times_state_1 = combined_df.loc[combined_df['STATE'] == 1, 'answer_rt']

plt.figure(figsize=(12, 6))

# Offline (STATE 0) is drawn first, online (STATE 1) on top of it.
histogram_layers = (
    (reaction_times_state_0, 'blue', 'Offline'),
    (reaction_times_state_1, 'orange', 'Online'),
)
for state_rts, colour, state_label in histogram_layers:
    sns.histplot(state_rts, kde=True, color=colour, label=state_label)

plt.xlabel('Reaction Time')
plt.ylabel('Frequency')
plt.title('Distribution of Reaction Times by Offline/Online State')
plt.legend()
plt.show()


In [None]:
# Per-sentence tally over all merged frames (semantic and perceptual):
# num_recall sums recall_acc, num_occurrence counts the rows.
m_all = m_sem_rec + m_perc_rec

recall_dict = {}
for merged in m_all:
    for sentence, recall_acc in zip(merged['sentence'], merged['recall_acc']):
        tally = recall_dict.setdefault(sentence, {'num_recall': 0, 'num_occurrence': 0})
        tally['num_recall'] += recall_acc
        tally['num_occurrence'] += 1

sentence_usage_recall = pd.DataFrame(
    [
        {'sentence': sentence, 'num_recall': tally['num_recall'], 'num_occurrence': tally['num_occurrence']}
        for sentence, tally in recall_dict.items()
    ],
    columns=['sentence', 'num_recall', 'num_occurrence'],
)

# Share of occurrences in which the sentence was recalled, in percent.
recall_share = sentence_usage_recall['num_recall'] / sentence_usage_recall['num_occurrence']
sentence_usage_recall['recall_percentage'] = recall_share * 100

sentence_usage_recall = sentence_usage_recall.sort_values(by='recall_percentage', ascending=False)


In [None]:
# Horizontal bars, one per sentence, showing its recall percentage. The
# sentence tick labels are suppressed.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=sentence_usage_recall, x='recall_percentage', y='sentence', palette='viridis', ax=ax)
ax.set_xlabel('Recall Percentage')
ax.set_ylabel('Sentence')
ax.set_yticks([])
ax.set_title('Recall Percentage by Sentence')
plt.show()

In [None]:
# Re-order the per-sentence table from the most to the least frequent sentence.
sentence_usage_recall = sentence_usage_recall.sort_values(by='num_occurrence', ascending=False)

In [None]:
# Horizontal bars, one per sentence, showing how often it occurs in the merged
# trials. The sentence tick labels are suppressed.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=sentence_usage_recall, x='num_occurrence', y='sentence', palette='viridis', ax=ax)
ax.set_xlabel('Number of sentences in experimental trials')
ax.set_ylabel('Sentence')
ax.set_yticks([])
ax.set_title('Occurence of each sentence')
plt.show()

In [None]:
# Per-sentence tally restricted to the semantic frames: num_recall sums
# recall_acc, num_occurrence counts the rows.
recall_dict = {}
for merged in m_sem_rec:
    for sentence, recall_acc in zip(merged['sentence'], merged['recall_acc']):
        tally = recall_dict.setdefault(sentence, {'num_recall': 0, 'num_occurrence': 0})
        tally['num_recall'] += recall_acc
        tally['num_occurrence'] += 1

sentence_usage_recall_sem = pd.DataFrame(
    [
        {'sentence': sentence, 'num_recall': tally['num_recall'], 'num_occurrence': tally['num_occurrence']}
        for sentence, tally in recall_dict.items()
    ],
    columns=['sentence', 'num_recall', 'num_occurrence'],
)

# Share of occurrences in which the sentence was recalled, in percent.
recall_share_sem = sentence_usage_recall_sem['num_recall'] / sentence_usage_recall_sem['num_occurrence']
sentence_usage_recall_sem['recall_percentage'] = recall_share_sem * 100

sentence_usage_recall_sem = sentence_usage_recall_sem.sort_values(by='sentence', ascending=True)
sentence_usage_recall_sem


In [None]:
# Per-sentence tally restricted to the perceptual frames: num_recall sums
# recall_acc, num_occurrence counts the rows.
recall_dict = {}
for merged in m_perc_rec:
    for sentence, recall_acc in zip(merged['sentence'], merged['recall_acc']):
        tally = recall_dict.setdefault(sentence, {'num_recall': 0, 'num_occurrence': 0})
        tally['num_recall'] += recall_acc
        tally['num_occurrence'] += 1

sentence_usage_recall_perc = pd.DataFrame(
    [
        {'sentence': sentence, 'num_recall': tally['num_recall'], 'num_occurrence': tally['num_occurrence']}
        for sentence, tally in recall_dict.items()
    ],
    columns=['sentence', 'num_recall', 'num_occurrence'],
)

# Share of occurrences in which the sentence was recalled, in percent.
recall_share_perc = sentence_usage_recall_perc['num_recall'] / sentence_usage_recall_perc['num_occurrence']
sentence_usage_recall_perc['recall_percentage'] = recall_share_perc * 100

sentence_usage_recall_perc = sentence_usage_recall_perc.sort_values(by='sentence', ascending=True)
sentence_usage_recall_perc


In [None]:
# Suffix the three tally columns with the task they belong to, then line the
# semantic and perceptual tables up side by side on the sentence.
tally_columns = ('num_recall', 'num_occurrence', 'recall_percentage')

sentence_usage_recall_sem = sentence_usage_recall_sem.rename(
    columns={name: f'{name}_sem' for name in tally_columns}
)
sentence_usage_recall_perc = sentence_usage_recall_perc.rename(
    columns={name: f'{name}_perc' for name in tally_columns}
)

sentence_usage_recall_compar = pd.concat(
    [
        sentence_usage_recall_sem.set_index('sentence'),
        sentence_usage_recall_perc.set_index('sentence'),
    ],
    axis=1,
).reset_index()

# Display the rows from position 340 onwards.
sentence_usage_recall_compar[340:]