In [1]:
import os
import numpy as np
import pandas
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Paths used throughout the notebook (relative to the notebook's working directory).
data_path = './data/behavioural_data'  # listed and read by the loading cell
figures_path = './figures'             # target directory for plt.savefig calls

# plt.savefig does not create missing directories, so make sure the output
# folder exists before any figure cell runs (no-op when it is already there).
os.makedirs(figures_path, exist_ok=True)

In [8]:
def _parse_entities(filename):
    """Return the ``key-value`` chunks of an underscore-separated file name.

    The name is split on ``'_'`` and every chunk containing a hyphen is split
    once on its first ``'-'``. For example
    ``'sub-01_ses-001_task-x_run-1_events.tsv'`` gives
    ``{'sub': '01', 'ses': '001', 'task': 'x', 'run': '1'}``.
    Chunks without a hyphen (e.g. ``'events.tsv'``) are ignored.
    """
    entities = {}
    for chunk in filename.split('_'):
        key, hyphen, value = chunk.partition('-')
        if hyphen:
            entities[key] = value
    return entities


# Every file in data_path except the per-subject playlist-order tables and
# notebook artefacts, sorted so that the two files of a run end up adjacent.
files = np.sort(np.array(os.listdir(data_path)))
files = [file for file in files if 'order' not in file and '.ipynb' not in file]
# Consecutive files are taken as (imagery score file, playlist timestamps file).
paired_files = [(files[i], files[i+1]) for i in range(0, len(files)-1, 2)]

order_tables = {}  # order-file name -> DataFrame, so each table is read only once
run_frames = []    # one DataFrame per run, concatenated once after the loop

for imagery_score, playlist_timestamps in paired_files:
    #supplemental run data
    print(imagery_score, playlist_timestamps)

    # Pairing relies purely on sort order, so check that both names describe
    # the same subject/session/run instead of silently merging unrelated files.
    score_entities = _parse_entities(imagery_score)
    timestamps_entities = _parse_entities(playlist_timestamps)
    for key in ('sub', 'ses', 'run'):
        if key not in score_entities or score_entities[key] != timestamps_entities.get(key):
            raise ValueError(
                "Files {!r} and {!r} do not share the same '{}' entity; "
                "check for a missing or extra file in {!r}".format(
                    imagery_score, playlist_timestamps, key, data_path))

    subject = 'sub-' + score_entities['sub']
    # int() copes with any number of digits / zero padding ('1', '10', '001', ...).
    run = int(score_entities['run'])
    session = int(score_entities['ses'])

    # 'sub-01' -> 'Sub-01_Playlist_order.tsv'
    subject_runs_order = 'S{}_Playlist_order.tsv'.format(subject[1:])
    if subject_runs_order not in order_tables:
        order_tables[subject_runs_order] = pandas.read_csv(
            os.path.join(data_path, subject_runs_order), sep=' ')
    order = order_tables[subject_runs_order]

    # Row of the order table describing this run of this session.
    order_row = order.loc[(order['run_events'] == run) & (order['session'] == session)]
    if order_row.empty:
        raise ValueError(
            "No entry with run_events={} and session={} in {!r}".format(
                run, session, subject_runs_order))
    playlist = int(order_row['playlist'].iloc[0])
    repetition = int(order_row['repetition'].iloc[0])
    run_number = int(order_row['run'].iloc[0])

    #extract data from tsv
    score_path = os.path.join(data_path, imagery_score)
    score_df = pandas.read_csv(score_path, sep='\t').drop(columns='question')
    timestamps_path = os.path.join(data_path, playlist_timestamps)
    timestamps_df = pandas.read_csv(timestamps_path, sep='\t').drop(columns='path')

    #manage df
    # Rows of the two tables are matched on their index and placed side by side.
    run_df = pandas.concat([timestamps_df, score_df], axis='columns', join='outer')
    run_df = run_df.drop(columns='track')
    run_df['sub'] = subject
    run_df['session'] = session
    run_df['run'] = run_number
    run_df['run_event'] = run
    run_df['playlist'] = playlist
    run_df['repetition'] = repetition

    run_frames.append(run_df)

# Single concatenation: linear instead of re-copying the growing frame on every
# iteration, and no concat against an empty seed DataFrame.
if run_frames:
    data_df = pandas.concat(run_frames, axis='rows', join='outer', ignore_index=True)
else:
    data_df = pandas.DataFrame({})

sub-01_ses-001_20240322-093347_task-mutemusic_run-1_events-001.tsv sub-01_ses-001_20240322-093347_task-mutemusic_run-1_events.tsv
sub-01_ses-001_20240322-093347_task-mutemusic_run-2_events-001.tsv sub-01_ses-001_20240322-093347_task-mutemusic_run-2_events.tsv
sub-01_ses-001_20240322-093347_task-mutemusic_run-3_events-001.tsv sub-01_ses-001_20240322-093347_task-mutemusic_run-3_events.tsv
sub-01_ses-002_20240326-104509_task-mutemusic_run-4_events-001.tsv sub-01_ses-002_20240326-104509_task-mutemusic_run-4_events.tsv
sub-01_ses-002_20240326-104509_task-mutemusic_run-5_events-001.tsv sub-01_ses-002_20240326-104509_task-mutemusic_run-5_events.tsv
sub-01_ses-002_20240326-104509_task-mutemusic_run-6_events-001.tsv sub-01_ses-002_20240326-104509_task-mutemusic_run-6_events.tsv
sub-01_ses-003_20240402-104826_task-mutemusic_run-7_events-001.tsv sub-01_ses-003_20240402-104826_task-mutemusic_run-7_events.tsv
sub-01_ses-003_20240402-104826_task-mutemusic_run-8_events-001.tsv sub-01_ses-003_20240402

In [11]:
#sub-06 had a familiar track repeated 2 times in the first playlist (11 tracks instead of 10)
repeated_track = data_df.loc[(data_df['sub']=='sub-06')&
                             (data_df['session']==1)&
                             (data_df['title']=='The_Office_(Main_Theme)_silenced.wav')]
print(repeated_track)

# Keep the first presentation of the track within a run and drop any later one.
# Deriving the labels from the data (rather than hard-coding a row label) keeps
# this cell correct if the file order changes and makes it safe to re-run:
# once the extra row is gone nothing is flagged and drop() is a no-op.
is_extra = repeated_track.duplicated(subset=['sub', 'session', 'run', 'title'], keep='first')
extra_rows = repeated_track[is_extra].index
data_df = data_df.drop(index=extra_rows)

# The row index is written as the first column of the TSV.
data_df.to_csv('./data/sub_session_data.tsv', sep="\t")

    Groupe    category                                 title  \
541      F  Orchestral  The_Office_(Main_Theme)_silenced.wav   
545      F  Orchestral  The_Office_(Main_Theme)_silenced.wav   

     silence_duration  total_duration       onset  value confirmation     sub  \
541                16            62.0   76.082241      4          yes  sub-06   
545                16            62.0  331.742244      4          yes  sub-06   

     session  run  run_event  playlist  repetition  
541        1    1          1         1           1  
545        1    1          1         1           1  


In [None]:
from scipy.stats import wilcoxon, ttest_ind

# Per subject: independent-samples t-test between the 'value' scores of the
# 'U' and 'F' groups, followed by the median and mean of each group.
for sub in ['sub-01', 'sub-02', 'sub-03', 'sub-06']:
    is_subject = data_df['sub'] == sub
    scores = data_df['value']

    unfamiliar = scores.loc[(data_df['Groupe'] == 'U') & is_subject].values
    familiar = scores.loc[(data_df['Groupe'] == 'F') & is_subject].values

    stat = ttest_ind(unfamiliar, familiar)
    print(sub, stat)
    for label, values in (('familiar', familiar), ('unfamiliar', unfamiliar)):
        print(label, np.median(values), np.mean(values))
            

In [None]:
# Per subject and group: Wilcoxon signed-rank test on the 'value' scores of two
# repetitions, for each pair of repetitions. Observations are paired by track:
# both repetitions are sorted by 'title' and compared position by position.
for sub in ['sub-01', 'sub-02', 'sub-03', 'sub-06']:
    for Groupe in ['U', 'F']:
        # The subject/group filter does not depend on the repetition pair.
        group_df = data_df.loc[(data_df['Groupe']==Groupe)&
                               (data_df['sub']==sub)]

        for paired_rep in [(1,2), (2,3), (1,3)]:

            rep1 = group_df.loc[group_df['repetition']==paired_rep[0]]
            sorted_rep1 = rep1.sort_values(by='title')
            val1 = sorted_rep1['value'].values

            rep2 = group_df.loc[group_df['repetition']==paired_rep[1]]
            sorted_rep2 = rep2.sort_values(by='title')
            val2 = sorted_rep2['value'].values

            # Sorting only pairs the scores correctly when both repetitions
            # contain exactly the same titles; otherwise scores of different
            # tracks would be compared without any error being raised.
            if not np.array_equal(sorted_rep1['title'].values,
                                  sorted_rep2['title'].values):
                raise ValueError(
                    "Cannot pair repetitions {} for {} / group {}: "
                    "the two repetitions do not contain the same titles".format(
                        paired_rep, sub, Groupe))

            stat, p_value = wilcoxon(val1, val2)
            print(sub, Groupe, paired_rep, p_value, p_value<0.05)

In [None]:
# Notched box plot of 'value' per subject, split by 'Groupe', with the
# individual observations overlaid as a dodged, jittered strip plot.
shared_kwargs = dict(data=data_df, x="sub", y="value", hue="Groupe")

sns.boxplot(notch=True, **shared_kwargs)
sns.stripplot(dodge=True, jitter=0.3, edgecolor='k', linewidth=0.2, **shared_kwargs)

# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center right', bbox_to_anchor=(1.14, 0.5))
plt.yticks(range(5))

groupe_figure_file = os.path.join(figures_path, 'ImaginationScore_Groupe')
plt.savefig(groupe_figure_file)

In [None]:
# Box plots of 'value' per subject, coloured by 'repetition', one panel per 'Groupe'.
sns.catplot(
    data=data_df,
    x="sub",
    y="value",
    hue="repetition",
    col="Groupe",
    kind='box',
)
plt.yticks(range(5))

repetition_figure_file = os.path.join(figures_path, 'ImaginationScore_Groupe_Repetition')
plt.savefig(repetition_figure_file)

In [None]:
# Box plots of 'value' per subject, coloured by 'category', one panel per 'Groupe'.
sns.catplot(
    data=data_df,
    x="sub",
    y="value",
    hue="category",
    col="Groupe",
    kind='box',
)
plt.yticks(range(5))

category_figure_file = os.path.join(figures_path, 'ImaginationScore_Groupe_Category')
plt.savefig(category_figure_file)

In [None]:
# Show every row recorded for subject 'sub-01'.
is_sub01 = data_df['sub']=='sub-01'
print(data_df.loc[is_sub01])