In [1]:
import os, glob
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import stats
%matplotlib qt

In [2]:
# Import data file
data_directory = 'C:/Users/mvmigem/Documents/data/project_1/raw_data/'
dir_list = glob.glob(data_directory+'sub_*/behav/*.csv')


In [30]:
df_list = []
for i in range(len(dir_list)):
    behav_dat = pd.read_csv(dir_list[i])
    df_list.append(behav_dat)

big_df = pd.concat(df_list,ignore_index=True)
big_df= big_df[~(big_df.participant.isin([3,14,20]))]

In [None]:
"""
Counting the amount of raw events
"""
sub_list = big_df['participant'].unique() 
# Step 1: Group by subject, attention, and expectation, and count rows
df_repeated = big_df.loc[np.repeat(big_df.index, 4)].reset_index(drop=True)
df_to_count = df_repeated[~(df_repeated.participant.isin([3,14,20]))]
grouped = df_to_count.groupby(['participant', 'attention', 'expected']).size().reset_index(name='row_count')

# Step 3: Group by attention and expectation, and calculate the mean and standard deviation
stats_per_condition = grouped.groupby(['attention', 'expected'])['row_count'].agg(['mean', 'std']).reset_index()


In [43]:
def calculate_subtraction(row):
    if row['catch_trial'] != 0:
        relevant_timepoint_col = f"t_stim_{row['catch_trial']}"
        rt =  row['press_time'] - row[relevant_timepoint_col] - 400
        if rt > 3200:
            return np.nan
        return rt
# Apply the function to create the subtraction column
big_df['rt'] = big_df.apply(calculate_subtraction, axis=1)

In [25]:
"""
***Aestetics***
"""
# load color palette
my_palette = sns.color_palette().as_hex()
my_palette1 = sns.color_palette("hls",8).as_hex()
sns.color_palette("hls",8).as_hex()


In [32]:
# Load peak properties of localiser data
peak_properties = pd.read_csv(r'C:\Users\mvmigem\Documents\data\project_1\compiled_dataframes\c1_peak_properties.csv')
peak_properties = peak_properties[~(peak_properties.subject.isin([3,14,20]))]
peakch_n =peak_properties['all_pos_peak_channel'].value_counts()


In [34]:
subject_loc_quad = big_df.groupby('participant')['loc_quad'].first().value_counts()

fig, axes = plt.subplots(2,1,figsize=(7, 10),sharey=True,sharex=False)
sns.set_context("paper")

# line_colours1= ['#219EBC','#FFB703']['#8ECAE6','#FB8500','#219EBC','#FFB703']
line_colours2 = line_colours = [my_palette[0],my_palette[2],my_palette[1],my_palette[3]]

sns.barplot( subject_loc_quad, palette= line_colours2, ax=axes[0])
sns.barplot( peakch_n, palette= my_palette1,ax=axes[1])
tick_labels = ['Top left', 'Top right', 'Bottom right','Bottom left']  # Replace ... with additional labels

axes[0].axhline(y=0, lw=1, c='black' )
axes[0].axhline(y=0, lw=1, c='black' )
axes[0].set_ylabel('N Participants', fontdict={'family': 'Segoe UI', 'weight': 'semibold','size':18})
axes[0].set_xlabel('Optimal quadrant', fontdict={'family': 'Segoe UI', 'weight': 'semibold','size':18})
axes[0].set_xticklabels(tick_labels, fontdict={'family': 'Segoe UI','size':14})

axes[1].set_ylabel('N Participants', fontdict={'family': 'Segoe UI', 'weight': 'semibold','size':18})
axes[1].set_xlabel('Selected electrode', fontdict={'family': 'Segoe UI', 'weight': 'semibold','size':18})
# axes[1].set_xticklabels(tick_labels, fontdict={'family': 'Segoe UI','size':14})

axes[0].tick_params(axis='x', labelsize=16)  # Font size for x-axis numbers
axes[0].tick_params(axis='y', labelsize=16) 

# Change the size of the numbers on the axes
axes[1].tick_params(axis='x', labelsize=16)  # Font size for x-axis numbers
axes[1].tick_params(axis='y', labelsize=16) 

sns.despine(offset=10, trim=True)

plt.tight_layout()
plt.show()
fig.savefig('fig_c1_main.png')
plt.show()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot( subject_loc_quad, palette= line_colours2, ax=axes[0])

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot( peakch_n, palette= my_palette1,ax=axes[1])
  sns.barplot( peakch_n, palette= my_palette1,ax=axes[1])
  axes[0].set_xticklabels(tick_labels, fontdict={'family': 'Segoe UI','size':14})


In [None]:
palette=my_palette[:4]

In [44]:
ax = sns.barplot(data= big_df, x='attention',y='rt',
                 edgecolor= 'black', palette= [my_palette[4], my_palette[5]],
                errorbar='se', errwidth= 1.5,
                capsize=.1,width=.6, linewidth=0.7)


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(data= big_df, x='attention',y='rt',

The `errwidth` parameter is deprecated. And will be removed in v0.15.0. Pass `err_kws={'linewidth': 1.5}` instead.

  ax = sns.barplot(data= big_df, x='attention',y='rt',


In [53]:
sub_grouped = big_df.groupby(['participant','attention'])['rt'].mean()
# sub_grouped = sub_grouped.transform(stats.zscore)
sub_grouped = sub_grouped.unstack().reset_index()
t_statistic, p_value = stats.ttest_rel(sub_grouped['attended'], sub_grouped['unattended'])

In [46]:
ax = sns.barplot(data= sub_grouped,
                 edgecolor= 'black', palette= [my_palette[4], my_palette[5]],
                errorbar='se', errwidth= 1.5,
                capsize=.1,width=.6, linewidth=0.7)


The `errwidth` parameter is deprecated. And will be removed in v0.15.0. Pass `err_kws={'linewidth': 1.5}` instead.

  ax = sns.barplot(data= sub_grouped,


In [None]:
# transform for analysis
# Convert to long format
long_df = pd.melt(
    sub_grouped,
    id_vars=['participant'],  # Columns to keep as identifiers
    value_vars=['attended','unattended'],  # Columns to melt
    var_name='attention',  # Name for the new column that will store the condition names
    value_name='rt'  # Name for the new column that will store the values
)

Long Format DataFrame:
    participant   attention           rt
0             1    attended   370.063885
1             2    attended   873.307282
2             4    attended   392.930177
3             5    attended   420.788092
4             6    attended   801.167258
5             7    attended   785.650477
6             8    attended  1084.494500
7             9    attended   839.492913
8            10    attended   357.494877
9            11    attended   415.302510
10           12    attended   514.057962
11           13    attended   646.412933
12           15    attended   377.689127
13           16    attended   477.438954
14           17    attended   492.165133
15           18    attended   518.368300
16           19    attended   540.586267
17           21    attended   489.839885
18           22    attended   566.556546
19           23    attended   385.476808
20           24    attended   401.765046
21           25    attended   623.865833
22           26    attended   484.

In [55]:
destination_folder = r"C:\Users\mvmigem\Documents\data\project_1\compiled_dataframes/"
long_df.to_csv(destination_folder + "behaviour.csv")

In [None]:
excuded_pp = [3,14,20]
pp_info = big_df.groupby('participant').first().reset_index()
pp_info = pp_info[~pp_info['participant'].isin(excuded_pp)]



In [None]:
pp_info['handed'].value_counts()

In [None]:
""" 
Ugh accuracies
"""


In [57]:
peak_properties['all_pos_peak_latency'].std()

0.00623221902513645