In [1]:
import PyQt6.QtCore
import os
os.environ["QT_API"] = "pyqt6"
import matplotlib.pyplot as plt

# Use qt backend for matplotlab to use interactive mne plots
%matplotlib qt

import mne 
import analysis.processing
import pandas as pd
import csv 
import os
from config import Config
configObj = Config()
from mne_connectivity import spectral_connectivity_time
import numpy as np
configss = configObj.getConfigSnapshot()
from tqdm import tqdm
import tools.helpers
from scipy import stats

mne.set_log_level(verbose='WARNING', return_old_level=False, add_frames=None)

In [2]:
[el for el in list(range(1,32)) if el not in [14, 5, 13, 16, 17, 20] ]

[1,
 2,
 3,
 4,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 15,
 18,
 19,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31]

In [5]:
listsss =  [el for el in list(range(1,32)) if el not in [14, 5, 13, 16, 17, 20, 31] ]
df = None
for pnum in tqdm(listsss):

    partipant_data_path =  str(pnum) +'.csv'
    path_qa = os.path.join(configss['root'], configss['data_qa'] , partipant_data_path ) 

    if df is None:
        df =  pd.read_csv(path_qa)
    else:
        df = pd.concat([df, pd.read_csv(path_qa)], axis=0)   

# Calculate means and standard errors for each BlockType
means = df.groupby('BlockType')['Correct'].mean()
counts = df.groupby('BlockType')['Correct'].count()
stds = df.groupby('BlockType')['Correct'].std()
stderr = stds / np.sqrt(counts)

# 95% Confidence Intervals (CI = mean ± 1.96 * standard error)
confidence_intervals = 1.96 * stderr

# Perform a t-test to calculate p-value between block types
block_type_d = df[df['BlockType'] == 'D']['Correct']
block_type_nd = df[df['BlockType'] == 'ND']['Correct']
t_stat, p_value = stats.ttest_ind(block_type_d, block_type_nd)

# Plotting with error bars
plt.figure(figsize=(8,6))
plt.bar(means.index, means, yerr=confidence_intervals, capsize=10)

# Adding titles, labels, and annotations
plt.title(f'Q&A Accuracy for Each Block Type (p-value = {p_value:.4f})')
plt.xlabel('Block Type')
plt.ylabel('Accuracy (Proportion of Correct Answers)')
plt.xticks(rotation=0)

# Display the plot
plt.show()

100%|██████████| 24/24 [00:00<00:00, 839.75it/s]


In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
import pingouin as pg  # For repeated measures ANOVA

# List of participant IDs excluding certain ones
listsss = [el for el in list(range(1, 32)) if el not in [14, 5, 13, 16, 17, 20, 31]]

# Load the participant data
df = None
for pnum in tqdm(listsss):
    partipant_data_path = str(pnum) + '.csv'
    path_qa = os.path.join(configss['root'], configss['data_qa'], partipant_data_path)
    
    # Read the data for the current participant
    participant_df = pd.read_csv(path_qa)
    
    # Add a column for ParticipantID
    participant_df['ParticipantID'] = pnum
    
    # Concatenate the data
    if df is None:
        df = participant_df
    else:
        df = pd.concat([df, participant_df], axis=0)

# Assuming the dataset contains columns 'BlockType' and 'Correct'

# Perform Repeated Measures ANOVA
aov = pg.rm_anova(dv='Correct', within='BlockType', subject='ParticipantID', data=df, detailed=True)

# Print the ANOVA table
print(aov)

# Calculate means and standard errors for each BlockType
means = df.groupby('BlockType')['Correct'].mean()
counts = df.groupby('BlockType')['Correct'].count()
stds = df.groupby('BlockType')['Correct'].std()
stderr = stds / np.sqrt(counts)

# 95% Confidence Intervals (CI = mean ± 1.96 * standard error)
confidence_intervals = 1.96 * stderr

# Plotting with error bars
plt.figure(figsize=(8, 6))
plt.bar(means.index, means, yerr=confidence_intervals, capsize=10, alpha=0.7)

# Adding titles, labels, and ANOVA result
p_value = aov['p-unc'][0]  # Extracting p-value from the ANOVA table
plt.title(f'Q&A Accuracy for Each Block Type (p-value = {p_value:.4f})')
plt.xlabel('Block Type')
plt.ylabel('Accuracy (Proportion of Correct Answers)')
plt.xticks(rotation=0)

# Display the plot
plt.show()


100%|██████████| 24/24 [00:00<00:00, 770.04it/s]

      Source        SS  DF        MS         F     p-unc       ng2  eps
0  BlockType  0.090422   1  0.090422  5.398047  0.029356  0.075492  1.0
1      Error  0.385272  23  0.016751       NaN       NaN       NaN  NaN





In [4]:
# with normality check
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
import pingouin as pg
from scipy.stats import shapiro

# List of participant IDs excluding certain ones
listsss = [el for el in list(range(1, 32)) if el not in [14, 5, 13, 16, 17, 20, 31]]

# Load the participant data
df = None
for pnum in tqdm(listsss):
    partipant_data_path = str(pnum) + '.csv'
    path_qa = os.path.join(configss['root'], configss['data_qa'], partipant_data_path)
    
    # Read the data for the current participant
    participant_df = pd.read_csv(path_qa)
    
    # Add a column for ParticipantID
    participant_df['ParticipantID'] = pnum
    
    # Concatenate the data
    if df is None:
        df = participant_df
    else:
        df = pd.concat([df, participant_df], axis=0)

# Assuming the dataset contains columns 'BlockType' and 'Correct'

# Perform the Shapiro-Wilk test for normality on the 'Correct' scores for each 'BlockType'
normal = True
for block_type in df['BlockType'].unique():
    stat, p = shapiro(df[df['BlockType'] == block_type]['Correct'])
    if p < 0.05:  # If p-value is less than 0.05, data is not normally distributed
        normal = False
        break

# If not normal, apply log transformation
if not normal:
    df['Correct'] = np.log1p(df['Correct'])  # Apply log transformation

# Perform Repeated Measures ANOVA
aov = pg.rm_anova(dv='Correct', within='BlockType', subject='ParticipantID', data=df, detailed=True)

# Extract F-statistic and effect size (partial eta squared)
f_stat = aov['F'][0]
effect_size = aov['ng2'][0]

# Print the ANOVA table, F-statistic, and effect size
print(aov)
print(f"F-statistic: {f_stat}")
print(f"Effect size (partial eta squared): {effect_size}")

# Calculate means and standard errors for each BlockType
means = df.groupby('BlockType')['Correct'].mean()
counts = df.groupby('BlockType')['Correct'].count()
stds = df.groupby('BlockType')['Correct'].std()
stderr = stds / np.sqrt(counts)

# 95% Confidence Intervals (CI = mean ± 1.96 * standard error)
confidence_intervals = 1.96 * stderr

# Plotting with error bars
plt.figure(figsize=(8, 6))
plt.bar(means.index, means, yerr=confidence_intervals, capsize=10, alpha=0.7)

# Adding titles, labels, and ANOVA result
p_value = aov['p-unc'][0]  # Extracting p-value from the ANOVA table
plt.title(f'Q&A Accuracy for Each Block Type (p-value = {p_value:.4f})')
plt.xlabel('Block Type')
plt.ylabel('Accuracy (Proportion of Correct Answers)')
plt.xticks(rotation=0)

# Display the plot
plt.show()


100%|██████████| 24/24 [00:00<00:00, 784.32it/s]

      Source        SS  DF        MS         F     p-unc       ng2  eps
0  BlockType  0.043444   1  0.043444  5.398047  0.029356  0.075492  1.0
1      Error  0.185105  23  0.008048       NaN       NaN       NaN  NaN
F-statistic: 5.398047315058214
Effect size (partial eta squared): 0.07549220920401024





In [12]:
# Calculate mean and standard deviation for 'D' and 'ND' block types
mean_d = df[df['BlockType'] == 'D']['Correct'].mean()
std_d = df[df['BlockType'] == 'D']['Correct'].std()

mean_nd = df[df['BlockType'] == 'ND']['Correct'].mean()
std_nd = df[df['BlockType'] == 'ND']['Correct'].std()

# Print the means and standard deviations
print(f"Mean Accuracy for 'D': {mean_d:.4f}, Standard Deviation: {std_d:.4f}")
print(f"Mean Accuracy for 'ND': {mean_nd:.4f}, Standard Deviation: {std_nd:.4f}")

Mean Accuracy for 'D': 0.4765, Standard Deviation: 0.3218
Mean Accuracy for 'ND': 0.5367, Standard Deviation: 0.2903


In [11]:
# Plotting with error bars
plt.figure(figsize=(8, 6))
bars = plt.bar(means.index, means, yerr=confidence_intervals, capsize=10, alpha=0.7, width=0.4, edgecolor='black', linewidth=1.5)

# Making bar edges rounded
for bar in bars:
    bar.set_linewidth(1.5)
    bar.set_capstyle('round')
      

#plt.title(f'Q&A Accuracy for Each Block Type (p-value = {p_value:.4f})')
plt.xlabel('Block Type')
plt.ylabel('Accuracy (Proportion of Correct Answers)')
plt.xticks(ticks = range(len(means.index)), labels = ['Distractor', 'Attentive'],  rotation=0)

# Display the plot
plt.show()

In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
from scipy import stats

# Assuming 'configss' is a dictionary containing paths
# Replace 'configss' with actual paths if necessary

# List of participant IDs, excluding specific IDs
participant_ids = [el for el in range(1, 32) if el not in [5, 13, 14, 16, 17, 20, 31]]

# Initialize a list to store the results
results = []

# Iterate over each participant
for pnum in tqdm(participant_ids, desc='Processing Participants'):
    # Construct the path to the participant's data file
    participant_data_filename = f"{pnum}.csv"
    participant_data_path = os.path.join(configss['root'], configss['data_qa'], participant_data_filename)
    
    # Read the participant's data
    df_participant = pd.read_csv(participant_data_path)
    
    # Calculate accuracies for each condition
    accuracy_d = df_participant[df_participant['BlockType'] == 'D']['Correct'].mean()
    accuracy_nd = df_participant[df_participant['BlockType'] == 'ND']['Correct'].mean()
    overall_accuracy = df_participant['Correct'].mean()
    
    # Append the result to the list
    results.append({
        'PID': pnum,
        'D_accuracy': accuracy_d,
        'ND_accuracy': accuracy_nd,
        'Overall_accuracy': overall_accuracy
    })

# Create a DataFrame from the results
df_results = pd.DataFrame(results)

# Save the DataFrame to a CSV file
csv_filename = 'participant_QandA_accuracies_by_condition.csv'
df_results.to_csv(csv_filename, index=False)

print(f"Participant accuracies by condition saved to {csv_filename}")

# Display the DataFrame
print(df_results)


Processing Participants: 100%|██████████| 24/24 [00:00<00:00, 503.40it/s]

Participant accuracies by condition saved to participant_QandA_accuracies_by_condition.csv
    PID  D_accuracy  ND_accuracy  Overall_accuracy
0     1    0.833333     0.916667          0.875000
1     2    0.333333     0.750000          0.541667
2     3    0.916667     0.833333          0.875000
3     4    0.500000     0.916667          0.708333
4     6    0.666667     0.500000          0.583333
5     7    0.750000     0.666667          0.708333
6     8    0.583333     0.916667          0.750000
7     9    0.500000     0.666667          0.583333
8    10    0.750000     0.666667          0.708333
9    11    0.916667     0.916667          0.916667
10   12    0.583333     0.750000          0.666667
11   15    0.833333     0.916667          0.875000
12   18    0.750000     0.916667          0.833333
13   19    0.500000     0.583333          0.541667
14   21    0.750000     0.500000          0.625000
15   22    0.583333     0.666667          0.625000
16   23    0.916667     0.750000          




In [4]:


path_qa = os.path.join(configss['root'], configss['data_qa'] , partipant_data_path ) 
path_tones = os.path.join(configss['root'], configss['data_tones'] , partipant_data_path ) 
path_completion = os.path.join(configss['root'], configss['data_completion'] , partipant_data_path ) 
path_reading_data = os.path.join(configss['root'], configss['data_reading_data'] , partipant_data_path ) 

KeyError: 'data_qa'

In [3]:


# Calculate mean and standard error for each block type
accuracy_stats = df_combined.groupby('BlockType')['Correct'].agg(['mean', 'count'])
accuracy_stats['stderr'] = np.sqrt((accuracy_stats['mean'] * (1 - accuracy_stats['mean'])) / accuracy_stats['count'])

# Calculate the 95% confidence interval using the standard error
confidence_interval = stats.norm.ppf(0.975) * accuracy_stats['stderr']

# Plotting the accuracy with error bars
plt.figure(figsize=(8,6))
plt.bar(accuracy_stats.index, accuracy_stats['mean'], yerr=confidence_interval, capsize=5)

# Adding titles and labels
plt.title('Question Accuracy for Each Block Type (with 95% CI)')
plt.xlabel('Block Type')
plt.ylabel('Accuracy (Proportion of Correct Answers)')
plt.xticks(rotation=0)

# Show plot
plt.show()

NameError: name 'df_combined' is not defined