In [2]:
import pandas as pd
import numpy as np
from scipy import stats

In [5]:
# File names for all 20 participants (IDs 0-19).
# Each name is "0<ID>_Recognition memory_<date>.csv"; IDs 0-6 carry the
# date 2025-11-19 and IDs 7-19 carry 2025-11-20.
participant_files = [
    f"0{pid}_Recognition memory_{'2025-11-19' if pid <= 6 else '2025-11-20'}.csv"
    for pid in range(20)
]

# Echo the list, numbered from 1, so the cell output documents the inputs
print(f"Loaded {len(participant_files)} participant files:")
for position, filename in enumerate(participant_files, start=1):
    print(f"  {position:2d}. {filename}")

Loaded 20 participant files:
   1. 00_Recognition memory_2025-11-19.csv
   2. 01_Recognition memory_2025-11-19.csv
   3. 02_Recognition memory_2025-11-19.csv
   4. 03_Recognition memory_2025-11-19.csv
   5. 04_Recognition memory_2025-11-19.csv
   6. 05_Recognition memory_2025-11-19.csv
   7. 06_Recognition memory_2025-11-19.csv
   8. 07_Recognition memory_2025-11-20.csv
   9. 08_Recognition memory_2025-11-20.csv
  10. 09_Recognition memory_2025-11-20.csv
  11. 010_Recognition memory_2025-11-20.csv
  12. 011_Recognition memory_2025-11-20.csv
  13. 012_Recognition memory_2025-11-20.csv
  14. 013_Recognition memory_2025-11-20.csv
  15. 014_Recognition memory_2025-11-20.csv
  16. 015_Recognition memory_2025-11-20.csv
  17. 016_Recognition memory_2025-11-20.csv
  18. 017_Recognition memory_2025-11-20.csv
  19. 018_Recognition memory_2025-11-20.csv
  20. 019_Recognition memory_2025-11-20.csv


In [18]:
def calculate_performance_metrics(df):
    """Compute recognition-memory performance metrics for one participant.

    Only rows with a non-missing 'type' value are treated as test trials.

    Parameters
    ----------
    df : pandas.DataFrame
        Trial-level data containing the columns 'type' ('old' / 'new'),
        'test_response.corr' (1 = correct, 0 = incorrect) and
        'test_response.rt'.

    Returns
    -------
    tuple
        ``(hits, false_alarms, dprime, rt_hits)`` where

        * hits         -- number of 'old' trials with corr == 1
        * false_alarms -- number of 'new' trials with corr == 0
        * dprime       -- z(hit rate) - z(false-alarm rate), using rates
                          adjusted as (count + 0.5) / (total + 1)
        * rt_hits      -- mean 'test_response.rt' over hit trials multiplied
                          by 1000 (assumes RTs are recorded in seconds, i.e.
                          the result is in ms -- confirm against the task),
                          or None when no hit trial has a usable RT

    Raises
    ------
    KeyError
        If one of the required columns is absent from ``df``.
    """
    # Keep test trials only (rows where 'type' is present)
    test_trials = df[df['type'].notna()]

    is_old = test_trials['type'] == 'old'
    is_new = test_trials['type'] == 'new'
    answered_correctly = test_trials['test_response.corr'] == 1
    answered_wrongly = test_trials['test_response.corr'] == 0

    # Hits: old items answered correctly; the frame is reused for the RT mean
    hit_trials = test_trials[is_old & answered_correctly]
    hits = len(hit_trials)

    # False alarms: new items answered incorrectly
    false_alarms = int((is_new & answered_wrongly).sum())

    total_old = int(is_old.sum())
    total_new = int(is_new.sum())

    # Adjusted rates: adding 0.5 to the count and 1 to the total keeps both
    # rates strictly inside (0, 1), so the inverse normal CDF is always finite.
    hit_rate = (hits + 0.5) / (total_old + 1) if total_old > 0 else 0.5
    fa_rate = (false_alarms + 0.5) / (total_new + 1) if total_new > 0 else 0.5

    # No try/except needed here: with the adjusted rates above, ppf cannot
    # fail, and a blanket handler would only hide genuine errors as d' = 0.
    dprime = stats.norm.ppf(hit_rate) - stats.norm.ppf(fa_rate)

    # Mean RT over hits; report None (not NaN) whenever no usable RT exists,
    # including the case where hit trials exist but every RT is missing.
    mean_rt = hit_trials['test_response.rt'].mean() if hits else float('nan')
    rt_hits = None if pd.isna(mean_rt) else mean_rt * 1000

    return hits, false_alarms, dprime, rt_hits

print("running")

running


In [19]:
# Collect one summary record (dict) per participant
all_data = []

print(f"Processing {len(participant_files)} participants...\n")

# Process every participant file; a failure on one file is reported and
# recorded as a placeholder row so the remaining files are still processed.
for i, file_info in enumerate(participant_files):
    try:
        # Read the CSV file
        df = pd.read_csv(file_info)

        # Participant info is taken from the first row when the column exists
        participant_id = df['participant'].iloc[0] if 'participant' in df.columns else f"P{i+1:02d}"
        raw_gender = df['gender'].iloc[0] if 'gender' in df.columns else ''

        # A blank gender cell is read by pandas as NaN, which is a truthy
        # float with no .capitalize(); treat it as "unknown" instead of letting
        # the AttributeError discard this participant's valid metrics.
        gender = '' if pd.isna(raw_gender) else str(raw_gender).capitalize()

        # Calculate performance metrics
        hits, false_alarms, dprime, rt_hits = calculate_performance_metrics(df)

        # Round once so the stored record and the progress line always agree
        dprime_rounded = round(dprime, 3)
        rt_rounded = round(rt_hits, 2) if rt_hits is not None else None

        all_data.append({
            'Participant_ID': participant_id,
            'Gender': gender,
            'Hits': hits,
            'FalseAlarms': false_alarms,
            'dprime': dprime_rounded,
            'RT_Hits': rt_rounded
        })

        # Progress line; 'N/A' only when there is genuinely no RT value
        print(f"Processed {participant_id}: Hits: {hits}, FA: {false_alarms}, d': {dprime_rounded}, RT: {rt_rounded if rt_rounded is not None else 'N/A'}")

    except Exception as e:
        print(f"Error processing participant {i+1}: {e}")
        # Add placeholder data for failed processing
        all_data.append({
            'Participant_ID': f"P{i+1:02d}",
            'Gender': '',
            'Hits': None,
            'FalseAlarms': None,
            'dprime': None,
            'RT_Hits': None
        })

print(f"\nComplete dataset created with {len(all_data)} participants!")

Processing 20 participants...

Processed 0: Hits: 7, FA: 0, d': 1.754, RT: 1687.02
Processed 1: Hits: 6, FA: 0, d': 1.511, RT: 1722.62
Processed 2: Hits: 8, FA: 0, d': 2.029, RT: 1365.28
Processed 3: Hits: 5, FA: 2, d': 0.0, RT: 1356.36
Processed 4: Hits: 5, FA: 3, d': -0.524, RT: 1444.46
Processed 5: Hits: 6, FA: 1, d': 0.754, RT: 1027.77
Processed 6: Hits: 4, FA: 2, d': -0.23, RT: 1246.68
Processed 7: Hits: 7, FA: 0, d': 1.754, RT: 1101.72
Processed 8: Hits: 6, FA: 1, d': 0.754, RT: 1514.22
Processed 9: Hits: 6, FA: 2, d': 0.23, RT: 1373.85
Processed 10: Hits: 5, FA: 1, d': 0.524, RT: 1290.64
Processed 11: Hits: 5, FA: 1, d': 0.524, RT: 1661.11
Processed 12: Hits: 4, FA: 2, d': -0.23, RT: 870.44
Processed 13: Hits: 6, FA: 0, d': 1.511, RT: 868.91
Processed 14: Hits: 7, FA: 0, d': 1.754, RT: 767.23
Processed 15: Hits: 5, FA: 2, d': 0.0, RT: 1006.22
Processed 16: Hits: 7, FA: 0, d': 1.754, RT: 847.68
Processed 17: Hits: 6, FA: 1, d': 0.754, RT: 926.23
Processed 18: Hits: 7, FA: 0, d': 

In [21]:
# Assemble the per-participant records into a single table
final_df = pd.DataFrame(all_data)

# Write the table to disk without the row index
final_df.to_csv('all_participants_recognition_memory_data.csv', index=False)

print(f"\nComplete dataset created with {len(final_df)} participants!")
print("File saved as: 'all_participants_recognition_memory_data.csv'")

# 60-character rule used above and below each section heading
rule = "=" * 60

print("\n" + rule)
print("FINAL DATASET SUMMARY:")
print(rule)
print(final_df)

print("\n" + rule)
print("PERFORMANCE STATISTICS:")
print(rule)

# (label, column, number format, unit suffix) for each "mean ± std" line
summary_specs = [
    ("Average Hits", "Hits", ".1f", ""),
    ("Average False Alarms", "FalseAlarms", ".1f", ""),
    ("Average d-prime", "dprime", ".2f", ""),
    ("Average RT for Hits", "RT_Hits", ".0f", " ms"),
]
for label, column, number_format, unit in summary_specs:
    values = final_df[column]
    print(f"{label}: {values.mean():{number_format}} ± {values.std():{number_format}}{unit}")



Complete dataset created with 20 participants!
File saved as: 'all_participants_recognition_memory_data.csv'

FINAL DATASET SUMMARY:
    Participant_ID  Gender  Hits  FalseAlarms  dprime  RT_Hits
0                0    Male     7            0   1.754  1687.02
1                1    Male     6            0   1.511  1722.62
2                2    Male     8            0   2.029  1365.28
3                3    Male     5            2   0.000  1356.36
4                4    Male     5            3  -0.524  1444.46
5                5  Female     6            1   0.754  1027.77
6                6    Male     4            2  -0.230  1246.68
7                7    Male     7            0   1.754  1101.72
8                8    Male     6            1   0.754  1514.22
9                9    Male     6            2   0.230  1373.85
10              10    Male     5            1   0.524  1290.64
11              11    Male     5            1   0.524  1661.11
12              12  Female     4            2  