In [1]:
#Set up

import pandas as pd
import glob
import os
import csv



# Set the main path to the "Data" folder
main_path = r'C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data'
os.chdir(main_path)


In [2]:
# PEAPQ

# Step 1: Find the CSV file in the PEAPQ folder using wildcards
peapq_folder = 'PEAPQ'
file_pattern = os.path.join(peapq_folder, 'PEAP-Q+-+PARC*_*.*.csv')
file_list = glob.glob(file_pattern)

if not file_list:
    print("No files found matching the pattern")
else:
    # Assuming the first matching file is the one we want
    csv_file = file_list[0]

    # Step 2: Load the CSV file
    df = pd.read_csv(csv_file)

    # Step 3: Filter out rows without subject information and only include subject numbers 1610 or greater
    df = df.dropna(subset=['subject#'])
    df['subject#'] = pd.to_numeric(df['subject#'], errors='coerce')  # Convert subject# to numeric
    df = df[df['subject#'] >= 1610]

    if df.empty:
        print("No valid rows for analysis")
    else:
        # Step 4: Calculate the average of the three columns for each subject
        # Ensure the subject number column is treated as a categorical variable for proper grouping
        df['subject#'] = df['subject#'].astype('category')
        df['PEAPQ_SelfRateProf'] = df[['Self Rate Prof_1', 'Self Rate Prof_2', 'Self Rate Prof_3']].mean(axis=1)

        # Step 5: Create a summary DataFrame
        summary_df = df[['subject#', 'PEAPQ_SelfRateProf']].copy()
        summary_df.rename(columns={'subject#': 'Subject'}, inplace=True)

        # Optionally, save this summary as a new CSV file or update an existing summary sheet
        summary_csv_path = os.path.join(peapq_folder, 'PEAPQ_Summary.csv')
        summary_df.to_csv(summary_csv_path, index=False)

        print(f"Summary saved to {summary_csv_path}")

Summary saved to PEAPQ\PEAPQ_Summary.csv


In [3]:
# LEAPQ


# Set the main path to the "Data" folder
#main_path = r'C:\Users\Maylaka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data'
#os.chdir(main_path)

# Step 1: Find the CSV file in the LEAPQ folder using wildcards
leapq_folder = 'LEAPQ'
file_pattern = os.path.join(leapq_folder, 'ALEAP-Q+Short+-+PARC_*')
file_list = glob.glob(file_pattern)

if not file_list:
    print("No files found matching the pattern")
else:
    # Assuming the first matching file is the one we want
    csv_file = file_list[0]

    # Step 2: Load the CSV file
    df = pd.read_csv(csv_file)

    # Step 3: Check values in "langs order_1" and "langs order_2" for language classification
    df['Language Classification'] = 'Monolingual'  # Default value

    # Monolingual case
    mono_condition = df['langs order_1'].str.isalpha() & df['langs order_2'].str.isalpha()
    df.loc[mono_condition, 'Language Classification'] = 'Monolingual'

    # Early Bilingual, Late Bilingual, Other cases
    df['lang2 age acquire'] = pd.to_numeric(df['lang2 age acquire'], errors='coerce')  # Convert to numeric
    early_condition = (df['lang2 age acquire'] < 7) & (df[['lang2 prof rating_1', 'lang2 prof rating_2', 'lang2 prof rating_3']].mean(axis=1, skipna=True) > 7)
    late_condition = (df['lang2 age acquire'] >= 7) & (df[['lang2 prof rating_1', 'lang2 prof rating_2', 'lang2 prof rating_3']].mean(axis=1, skipna=True) > 7)

    df.loc[early_condition, 'Language Classification'] = 'Early Bilingual'
    df.loc[late_condition, 'Language Classification'] = 'Late Bilingual'
    df.loc[~(early_condition | late_condition | mono_condition), 'Language Classification'] = 'Other'

    # Step 4: Calculate LEAPQ_EngProf based on English presence
    df['LEAPQ_EngProf'] = 'manual check needed'  # Default value

    eng1_condition = df['langs order_1'].str.contains('English', case=False, na=False)
    eng2_condition = df['langs order_2'].str.contains('English', case=False, na=False)
    eng3_condition = df['langs order_3'].str.contains('English', case=False, na = False)

    df.loc[eng1_condition, 'LEAPQ_EngProf'] = df[['lang1 prof rating_1', 'lang1 prof rating_2', 'lang1 prof rating_3']].mean(axis=1, skipna=True)
    df.loc[eng2_condition, 'LEAPQ_EngProf'] = df[['lang2 prof rating_1', 'lang2 prof rating_2', 'lang2 prof rating_3']].mean(axis=1, skipna=True)
    df.loc[eng3_condition, 'LEAPQ_EngProf'] = df[['lang2 prof rating_1', 'lang2 prof rating_2', 'lang2 prof rating_3']].mean(axis=1, skipna=True)
    
    # Filter rows based on subject number
    df['Subject ID'] = pd.to_numeric(df['Subject ID'], errors='coerce')  # Convert to numeric
    df = df[df['Subject ID'].notna()]  # Exclude rows without a subject number
    df = df[df['Subject ID'] >= 1610]  # Filter subject numbers >= 1610
   

    # Step 5: Create a summary DataFrame
    summary_df = df[['Subject ID', 'LEAPQ_EngProf']].copy()
    summary_df.rename(columns={'Subject ID': 'Subject'}, inplace=True)

    # Optionally, save this summary as a new CSV file or update an existing summary sheet
    summary_csv_path = os.path.join(leapq_folder, 'LEAPQ_Summary.csv')
    summary_df.to_csv(summary_csv_path, index=False)

    print(f"Summary saved to {summary_csv_path}")



Summary saved to LEAPQ\LEAPQ_Summary.csv


In [4]:
# Demographics

# Step 1: Find the CSV file in the Demographics folder using wildcards
demo_folder = 'Demographics'
demo_file_pattern = os.path.join(demo_folder, 'PARCfMRI+Consent+Form+and+Demographics_*')
demo_file_list = glob.glob(demo_file_pattern)

if not demo_file_list:
    print("No files found matching the pattern")
else:
    # Assuming the first matching file is the one we want
    demo_csv_file = demo_file_list[0]

    # Step 2: Load the CSV file
    demo_df = pd.read_csv(demo_csv_file)

    # Step 3: Filter subjects with number equal to or greater than 1610
    demo_df['Subject #'] = pd.to_numeric(demo_df['Subject #'], errors='coerce')  # Convert to numeric
    demo_df = demo_df[demo_df['Subject #'] >= 1610]

    # Step 4: Extract relevant columns
    demo_summary_df = demo_df[['Subject #', 'age', 'sex', 'lang classification', 'L1','handedness']].copy()

    # Step 5: Rename columns
    demo_summary_df.rename(columns={'Subject #': 'Subject', 'age': 'Age', 'sex': 'Sex', 'lang classification': 'Language_ClassificationDemo', 'L1': 'L1', 'handedness': 'Handedness'}, inplace=True)

    # Optionally, save this summary as a new CSV file or update an existing summary sheet
    demo_summary_csv_path = os.path.join(demo_folder, 'Demographics_Summary.csv')
    demo_summary_df.to_csv(demo_summary_csv_path, index=False)

    print(f"Demographics Summary saved to {demo_summary_csv_path}")


Demographics Summary saved to Demographics\Demographics_Summary.csv


In [5]:
# Nelson Denny 

# Step 1: Find the CSV file in the ND Comp - Form G folder using wildcards
ndcomp_folder = 'ND Comp - Form G'
ndcomp_file_pattern = os.path.join(ndcomp_folder, 'Nelson+Denny+Comprehension+-+Version+G+*')
ndcomp_file_list = glob.glob(ndcomp_file_pattern)

if not ndcomp_file_list:
    print("No files found matching the pattern")
else:
    # Assuming the first matching file is the one we want
    ndcomp_csv_file = ndcomp_file_list[0]

    # Step 2: Load the CSV file
    ndcomp_df = pd.read_csv(ndcomp_csv_file)

   # Step 3: Extract relevant columns and filter subjects
    ndcomp_summary_df = ndcomp_df[['Subject ID', 'SC0', 'reading rate']].copy()
    ndcomp_summary_df = ndcomp_summary_df[ndcomp_summary_df['Subject ID'].notna()]
    ndcomp_summary_df['Subject ID'] = ndcomp_summary_df['Subject ID'].astype(int)
    ndcomp_summary_df = ndcomp_summary_df[ndcomp_summary_df['Subject ID'] >= 1610]

    # Step 4: Rename columns
    ndcomp_summary_df.rename(columns={'Subject ID': 'Subject', 'SC0': 'NDComp', 'reading rate': 'NDRR'}, inplace=True)

    # Optionally, save this summary as a new CSV file or update an existing summary sheet
    ndcomp_summary_csv_path = os.path.join(ndcomp_folder, 'NDComp_Summary.csv')
    ndcomp_summary_df.to_csv(ndcomp_summary_csv_path, index=False)

    print(f"ND Comp - Form G Summary saved to {ndcomp_summary_csv_path}")



ND Comp - Form G Summary saved to ND Comp - Form G\NDComp_Summary.csv


In [4]:
# Card Sort

# Load in the individual text files of data 

# Specify the path to your folder containing text files
#folder_path = r'C:\Users\Maylaka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data\CardSort\experiment_data'
folder_path = r'C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data\CardSort\experiment_data'

# Get a list of all text files in the folder
files = [file for file in os.listdir(folder_path) if file.endswith('.txt')]


#Convert the files to a df

# Initialize an empty dataframe to store the data
combined_df = pd.DataFrame()

# Define column names for the card sorting data
column_names = ['Block', 'Condition', 'CondName', 'TopType', 'TopStim', 'BtmType', 'BtmStim', 'Resp', 'Status', 'RT', 
               'SymRespCount', 'TxtRespCount', 'IncorrRespCount']

# Loop through each file and read it into a dataframe
for file in files:
    file_path = os.path.join(folder_path, file)
    
    # Assuming space is the delimiter, you can adjust it accordingly
    df = pd.read_csv(file_path, delimiter=' ', names = column_names)
    
        # Add a new column named 'FileName' with the name of the text file
    df['FileName'] = os.path.splitext(file)[0]

    
    # Append the dataframe to the combined dataframe
    #combined_df = combined_df.append(df, ignore_index=True)
    combined_df = pd.concat([combined_df, df], ignore_index = True)


# Extract the substring after 'data.' and assign it to the 'Subject' column
combined_df['Id'] = combined_df['FileName'].str.extract(r'data\.(.*)')

print(combined_df)

#Pull the counts of verbal vs visual cs responses into a new df

# Initialize an empty dataframe for the results
result_df = pd.DataFrame(columns=['Id', 'SymRespCount', 'TxtRespCount', 'IncorrRespCount'])

# Group by 'Subject' and find the maximum values for each column
max_values = combined_df.groupby('Id')[['SymRespCount', 'TxtRespCount', 'IncorrRespCount']].max().reset_index()


# Merge the max_values DataFrame with result_df on 'Subject'
#result_df = result_df.append(max_values, ignore_index=True)
result_df = pd.concat([result_df, max_values], ignore_index = True)


# Load in the survey data to get subject # and IRQ; Perform IRQ groupings

# Load the data.csv file
data_df = pd.read_csv(r'CardSort\data.csv')

# IRQ Analysis

# Mapping of IRQ questions to categories
irq_visual_questions = ['IRQ_1', 'IRQ_3', 'IRQ_4', 'IRQ_8', 'IRQ_10', 'IRQ_11', 'IRQ_15', 'IRQ_16', 'IRQ_17', 'IRQ_19', 'IRQ_21', 'IRQ_25']
irq_verbal_questions = ['IRQ_2', 'IRQ_5', 'IRQ_6', 'IRQ_7', 'IRQ_9', 'IRQ_12', 'IRQ_13', 'IRQ_14', 'IRQ_18', 'IRQ_20', 'IRQ_22', 'IRQ_23', 'IRQ_24', 'IRQ_26']

# Reverse code values for IRQ_20 in irq_verbal_questions
reverse_code_mapping = {5: 1, 4: 2, 3: 3, 2: 4, 1: 5}
data_df['IRQ_20'] = data_df['IRQ_20'].map(reverse_code_mapping)

#print(data_df['IRQ_20'])

# Pull out the Coding Specific IRQ questions: 5 (Write - Verbal), 10 (Debug - Visual), 17 (Write - Visual) , 23 (Debug - Verbal)
irq_verbal_code = ['IRQ_5', 'IRQ_23']
irq_visual_code = ['IRQ_10', 'IRQ_17']

# Create new columns for IRQ_visual and IRQ_verbal in data_df
data_df['IRQ_visual'] = data_df[irq_visual_questions].mean(axis=1)
data_df['IRQ_verbal'] = data_df[irq_verbal_questions].mean(axis=1)
data_df['IRQ_visual_code'] = data_df[irq_visual_code].mean(axis=1)
data_df['IRQ_verbal_code'] = data_df[irq_verbal_code].mean(axis=1)

# Initialize a new column 'Expt_number' with NaN values
result_df['Subject'] = float('nan')
result_df['IRQ_visual'] = float('nan')
result_df['IRQ_verbal'] = float('nan')
result_df['IRQ_visual_code'] = float('nan')
result_df['IRQ_verbal_code'] = float('nan')

# Iterate through each row in result_df
for index, row in result_df.iterrows():
    # Extract the unique part of the 'Subject' value for partial matching
    partial_match = row['Id'].split('.')[0]
    
    # Perform partial string matching and get the corresponding 'subjnum_1' value
    match_row = data_df[data_df['participant'].str.contains(partial_match)]
    
    # If a match is found, update the 'Expt_number' column in result_df
    if not match_row.empty:
        result_df.at[index, 'Subject'] = match_row['subjnum_1'].values[0]
        result_df.at[index, 'IRQ_visual'] = match_row['IRQ_visual'].values[0]
        result_df.at[index, 'IRQ_verbal'] = match_row['IRQ_verbal'].values[0]
        result_df.at[index, 'IRQ_visual_code'] = match_row['IRQ_visual_code'].values[0]
        result_df.at[index, 'IRQ_verbal_code'] = match_row['IRQ_verbal_code'].values[0]

# Drop the 'Id' column
result_df = result_df.drop(['Id'], axis=1)

# Calculate VerbalBiasScore and add it as a new column
result_df['VerbalBiasScore'] = (result_df['TxtRespCount'] - result_df['SymRespCount']) / (result_df['TxtRespCount'] + result_df['SymRespCount'])

# Filter subjects
result_df = result_df[result_df['Subject'].notna()]
result_df['Subject'] = pd.to_numeric(result_df['Subject'], errors='coerce')
result_df = result_df[result_df['Subject'] >= 1610]

# Save the result_df to an analysis summary CSV file
csv_file_path = os.path.join(r'C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data\CardSort\CardSort_Summary.csv')
result_df.to_csv(csv_file_path, index=False)

print(f"Analysis summary has been written to {csv_file_path}")


                    Block  Condition CondName TopType TopStim BtmType BtmStim  \
0      CardSort_PracBlock          1      Con     Sym   Spade    Text   Spade   
1      CardSort_PracBlock          1      Con     Txt   Heart     Sym   Heart   
2      CardSort_PracBlock          1      Con     Txt   Spade     Sym   Spade   
3      CardSort_PracBlock          1      Con     Sym    Club     Txt    Club   
4         CardSort_Block1          1      Con     Sym   Heart     Txt   Heart   
...                   ...        ...      ...     ...     ...     ...     ...   
11951     CardSort_Block4          2    Incon     Txt    Club     Sum   Heart   
11952     CardSort_Block4          1      Con     Sym   Heart     Txt   Heart   
11953     CardSort_Block4          2    Incon     Sym    Club     Txt   Heart   
11954     CardSort_Block4          1      Con     Sym    Club     Txt    Club   
11955     CardSort_Block4          1      Con     Txt   Heart     Sym   Heart   

       Resp  Status    RT  

In [3]:
# Digit Span

# Load in the individual text files of data 

# Specify the path to your folder containing text files
folder_path = r'C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data\Fwd Digit Span\experiment_data'


# Get a list of all text files in the folder
files = [file for file in os.listdir(folder_path) if file.endswith('.txt')]

#print(files)

#Convert the files to a df

# Initialize an empty dataframe to store the data
combined_df = pd.DataFrame()

# Define column names for the card sorting data
column_names = ['Subject', 'Id', 'task', 'trial', 'direction', 'setsize', 'correctresp', 'subresp', 'Acc', 'RT', 'Span']

# Loop through each file and read it into a dataframe
# Loop through each file and read it into a dataframe
for file in files:
    file_path = os.path.join(folder_path, file)
    
    # Assuming space is the delimiter, you can adjust it accordingly
    df = pd.read_csv(file_path, delimiter=' ', names = column_names)
    
        # Add a new column named 'FileName' with the name of the text file
    df['FileName'] = os.path.splitext(file)[0]
    
    # Append the dataframe to the combined dataframe
    #combined_df = combined_df.append(df, ignore_index=True)
    combined_df = pd.concat([combined_df, df], ignore_index=True)
    

#Pull the span into a new df

# Initialize an empty dataframe for the results
result_df = pd.DataFrame(columns=['Subject', 'Span'])

# Group by 'Subject' and find the maximum values for each column
max_values = combined_df.groupby('Subject')[['Span']].max().reset_index()


# Merge the max_values DataFrame with result_df on 'Subject'
#result_df = result_df.append(max_values, ignore_index=True)
result_df = pd.concat([result_df, max_values], ignore_index=True)

#Filter subjects 
result_df = result_df[result_df['Subject'].notna()]
result_df['Subject'] = pd.to_numeric(result_df['Subject'], errors='coerce')
result_df = result_df[result_df['Subject'] >= 1610]

# Save the result_df to an analysis summary CSV file
csv_file_path = os.path.join(r'C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data\Fwd Digit Span\DigitSpan_Summary.csv')
result_df.to_csv(csv_file_path, index=False)

print(f"Analysis summary has been written to {csv_file_path}")



     Subject         Id     task  trial direction  setsize  correctresp  \
0          0   336916.0  fortask      1   forward        3          582   
1          0   336916.0  fortask      2   forward        3          694   
2          0   336916.0  fortask      3   forward        4         6439   
3          0   336916.0  fortask      4   forward        4         7286   
4          0   336916.0  fortask      5   forward        5        42731   
..       ...        ...      ...    ...       ...      ...          ...   
737    16330  1373204.0  fortask     10   forward        7      4179386   
738    16330  1373204.0  fortask     11   forward        8     58192647   
739    16330  1373204.0  fortask     12   forward        8     38295174   
740    16330  1373204.0  fortask     13   forward        9    275862584   
741    16330  1373204.0  fortask     14   forward        9    713942568   

       subresp  Acc     RT  Span  \
0          582    1   3253     0   
1          694    1   2934 

In [2]:
# PyTask


# Path to the main folder containing subject folders
main_folder_path = r'C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data\PyTask'

# List to store the summary data
summary_data = []

# Loop through each subject folder
for subject_folder_name in os.listdir(main_folder_path):
    subject_folder_path = os.path.join(main_folder_path, subject_folder_name)

    # Check if it's a directory (to skip any files in the main folder)
    if os.path.isdir(subject_folder_path):
        # Initialize variables for each subject
        subject_data = {'Subject': subject_folder_name, 'PY_ACC': 0, 'PY_RT': 0, 'PY_CCAcc': 0, 'PY_CCRT': 0,
                        'WR_ACC': 0, 'WR_RT': 0, 'WR_CCAcc': 0, 'WR_CCRT': 0,
                        'Count_PY': 0, 'Count_WR': 0}

        # Loop through each condition (PY and WR)
        for condition in ['PY', 'WR']:
            combined_df = pd.DataFrame()  # Combined dataframe for each condition

            # Loop through the 4 relevant text files for each condition
            for file_name in os.listdir(subject_folder_path):
                if file_name.startswith(f'{condition}SubjectResponse'):
                    file_path = os.path.join(subject_folder_path, file_name)

                    # Read the text file into a dataframe
                    df = pd.read_csv(file_path, delim_whitespace=True)

                    # Append the dataframe to the combined_df
                    #combined_df = combined_df.append(df, ignore_index=True)
                    combined_df = pd.concat([combined_df, df], ignore_index = True)

            # Calculate the average ACC, RT, CCAcc, and CCRT for the condition
            if not combined_df.empty:
                subject_data[f'{condition}_ACC'] = combined_df['Acc'].mean()
                subject_data[f'{condition}_RT'] = combined_df['RT'].mean()
                subject_data[f'{condition}_CCAcc'] = combined_df['CCAcc'].mean()
                subject_data[f'{condition}_CCRT'] = combined_df['CCRT'].mean()
                subject_data[f'Count_{condition}'] = len(combined_df)

        # Append the subject_data to the summary_data list
        summary_data.append(subject_data)
        #summary_data = pd.concat([summary_data, subject_data], ignore_index = True)

# Write the summary_data to a CSV file
csv_file_path = r'C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data\PyTask\PyTask_Summary.csv'
with open(csv_file_path, 'w', newline='') as csv_file:
    fieldnames = ['Subject', 'PY_ACC', 'PY_RT', 'PY_CCAcc', 'PY_CCRT', 'Count_PY', 'WR_ACC', 'WR_RT', 'WR_CCAcc', 'WR_CCRT', 'Count_WR']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()
    for row in summary_data:
        # Exclude additional fields not in fieldnames
        row_to_write = {key: row[key] for key in fieldnames}
        writer.writerow(row_to_write)

print(f"Summary data has been written to {csv_file_path}")

Summary data has been written to C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data\PyTask\PyTask_Summary.csv


In [3]:
# Localizer

# Path to the main folder containing subject folders
main_folder_path = r'C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data\Localizer'


# List to store the combined data
combined_data = []

# Loop through each subject folder
for subject_folder_name in os.listdir(main_folder_path):
    subject_folder_path = os.path.join(main_folder_path, subject_folder_name)

    # Check if it's a directory (to skip any files in the main folder)
    if os.path.isdir(subject_folder_path):
        # Initialize variables for each subject
        subject_data = []

        # Loop through the SubjectResponse* files in the subject folder
        for file_name in os.listdir(subject_folder_path):
            if file_name.startswith('SubjectResponse'):
                file_path = os.path.join(subject_folder_path, file_name)
                #print(f"Reading file: {file_path}")

                # Read the text file into a DataFrame
                df = pd.read_csv(file_path, delim_whitespace=True, header=None, names=['Run', 'Condition', 'Stim 1', 'Stim 2', 'Stim 3', 'Stim 4', 'Stim 5', 'Stim 6'])
                
                # Add the 'Subject' column to the DataFrame
                df.insert(0, 'Subject', subject_folder_name)

                # Append the DataFrame to the subject_data
                subject_data.append(df)

        # Check if any SubjectResponse* files were found
        if subject_data:
            # Concatenate the DataFrames for this subject
            subject_df = pd.concat(subject_data, ignore_index=True)

            # Calculate average accuracy for RJ and LS conditions
            rj_accuracy = subject_df.loc[subject_df['Condition'] == 'RJ', 'Stim 1':'Stim 6'].mean().mean()
            ls_accuracy = subject_df.loc[subject_df['Condition'] == 'LS', 'Stim 1':'Stim 6'].mean().mean()

            # Append results to combined_data
            combined_data.append({'Subject': subject_folder_name, 'RJ_Acc': rj_accuracy, 'LS_Acc': ls_accuracy})

# Check if any combined data was found
if combined_data:
    # Create results_summary DataFrame
    results_summary = pd.DataFrame(combined_data)

    # Save the results_summary to a CSV file
    csv_file_path = 'Localizer/Localizer_Summary.csv'
    results_summary.to_csv(csv_file_path, index=False)

    print(f"Results summary has been written to {csv_file_path}")
else:
    print("No SubjectResponse* files found.")


Results summary has been written to Localizer/Localizer_Summary.csv


In [4]:
# Combining it all into one summary sheet!

# Set the base path
base_path = r'C:\Users\Malayka Mottarella\OneDrive - UW\CCDL\CCDL Shared\Expt\PARC\Data'

# Load Demographics_Summary
demographics_path = os.path.join(base_path, 'Demographics', 'Demographics_Summary.csv')
demographics_df = pd.read_csv(demographics_path)

# Load LEAPQ_Summary
leapq_path = os.path.join(base_path, 'LEAPQ', 'LEAPQ_Summary.csv')
leapq_df = pd.read_csv(leapq_path)

# Load NDComp_Summary
ndcomp_path = os.path.join(base_path, 'ND Comp - Form G', 'NDComp_Summary.csv')
ndcomp_df = pd.read_csv(ndcomp_path)

# Load PEAPQ_Summary
peapq_path = os.path.join(base_path, 'PEAPQ', 'PEAPQ_Summary.csv')
peapq_df = pd.read_csv(peapq_path)

# Load Digit Span
span_path = os.path.join(base_path, 'Fwd Digit Span', 'DigitSpan_Summary.csv')
span_df = pd.read_csv(span_path)

# Load Card Sort Summary
cardsort_path = os.path.join(base_path, 'CardSort', 'CardSort_Summary.csv')
cardsort_df = pd.read_csv(cardsort_path)

# Load Localizer Summary
loc_path = os.path.join(base_path, 'Localizer', 'Localizer_Summary.csv')
loc_df = pd.read_csv(loc_path)

# Load PyTask Summary
py_path = os.path.join(base_path, 'PyTask', 'PyTask_Summary.csv')
py_df = pd.read_csv(py_path)


# Merge dataframes based on the "Subject" column
merged_df = pd.merge(demographics_df, leapq_df, how = 'left', on = 'Subject')
merged_df = pd.merge(merged_df, ndcomp_df, how = 'left', on = 'Subject')
merged_df = pd.merge(merged_df, peapq_df, how = 'left', on='Subject')
merged_df = pd.merge(merged_df, span_df, how = 'left', on='Subject')
merged_df = pd.merge(merged_df, cardsort_df, how = 'left', on='Subject')
merged_df = pd.merge(merged_df, loc_df, how = 'left', on='Subject')
merged_df = pd.merge(merged_df, py_df, how = 'left', on='Subject')
print(merged_df)




# Save the merged dataframe to a CSV file
summary_csv_path = os.path.join(base_path, 'Analysis_Summary.csv')
merged_df.to_csv(summary_csv_path, index=False)

print(f"Analysis summary has been written to {summary_csv_path}")


    Subject   Age     Sex Language_ClassificationDemo          L1  \
0    1610.0  19.0    Male             Early Bilingual     English   
1    1611.0  21.0    Male                 Monolingual    Chinese    
2    1612.0  22.0  Female              Late Bilingual    Mandarin   
3    1619.0  21.0  Female             Early Bilingual     Chinses   
4    1615.0  30.0  Female                 Monolingual     English   
5    1616.0  20.0  Female                 Monolingual     English   
6    1613.0  26.0    Male             Early Bilingual     English   
7    1617.0  19.0    Male              Late Bilingual     English   
8    1614.0  20.0  Female              Late Bilingual     English   
9    1620.0  25.0    Male             Early Bilingual   Taiwanese   
10   1618.0  24.0    Male              Late Bilingual  Indonesian   
11   1621.0  19.0    Male                 Monolingual     English   
12   1623.0  21.0    Male             Early Bilingual   Cantonese   
13   1622.0  19.0    Male         