In [25]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

def convert_excel_to_csv(folder_path):
    """
    Convert all Excel files in a folder to CSV and show their previews.
    
    Args:
        folder_path (str): Path to the folder containing Excel files
    """
    # Convert folder path to Path object
    folder = Path(folder_path)
    
    # Loop through all files in the folder
    for file in folder.glob('*.xlsx'):
        try:
            # Read the Excel file
            df = pd.read_excel(file)
            
            # Create CSV filename (same name, different extension)
            csv_path = file.with_suffix('.csv')
            
            # Save as CSV
            df.to_csv(csv_path, index=False)
            
            # Print information about the conversion
            print(f"\nConverted: {file.name} → {csv_path.name}")
            print(f"Shape: {df.shape}")
            print("\nPreview:")
            print(df.head())
            print("\n" + "="*50)
            
        except Exception as e:
            print(f"Error processing {file.name}: {str(e)}")

# Example usage:
# convert_excel_to_csv('data')

In [2]:
convert_excel_to_csv('/Users/paigegiese/SYG/wids-2025-mupd/data/TRAIN/')


Converted: TRAIN_QUANTITATIVE_METADATA.xlsx → TRAIN_QUANTITATIVE_METADATA.csv
Shape: (1213, 19)

Preview:
  participant_id  EHQ_EHQ_Total  ColorVision_CV_Score  APQ_P_APQ_P_CP  \
0   UmrK0vMLopoR          40.00                    13               3   
1   CPaeQkhcjg7d         -94.47                    14               3   
2   Nb4EetVPm3gs         -46.67                    14               4   
3   p4vPhVu91o4b         -26.68                    10               5   
4   M09PXs7arQ5E           0.00                    14               5   

   APQ_P_APQ_P_ID  APQ_P_APQ_P_INV  APQ_P_APQ_P_OPD  APQ_P_APQ_P_PM  \
0              10               47               13              11   
1              13               34               18              23   
2              10               35               16              10   
3              12               39               19              16   
4              15               40               20              24   

   APQ_P_APQ_P_PP  SDQ_SDQ_

In [6]:
mri  = pd.read_csv('/Users/paigegiese/SYG/wids-2025-mupd/data/TRAIN/TRAIN_FUNCTIONAL_CONNECTOME_MATRICES.csv')

mri.columns[:2]

Index(['participant_id', '0throw_1thcolumn'], dtype='object')

In [7]:
mri.columns[-5:]

Index(['196throw_198thcolumn', '196throw_199thcolumn', '197throw_198thcolumn',
       '197throw_199thcolumn', '198throw_199thcolumn'],
      dtype='object')

In [19]:
df = pd.read_csv('/Users/paigegiese/SYG/wids-2025-mupd/data/TRAIN/TRAIN_FUNCTIONAL_CONNECTOME_MATRICES.csv')
# Extract numerical indices from column names
df = df.iloc[:,1:]
# df.columns = [int(col.split('_')[1].replace('thcolumn', '')) for col in df.columns]
# df.index = [int(idx.split('_')[0].replace('throw', '')) for idx in df.index]


In [35]:
# # Extract numerical indices from column names
# region_pairs = [
#     (int(col.split('throw_')[0]), int(col.split('throw_')[1].replace('thcolumn', '')))
#     for col in df.columns
# ]

# # Convert to a NumPy array
# region_pairs = np.array(region_pairs)
# Parse the (row, column) indices from column names
region_pairs = []
for col in df.columns:
    try:
        i, j = col.split('throw_')
        i = int(i)  # Convert "0throw" → 0
        j = int(j.replace('thcolumn', ''))  # Convert "2thcolumn" → 2
        region_pairs.append((i, j))
    except ValueError:
        print(f"Skipping malformed column: {col}")

region_pairs = np.array(region_pairs)

# Check if the number of extracted pairs matches the dataframe's width
assert len(region_pairs) == df.shape[1]


In [40]:
num_regions = max([int(col.split('_')[1].replace('thcolumn', '')) for col in df.columns])
print(f"Max column index: {num_regions}")  # Should print 199


Max column index: 199


In [42]:
def reshape_to_matrix(patient_data):
    matrix = np.zeros((199, 199))  # Initialize 199x199 matrix
    
    for idx, (i, j) in enumerate(region_pairs):
        matrix[i, j - 1] = patient_data[idx]  # Subtract 1 from j

    return matrix


In [44]:
connectomes = np.array([reshape_to_matrix(row) for _, row in df.head(3).iterrows()])

  matrix[i, j - 1] = patient_data[idx]  # Subtract 1 from j


In [45]:
print(connectomes.shape)

(3, 199, 199)


In [56]:
connectomes[0][:5][1]

array([ 0.00000000e+00, -7.03962674e-02,  2.33599602e-01,  8.24384655e-02,
       -1.21212857e-02, -2.22867124e-03, -7.17197738e-02,  3.14835852e-02,
       -3.43224186e-02, -1.86117313e-02,  8.17194202e-04,  2.74316298e-02,
       -1.35483243e-02,  4.86015085e-02,  3.54641265e-02,  7.35107965e-03,
        3.71000308e-02,  1.88224778e-02,  2.09377452e-02,  1.81010386e-02,
        5.60784924e-03, -1.35455250e-02,  1.80556399e-02, -1.38761827e-02,
       -4.86942266e-02, -3.10935987e-02,  5.38412398e-03,  4.05557224e-03,
        1.11012385e-01,  2.28846471e-02, -2.31421715e-02,  2.86183433e-02,
        2.15776294e-02, -3.03614288e-02, -2.62225143e-03,  1.39007237e-02,
        1.87049231e-02,  2.09891975e-02, -5.42201101e-02,  1.29660755e-02,
       -4.22778223e-02, -3.30250073e-02,  1.91056569e-02,  7.31147739e-02,
       -9.65946680e-02, -8.62157949e-03, -4.91365228e-02,  7.13588102e-02,
       -2.52707838e-02,  6.86863350e-03,  4.58554372e-02,  5.70405992e-02,
       -2.52462554e-03,  

In [55]:
connectomes[0][:5][0]

array([ 9.34730255e-02,  1.46902344e-01,  6.78925634e-02,  1.51411152e-02,
        7.02207934e-02,  6.39970577e-02,  5.53819618e-02, -3.53354226e-02,
        6.85829692e-02,  2.92706086e-02,  4.19890253e-03,  4.92138106e-02,
       -2.90015042e-02, -1.51080456e-02,  1.24068863e-02,  1.30965922e-02,
       -4.61369848e-03,  2.04644974e-02, -4.05810555e-02,  1.43406363e-02,
        2.35634212e-02,  3.95715820e-03,  6.63870852e-03,  3.79724360e-02,
       -4.04448938e-03,  1.34259117e-02, -7.13744643e-02,  7.81552946e-02,
        5.97448140e-02, -6.26566505e-02,  3.24044116e-02,  8.49646651e-02,
        6.08435605e-02,  4.92061012e-02, -2.05044779e-03, -3.10447287e-02,
       -3.27297546e-03, -2.62885362e-02,  3.23118946e-02, -6.83772043e-02,
       -4.61176400e-02, -3.79851738e-02,  2.05866232e-02, -1.60287418e-02,
       -3.28919086e-02,  2.17710390e-02, -1.90090163e-02, -2.65369119e-03,
       -1.03271091e-02,  1.79561748e-02,  5.38791995e-02, -2.41742884e-02,
        6.74939732e-02, -