In [11]:
import os
import numpy as np
import scipy.io as sio
import pandas as pd

# === CONFIGURATION ===
# Root directory onto which the per-condition folders in PROJECTS are joined.
BASE_DIR = r'G:\results'   # Update this if needed

# .mat file whose 'names' variable is read to obtain the ROI names.
MAT_FILE_FOR_NAMES = r'G:\results\conn_project01_Rest\firstlevel\RRC_01\resultsROI_Condition001.mat'

# Condition name -> results folder, relative to BASE_DIR.
# SST_Go and SST_Stop point at the same folder; the file loop tells them
# apart by the Condition001 / Condition002 tag in each file name.
PROJECTS = dict(
    Rest=r'conn_project01_Rest\firstlevel\RRC_01',
    SCAP=r'conn_project02_Scap\firstlevel\RRC_01',
    SST_Go=r'conn_project03_Stopsignal\firstlevel\RRC_01',
    SST_Stop=r'conn_project03_Stopsignal\firstlevel\RRC_01',
)

# Number of ROIs whose pairwise (upper-triangle) values become features.
N_ROIS = 6

# === STEP 1: Extract Real ROI Names ===
roi_mat = sio.loadmat(MAT_FILE_FOR_NAMES)
roi_names_raw = roi_mat['names']

# Entry [0, k] of 'names' wraps the k-th name; take its first element and
# convert it to a plain Python string.
roi_names = [
    str(roi_names_raw[0, col][0])
    for col in range(roi_names_raw.shape[1])
]

# One feature name per unordered ROI pair (a < b) among the first N_ROIS
# names, in row-major upper-triangle order -- the order in which
# np.triu_indices(N_ROIS, k=1) enumerates matrix entries.  Spaces and
# hyphens are replaced by underscores in the finished name.
feature_names = [
    f"{roi_names[a]}_to_{roi_names[b]}".replace(' ', '_').replace('-', '_')
    for a in range(N_ROIS)
    for b in range(a + 1, N_ROIS)
]

# === FUNCTION TO DEFINE LABELS ===
def get_label(subject_id, condition):
    """Return the binary label (0 or 1) for a subject under a condition.

    A subject whose ID is at or below the condition's cut-off gets 0,
    otherwise 1.  The cut-off is 121 for 'Rest' and 'SCAP' and 120 for
    'SST_Go' and 'SST_Stop'.  Any other condition yields None.
    """
    cutoffs = {'Rest': 121, 'SCAP': 121, 'SST_Go': 120, 'SST_Stop': 120}
    cutoff = cutoffs.get(condition)
    if cutoff is None:
        # Unknown condition: no label is defined for it.
        return None
    if subject_id <= cutoff:
        return 0
    return 1

# === FUNCTION TO EXTRACT FEATURES ===
def extract_features(mat_file, n_rois=None):
    """Load a .mat file and return the strict upper-triangle entries of 'Z'.

    Parameters
    ----------
    mat_file : str
        Path to a .mat file that contains a variable named 'Z'.
    n_rois : int, optional
        Size of the leading square region of Z to read.  When omitted, the
        module-level N_ROIS is used, which is the original behaviour.

    Returns
    -------
    numpy.ndarray
        Z[i, j] for every 0 <= i < j < n_rois, in row-major order (the order
        produced by np.triu_indices(n_rois, k=1)).

    Raises
    ------
    KeyError
        If the file has no variable named 'Z'.
    IndexError
        If Z is too small (or has too few dimensions) for n_rois.
    """
    if n_rois is None:
        n_rois = N_ROIS

    data = sio.loadmat(mat_file)
    if 'Z' not in data:
        raise KeyError(f"variable 'Z' not found in {mat_file}")
    Z = data['Z']

    try:
        return Z[np.triu_indices(n_rois, k=1)]
    except IndexError as exc:
        # Same exception type numpy raises, but naming the offending file.
        raise IndexError(
            f"'Z' in {mat_file} has shape {Z.shape}, too small for n_rois={n_rois}"
        ) from exc

# === STEP 2: Generate Regular Datasets ===
# Per-subject feature vectors of the two stop-signal conditions, keyed by
# subject ID, collected here so a Stop-minus-Go dataset can be built later.
all_sst_go = {}
all_sst_stop = {}

for condition, path in PROJECTS.items():
    rows = []
    full_path = os.path.join(BASE_DIR, path)

    # SST_Stop is read from the Condition002 files; Rest, SCAP and SST_Go are
    # read from the Condition001 files.
    condition_tag = 'Condition002' if condition == 'SST_Stop' else 'Condition001'

    # os.listdir() returns entries in arbitrary order, so sort them to make
    # the row order of the saved CSV reproducible across runs and machines.
    for file in sorted(os.listdir(full_path)):
        if not (file.startswith('resultsROI_Subject') and file.endswith('.mat')):
            continue
        if condition_tag not in file:
            continue

        # The second '_'-separated token is 'Subject<number>'; strip the
        # prefix to get the numeric ID.
        subject_id = int(file.split('_')[1].replace('Subject', ''))
        label = get_label(subject_id, condition)
        features = extract_features(os.path.join(full_path, file))

        row = [subject_id] + list(features) + [label]
        rows.append(row)

        # Store for SST difference
        if condition == 'SST_Go':
            all_sst_go[subject_id] = features
        elif condition == 'SST_Stop':
            all_sst_stop[subject_id] = features

    # One row per matching file: subject ID, feature values, label.
    df = pd.DataFrame(rows, columns=['Subject_ID'] + feature_names + ['Label'])
    # Relative path: the CSV is written to the current working directory.
    df.to_csv(f"{condition}_dataset.csv", index=False)
    print(f"✅ {condition}_dataset.csv saved! Shape: {df.shape}")

# === STEP 3: Generate SST_Diff Dataset ===
# A difference can only be formed for subjects that have both a Go and a
# Stop feature vector.
common_subjects = all_sst_go.keys() & all_sst_stop.keys()

# One row per subject in ascending ID order: ID, (Stop - Go) feature values,
# label.  get_label applies the same cut-off to SST_Go and SST_Stop, so the
# Go label is used for the difference row.
diff_rows = [
    [sid, *(all_sst_stop[sid] - all_sst_go[sid]), get_label(sid, 'SST_Go')]
    for sid in sorted(common_subjects)
]

df_diff = pd.DataFrame(diff_rows, columns=['Subject_ID', *feature_names, 'Label'])
df_diff.to_csv("SST_Diff_dataset.csv", index=False)
print(f"✅ SST_Diff_dataset.csv saved! Shape: {df_diff.shape}")

print("\n🎉 All datasets successfully created with real ROI names!")


✅ Rest_dataset.csv saved! Shape: (162, 17)
✅ SCAP_dataset.csv saved! Shape: (162, 17)
✅ SST_Go_dataset.csv saved! Shape: (160, 17)
✅ SST_Stop_dataset.csv saved! Shape: (160, 17)
✅ SST_Diff_dataset.csv saved! Shape: (160, 17)

🎉 All datasets successfully created with real ROI names!


In [1]:
import pandas as pd
import os

# === CONFIGURATION ===
DATASET_DIR = r'G:\csv_outputs'
output_file = os.path.join(DATASET_DIR, 'Combined_4Condition_Cleaned.csv')
conditions = ['Rest', 'SCAP', 'SST_Go', 'SST_Stop']

# Load each condition's CSV and prefix every column except the join key with
# the condition name (so 'Label' becomes '<cond>_Label').
datasets = {}
for cond in conditions:
    df = pd.read_csv(os.path.join(DATASET_DIR, f"{cond}_dataset.csv"))
    prefixed = {col: f"{cond}_{col}" for col in df.columns if col != 'Subject_ID'}
    df = df.rename(columns=prefixed)
    datasets[cond] = df

# Merge all on Subject_ID
# NOTE(review): the inner join assumes a given Subject_ID denotes the same
# participant in every condition's CSV -- confirm, since get_label uses
# different ID cut-offs (121 vs 120) for Rest/SCAP and the SST conditions.
merged = datasets[conditions[0]]
for cond in conditions[1:]:
    merged = pd.merge(merged, datasets[cond], on='Subject_ID', how='inner')

# Keep only rows where all labels match
label_cols = [f'{cond}_Label' for cond in conditions]
labels_agree = merged[label_cols[1:]].eq(merged['Rest_Label'], axis=0).all(axis=1)
merged = merged[labels_agree].copy()

# Drop all but one Label
merged = merged.assign(Label=merged['Rest_Label']).drop(columns=label_cols)

# Save final dataset
merged.to_csv(output_file, index=False)
print(f"✅ Cleaned combined dataset saved: {output_file}")
# Feature count excludes the Subject_ID and Label columns.
print(f"Subjects retained: {merged.shape[0]}, Features: {merged.shape[1] - 2}")


✅ Cleaned combined dataset saved: G:\csv_outputs\Combined_4Condition_Cleaned.csv
Subjects retained: 159, Features: 60
