In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt

In [43]:
def ParseAverageMetrics(fileName, dir):
    """Read one metrics CSV and return its contents as a dictionary.

    The file must have a 'Value' column whose rows are read by position:
    row 0 nodal efficiency, row 1 global efficiency, row 2 average
    clustering, row 3 degree centrality. The two per-node rows are encoded
    as "node: value; node: value; ...".

    Args:
        fileName: Name of the metrics CSV inside ``dir``.
        dir: Directory that contains the metrics CSV.

    Returns:
        dict with the keys 'Nodal Efficiency' and 'Degree Centrality'
        (each a dict mapping int node id -> float) and 'Global Efficiency'
        and 'Average Clustering' (each a float).

    Raises:
        FileNotFoundError: If the CSV does not exist.
        ValueError: If a per-node entry is not of the form "node: value".
    """
    # Local import so the function does not depend on a later cell importing os.
    import os

    def _parse_node_values(encoded):
        """Turn "node: value; node: value" into {int node: float value}."""
        nodeValues = {}
        for item in encoded.split(';'):
            if item.strip():  # Skip the empty piece left by a trailing ';'
                key, val = item.split(':')
                nodeValues[int(key.strip())] = float(val.strip())
        return nodeValues

    # os.path.join replaces f"{dir}\{fileName}": "\{" is an invalid escape
    # sequence (SyntaxWarning) and the hard-coded backslash only forms a
    # valid path on Windows.
    fullPath = os.path.join(dir, fileName)

    values = pd.read_csv(fullPath)['Value']

    # Key order matches the order the original implementation inserted them.
    return {
        'Nodal Efficiency': _parse_node_values(values.iloc[0]),
        'Global Efficiency': float(values.iloc[1]),
        'Degree Centrality': _parse_node_values(values.iloc[3]),
        'Average Clustering': float(values.iloc[2]),
    }
    
    

  fullPath = f"{dir}\{fileName}"


In [3]:
def ParsePhenotypicData(fileName, pathToPhenotypicCSV):
    """Look up one participant's phenotypic record by FILE_ID.

    Args:
        fileName: Value to match against the 'FILE_ID' column.
        pathToPhenotypicCSV: Path of the phenotypic CSV to read.

    Returns:
        An empty dict when no row matches. Otherwise a dict holding 'FIQ',
        'HANDEDNESS_CATEGORY', 'AGE_AT_SCAN', 'SEX' and 'DX_GROUP' taken from
        the first matching row. FIQ, AGE_AT_SCAN and SEX are floats, or the
        sentinel -9999 when missing; HANDEDNESS_CATEGORY is kept as read, or
        "Unknown" when missing; DX_GROUP is copied through unchanged.
    """
    numericColumns = ('FIQ', 'AGE_AT_SCAN', 'SEX')

    table = pd.read_csv(pathToPhenotypicCSV)
    matches = table[table['FILE_ID'] == fileName]

    record = {}
    if matches.empty:
        return record

    for column in ('FIQ', 'HANDEDNESS_CATEGORY', 'AGE_AT_SCAN', 'SEX'):
        cell = matches[column].values[0]
        isNumeric = column in numericColumns
        if pd.isna(cell):
            # Missing value: numeric columns get a sentinel, others a label.
            record[column] = -9999 if isNumeric else "Unknown"
        else:
            record[column] = float(cell) if isNumeric else cell

    record['DX_GROUP'] = matches['DX_GROUP'].values[0]
    return record
    
    # print(df.head())

In [4]:
def FindNodesFromBrodmanns(ba_nums: list, pathToMapping):
    """Map Brodmann area numbers to the ROI numbers labelled with them.

    The mapping CSV must have a 'ba.label' column (text) and a 'ROI number'
    column. The area number of a row is the first run of digits found in its
    'ba.label'; rows whose label is missing or contains no digits match no
    area instead of aborting the lookup.

    Args:
        ba_nums: Brodmann area numbers to look up.
        pathToMapping: Path of the ROI-to-label mapping CSV.

    Returns:
        dict mapping each requested area number to the list of 'ROI number'
        values of the matching rows (an empty list when nothing matches).
    """
    df = pd.read_csv(pathToMapping)

    # Extract the area number once for all rows rather than once per requested
    # area. errors='coerce' leaves NaN for labels without digits; NaN compares
    # unequal to every area, whereas astype(int) would raise on it.
    rowAreas = pd.to_numeric(df['ba.label'].str.extract(r'(\d+)')[0], errors='coerce')

    return {ba: df.loc[rowAreas == ba, 'ROI number'].tolist() for ba in ba_nums}

In [33]:
import csv
import os
def OutputGraphMetrics(data, dir, fileName):
    """Write a metrics dictionary to ``<dir>/<fileName>_metrics.csv``.

    The output directory is created if needed. The file has a
    "Metric,Value" header followed by one row per entry of ``data``;
    dict-valued entries are flattened to "key: value; key: value".

    Args:
        data: Mapping of metric name to a scalar or to a dict of values.
        dir: Output directory.
        fileName: Base name; "_metrics.csv" is appended to it.
    """
    os.makedirs(dir, exist_ok=True)
    destination = os.path.join(dir, f"{fileName}_metrics.csv")

    def _as_cell(metric):
        # Dictionaries are stored as a single "; "-separated string.
        if isinstance(metric, dict):
            return "; ".join(f"{k}: {v}" for k, v in metric.items())
        return metric

    with open(destination, mode="w", newline="") as handle:
        out = csv.writer(handle)
        out.writerow(["Metric", "Value"])
        out.writerows([name, _as_cell(metric)] for name, metric in data.items())


In [None]:
# # Disabled cell: averages each participant's metrics across the threshold folders listed below.
# import os

# from glob import glob
# outDir = R"C:\GIT\Connectomics\Research_Project\Binary_Notebooks\Binary_Output\Averaged"
# folders = [R"10%\NetworkMetrics", R"15%\NetworkMetrics", R"20%\NetworkMetrics", R"25%\NetworkMetrics", R"30%\NetworkMetrics"]

# file_list = [os.path.basename(file) for file in glob(os.path.join(folders[0], "*.csv"))]

# print(file_list)

# counter = 0
# for fileName in file_list:
#     # dfs = []
#     metrics = []
    
#     for folder in folders:
#         file_path = os.path.join(folder, fileName)
#         if os.path.exists(file_path):
#             # dfs.append(pd.read_csv(file_path))
#             metrics.append(ParseMetrics(file_path))
    

#     averageMetrics = {
#         "Nodal Efficiency" : {},
#         "Global Efficiency" : 0,
#         "Average Clustering" : 0,
#         "Degree Centrality" : {}
#                     }
#     allNodes = metrics[0]['Nodal Efficiency'].keys()
#     # print(allNodes)
#     for node in allNodes:
#         averageMetrics['Nodal Efficiency'][node] = sum(d['Nodal Efficiency'][node] for d in metrics) / len(metrics)
#         averageMetrics["Degree Centrality"][node] = sum (d['Degree Centrality'][node] for d in metrics) / len(metrics)
#         averageMetrics['Global Efficiency'] = sum (d['Global Efficiency'] for d in metrics) / len(metrics)
#         averageMetrics["Average Clustering"] = sum(d['Average Clustering'] for d in metrics) / len(metrics)
        
#     # print(averageMetrics)
#     strippedFileName = fileName.split('_rois')[0]
#     print(strippedFileName)
    
#     OutputGraphMetrics(averageMetrics, outDir, strippedFileName)
    


['Caltech_0051456_rois_cc400_metrics.csv', 'Caltech_0051457_rois_cc400_metrics.csv', 'Caltech_0051458_rois_cc400_metrics.csv', 'Caltech_0051459_rois_cc400_metrics.csv', 'Caltech_0051460_rois_cc400_metrics.csv', 'Caltech_0051461_rois_cc400_metrics.csv', 'Caltech_0051462_rois_cc400_metrics.csv', 'Caltech_0051463_rois_cc400_metrics.csv', 'Caltech_0051464_rois_cc400_metrics.csv', 'Caltech_0051465_rois_cc400_metrics.csv', 'Caltech_0051466_rois_cc400_metrics.csv', 'Caltech_0051467_rois_cc400_metrics.csv', 'Caltech_0051468_rois_cc400_metrics.csv', 'Caltech_0051469_rois_cc400_metrics.csv', 'Caltech_0051470_rois_cc400_metrics.csv', 'Caltech_0051471_rois_cc400_metrics.csv', 'Caltech_0051472_rois_cc400_metrics.csv', 'Caltech_0051473_rois_cc400_metrics.csv', 'Caltech_0051474_rois_cc400_metrics.csv', 'Caltech_0051476_rois_cc400_metrics.csv', 'Caltech_0051477_rois_cc400_metrics.csv', 'Caltech_0051478_rois_cc400_metrics.csv', 'Caltech_0051479_rois_cc400_metrics.csv', 'Caltech_0051480_rois_cc400_metri

In [51]:
import os
metricsDir =  R"C:\GIT\Connectomics\Research_Project\Binary_Notebooks\Binary_Output\Averaged"
pathToPheno = R"C:\GIT\Connectomics\Research_Project\Phenotypic_V1_0b_preprocessed1.csv"

# Only CSV files are metrics files; sorting gives a reproducible row order.
fileNames = sorted(name for name in os.listdir(metricsDir) if name.endswith(".csv"))

# Output columns, in the order they are written to the CSV.
columns = [
    "Participant ID", "IQ", "Node ID", "BA", "Age", "Handedness", "Sex", "DX_Group",
    "Nodal Efficiency", "Degree Centrality", "Average Clustering", "Global Efficiency",
]

ba_nums = [6, 7, 9, 10, 18, 19, 21, 37, 39, 40, 45, 46, 47]
pathToMap = R"C:\GIT\Connectomics\Research_Project\labeled_regions_ba.csv"

# Mapping of Brodmann area -> list of nodes labelled with that area.
brodmannMapping = FindNodesFromBrodmanns(ba_nums, pathToMap)
print(brodmannMapping)

# Parse every participant exactly once. Doing this inside the per-area loop
# repeated all file reads (including a full re-read of the phenotypic CSV per
# participant) once per Brodmann area.
participants = []
for metricsFileName in fileNames:
    strippedFileName = metricsFileName.split('_rois')[0]
    metrics = ParseAverageMetrics(metricsFileName, metricsDir)
    phenotypic = ParsePhenotypicData(strippedFileName, pathToPheno)

    # A participant is usable only when a phenotypic record was found, none of
    # its values is a missing-data sentinel, and the clustering is non-zero.
    hasMissingValue = any(value == -9999 or value == "Unknown" for value in phenotypic.values())
    validParticipant = bool(phenotypic) and not hasMissingValue and metrics['Average Clustering'] != 0
    if validParticipant:
        participants.append((strippedFileName, metrics, phenotypic))

# One record per (Brodmann area, participant, node belonging to that area).
records = []
for ba, nodes in brodmannMapping.items():
    print(ba, nodes)
    # NOTE(review): assumes the node ids in the metrics files use the same
    # numbering as 'ROI number' in the mapping CSV -- confirm (0- vs 1-based).
    baNodes = set(nodes)

    for strippedFileName, metrics, phenotypic in participants:
        for node_id in metrics['Nodal Efficiency']:
            if node_id not in baNodes:
                continue
            # Appending inside the node loop keeps every node of the area; the
            # earlier version appended after the loop and kept only the last.
            records.append({
                "Participant ID": strippedFileName,
                "IQ": phenotypic.get("FIQ", "N/A"),
                "Node ID": node_id,
                "BA": ba,
                "Age": phenotypic.get("AGE_AT_SCAN", "N/A"),
                "Handedness": phenotypic.get("HANDEDNESS_CATEGORY", "N/A"),
                "Sex": phenotypic.get("SEX", "N/A"),
                "DX_Group": phenotypic.get("DX_GROUP", "N/A"),
                "Nodal Efficiency": metrics["Nodal Efficiency"][node_id],
                "Degree Centrality": metrics["Degree Centrality"][node_id],
                "Average Clustering": metrics["Average Clustering"],
                "Global Efficiency": metrics["Global Efficiency"],
            })

# Build the frame once from the records; passing columns keeps the header even
# when there are no rows. It is written a single time so rows of earlier areas
# are not overwritten by later ones.
final_df = pd.DataFrame(records, columns=columns)
final_df.to_csv("test.csv", index=False)

# To get one file per area instead (an earlier draft wrote to
# "BA/25%/output_ba_<ba>.csv"), group final_df by "BA" and save each group.
    



{6: [7, 16, 28, 56, 72, 81, 113, 129, 145, 147, 150, 159, 165, 167, 174, 187, 210, 215, 241, 281, 291, 293, 304, 316, 334, 345, 352, 393], 7: [51, 61, 104, 118, 181, 211, 240, 263, 275, 287, 323, 327, 355, 360, 376, 381], 9: [53, 88, 96, 116, 178, 212, 249, 265, 279, 289, 386, 400], 10: [2, 60, 80, 84, 106, 142, 160, 172, 190, 202, 232, 235, 254, 264, 297, 379, 387, 392, 397], 18: [24, 43, 58, 62, 63, 76, 78, 92, 114, 136, 144, 151, 176, 180, 204, 225, 253, 269, 286, 318, 321, 332, 340, 351, 353, 362, 367, 378], 19: [1, 6, 13, 18, 26, 27, 37, 70, 105, 108, 146, 169, 173, 183, 219, 229, 231, 234, 282, 294, 295, 301, 326, 335, 357, 372, 377], 21: [36, 52, 57, 69, 155, 197, 209, 228, 280, 364, 374], 37: [17, 41, 55, 83, 94, 97, 110, 119, 127, 134, 175, 179, 188, 195, 218, 227, 230, 247, 251, 258, 259, 276, 278, 290, 296, 310, 315, 322, 346, 349, 361], 39: [8, 30, 73, 120, 135, 138, 141, 164, 189, 207, 217, 252, 308, 314, 319, 341, 356, 395], 40: [49, 65, 68, 102, 115, 199, 206, 237, 250, 

KeyboardInterrupt: 