__Imports__

In [1]:
import numpy as np
import networkx as nx
import os
from nilearn.image import load_img
from nilearn.plotting import find_parcellation_cut_coords


In [41]:

def FileToMatrix(fileName, dir):
    '''
    Load one time-series file and return its correlation matrix.

    Parameters:
        fileName: name of the time-series file inside `dir`.
        dir: directory that contains the file.

    Returns:
        Square numpy array holding the Pearson correlation between every pair
        of columns in the file, with the diagonal set to 0.
    '''
    # os.path.join uses the platform's separator. The previous hand-built
    # path used a literal backslash, which is an invalid escape sequence in
    # an f-string and only produced a usable path on Windows.
    fullPath = os.path.join(dir, fileName)
    data = np.loadtxt(fullPath)

    # np.corrcoef treats each ROW as a variable, so transpose to correlate the
    # file's columns (assumed to be one column per ROI - confirm for all datasets)
    correlationMatrix = np.corrcoef(data.T)

    # Zero the diagonal: a region's correlation with itself is not an edge
    np.fill_diagonal(correlationMatrix, 0)

    return correlationMatrix
    

  fullPath = f"{dir}\{fileName}"


In [42]:
def LoadAtlas(fileName):
    '''
    Load the atlas image stored at `fileName` and return the cut coordinates
    that nilearn's find_parcellation_cut_coords computes for it.
    '''
    return find_parcellation_cut_coords(load_img(fileName))

In [43]:
def PercentageThresholding(correlationMatrix, percentageToKeep):
    '''
    Apply percentage-based thresholding and return a binary adjacency matrix.

    The matrix is Fisher z-transformed and its entries are ranked from
    strongest to weakest. The ranking runs over the whole matrix (diagonal and
    both triangles included), and the cut-off is the value at position
    int(number_of_entries * percentageToKeep). Entries tied with the cut-off
    are all kept, so slightly more than the requested share may survive.

    Parameters:
        correlationMatrix: array of correlation coefficients.
        percentageToKeep: fraction of entries to keep, between 0 and 1.

    Returns:
        Integer array of the same shape: 1 where the entry is kept, 0 where it
        is thresholded away. NaN entries are never kept.

    Raises:
        ValueError: if percentageToKeep is outside [0, 1].
    '''
    if not 0 <= percentageToKeep <= 1:
        raise ValueError(
            f"percentageToKeep must be between 0 and 1, got {percentageToKeep}"
        )

    # Apply Fisher z-transformation to the correlation matrix
    transformedMatrix = np.arctanh(correlationMatrix)

    # Start from "nothing kept"; int dtype because the result is binary
    adjMatrix = np.zeros(transformedMatrix.shape, dtype=int)

    # NaN entries cannot be ranked. np.sort places them last, so after the
    # reversal they would sit at the top and poison the threshold.
    flatValues = transformedMatrix.flatten()
    rankableValues = flatValues[~np.isnan(flatValues)]

    # How many elements to keep, counted against the full matrix size
    numElementsToKeep = min(int(flatValues.size * percentageToKeep), rankableValues.size)
    if numElementsToKeep == 0:
        # Without this guard the index below would be -1, i.e. the SMALLEST
        # value, and every entry would be kept instead of none.
        return adjMatrix

    # Sort descending and read off the weakest value that still makes the cut
    sortedValues = np.sort(rankableValues)[::-1]
    thresholdValue = sortedValues[numElementsToKeep - 1]

    # 1 if keeping, 0 if thresholded (comparisons against NaN are False)
    adjMatrix[transformedMatrix >= thresholdValue] = 1

    return adjMatrix
    
    

In [44]:
def OutputBrainNetViewer(nodeData, G, coords, outputNodeName, outputEdgeName, weighted=False):
    '''
    Write the node and edge files used by Brain Net Viewer.

    nodeData is modified in place: it gains the columns roi, x, y, z and
    weightedDegree. (Presumably a pandas DataFrame whose index matches the
    node labels of G - confirm against the caller.)

    Parameters:
        nodeData: table of per-node data, one row per node.
        G: graph supplying the degrees and the adjacency matrix.
        coords: sequence of (x, y, z) coordinates, one per row of nodeData.
        outputNodeName: path of the tab-separated node file to write.
        outputEdgeName: path of the edge (adjacency matrix) file to write.
        weighted: when True, degrees are summed over the 'weight' edge
            attribute; otherwise plain degrees are used.
    '''
    # Per-node degree lookup, weighted or plain depending on the flag
    degreeByNode = dict(G.degree(weight='weight')) if weighted else dict(G.degree())

    nodeData['roi'] = nodeData.index

    # Attach the atlas coordinates, one column per axis
    for axis, column in enumerate(('x', 'y', 'z')):
        nodeData[column] = [point[axis] for point in coords]

    nodeData['weightedDegree'] = nodeData.index.map(degreeByNode)

    # Node file: tab-separated, no header and no index column
    nodeColumns = ['x', 'y', 'z', 'roi', 'weightedDegree']
    nodeData[nodeColumns].to_csv(outputNodeName, sep='\t', index=False, header=False)

    # Edge file: the graph's adjacency matrix as plain text
    np.savetxt(outputEdgeName, nx.to_numpy_array(G))

In [45]:
def OutputMatrix(matrix, dir, fileName):
    '''
    Save `matrix` as a text file named "<stem>_mat.txt" inside `dir`.

    <stem> is everything in `fileName` before its first dot, e.g.
    Caltech_0051456_rois_cc400.1D -> Caltech_0051456_rois_cc400_mat.txt.
    The directory is created when it does not exist yet.
    '''
    os.makedirs(dir, exist_ok=True)

    # Keep only the part of the name that precedes the first dot
    stem, _, _ = fileName.partition('.')

    np.savetxt(os.path.join(dir, stem + "_mat.txt"), matrix)

__Data Acquisition__

In [None]:

dataDirectory = R"C:\GIT\Connectomics\Research_Project\Data\ALL\Outputs\ccs\nofilt_noglobal\rois_cc400"
fileNames = os.listdir(dataDirectory)

# Threshold every time-series file, reporting progress on every tenth one
counter = 0
for counter, fileName in enumerate(fileNames):
    if counter % 10 == 0:
        print(f"Counter: {counter}, Last File Parsed: {fileName}")

    correlationMat = FileToMatrix(fileName, dataDirectory)
    adjacencyMat = PercentageThresholding(correlationMat, 0.30)
    # OutputMatrix(adjacencyMat, R"Binary_Output/30%/Matrices", fileName) <-- Commented to ensure our data is not overwritten

# Leave counter equal to the number of files processed
counter = len(fileNames)
    



Counter: 0, Last File Parsed: Caltech_0051456_rois_cc400.1D
Counter: 10, Last File Parsed: Caltech_0051466_rois_cc400.1D
Counter: 20, Last File Parsed: Caltech_0051477_rois_cc400.1D
Counter: 30, Last File Parsed: Caltech_0051487_rois_cc400.1D
Counter: 40, Last File Parsed: CMU_b_0050657_rois_cc400.1D
Counter: 50, Last File Parsed: KKI_0050780_rois_cc400.1D
Counter: 60, Last File Parsed: KKI_0050791_rois_cc400.1D
Counter: 70, Last File Parsed: KKI_0050814_rois_cc400.1D
Counter: 80, Last File Parsed: KKI_0050825_rois_cc400.1D
Counter: 90, Last File Parsed: Leuven_1_0050692_rois_cc400.1D
Counter: 100, Last File Parsed: Leuven_1_0050702_rois_cc400.1D
Counter: 110, Last File Parsed: Leuven_2_0050722_rois_cc400.1D
Counter: 120, Last File Parsed: Leuven_2_0050733_rois_cc400.1D
Counter: 130, Last File Parsed: Leuven_2_0050745_rois_cc400.1D
Counter: 140, Last File Parsed: Leuven_2_0050756_rois_cc400.1D
Counter: 150, Last File Parsed: MaxMun_a_0051370_rois_cc400.1D
Counter: 160, Last File Parsed

# Mapping Nodes to Brain #

In [None]:
# For each of our correlation matrices: 

# 1. Create a ".node / .edge" file, mapped to the Craddock CC400 atlas

# 1.1 This maps each node to a brain region, so we can see which regions matter for intelligence. It also gives us a reference for judging the significance of our graph analysis

# Graph Analysis #

In [19]:
import csv
import os
def OutputGraphMetrics(data, dir, fileName):
    '''
    Write a dictionary of graph metrics to "<base>_metrics.csv" inside `dir`.

    <base> is `fileName` without its "_mat.txt" suffix. A name that does not
    carry that suffix only loses its file extension, instead of having its
    last eight characters chopped off blindly.

    Parameters:
        data: mapping of metric name -> value. A value that is itself a dict
            (per-node metrics) is flattened to "key: value; key: value".
        dir: output directory; created when it does not exist yet.
        fileName: name of the matrix file the metrics were computed from.
    '''
    # Ensure the directory exists
    os.makedirs(dir, exist_ok=True)

    # Derive the base name: strip "_mat.txt" only when it is really there
    matrixSuffix = "_mat.txt"
    if fileName.endswith(matrixSuffix):
        baseName = fileName[:-len(matrixSuffix)]
    else:
        baseName = os.path.splitext(fileName)[0]

    outputPath = os.path.join(dir, f"{baseName}_metrics.csv")

    # newline="" lets the csv module control line endings itself
    with open(outputPath, mode="w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["Metric", "Value"])  # Write header

        for key, value in data.items():
            if isinstance(value, dict):
                # Convert dictionary to a single string so it fits in one cell
                value = "; ".join(f"{k}: {v}" for k, v in value.items())
            writer.writerow([key, value])


In [20]:
# As described in 2.5.3.1 in the Hilger Paper
def CalculateNodalEfficiency(G):
    '''
    Calculates Nodal Efficiency for each node in a graph:
        - Where, Nodal Efficiency is defined as inversely proportional to the
          average shortest distance between node i and all other nodes j of
          the graph. Here it is the sum of 1 / d(i, j) over the nodes j that
          are reachable from i, divided by N - 1.

    Parameters:
        G: networkx graph; path lengths are counted in hops (unweighted).

    Returns:
        Dictionary mapping every node to its nodal efficiency. Nodes that
        cannot be reached from i add nothing to i's sum. A graph with fewer
        than two nodes gives 0.0 for its node (or an empty dictionary).
    '''
    N = G.number_of_nodes()
    if N < 2:
        # There are no "other" nodes, and dividing by N - 1 would raise
        # ZeroDivisionError, so define the efficiency as 0.
        return {i: 0.0 for i in G.nodes}

    nodalEfficiency = {}
    for i in G.nodes:
        # Shortest path lengths from node i to every node it can reach
        shortestPaths = nx.single_source_shortest_path_length(G, i)

        # Distance 0 is node i itself; skip it to avoid dividing by zero
        efficiencySum = sum(
            1 / distance for distance in shortestPaths.values() if distance > 0
        )
        nodalEfficiency[i] = efficiencySum / (N - 1)

    return nodalEfficiency

__Note on Multiple Thresholding__

This approach follows the Hilger paper. In essence, we will calculate network metrics for each of the 5 thresholding levels, and then average these network metrics together to be used in analysis. An alternative approach might be to average the correlation matrices, but the former seems more reasonable at the time of writing.

In [22]:
# For each of our correlation matrices (a single subject for now):
# 1. Build a graph from the thresholded adjacency matrix
matrix = np.loadtxt("Binary_Output/10%/Matrices/Caltech_0051456_rois_cc400_mat.txt")
G = nx.from_numpy_array(matrix)

# ? Average ?
# 2. Compute the graph metrics:

# 2.1 Nodal efficiency, per node (as described by the Hilger paper)
nodalEfficiency = CalculateNodalEfficiency(G)
# 2.2 Global efficiency
globalEfficiency = nx.global_efficiency(G)
# 2.3 Degree centrality, per node
degreeCentrality = nx.degree_centrality(G)
# 2.4 Average shortest path length; stays 0 when the graph is not connected
averageShortestPathLength = nx.average_shortest_path_length(G) if nx.is_connected(G) else 0
# 2.5 Average degree
averageDegree = np.mean([degree for _, degree in G.degree()])
# 2.6 Average clustering coefficient
averageClustering = nx.average_clustering(G)
# 2.7 Number of nodes and edges
numNodes = G.number_of_nodes()
numEdges = G.number_of_edges()

# Collect everything under the labels used in the metrics CSV
data = {
    "Nodal Efficiency": nodalEfficiency,  # per-node dictionary
    "Global Efficiency": globalEfficiency,
    "Degree Centrality": degreeCentrality,  # per-node dictionary
    "Average Shortest Path Length": averageShortestPathLength,
    "Average Degree": averageDegree,
    "Average Clustering Coefficient": averageClustering,
    "Number of Nodes": numNodes,
    "Number of Edges": numEdges,
}

# Write the metrics next to the other outputs of the 10% threshold
dataDirectory = R"C:\GIT\Connectomics\Research_Project\Binary_Notebooks\Binary_Output\10%\NetworkMetrics"
fileName = "Caltech_0051456_rois_cc400_mat.txt"

OutputGraphMetrics(data, dataDirectory, fileName)