In [1]:
import os
import glob
import random
import numpy as np
import scipy.sparse as sp
import torch

In [2]:
def read_vtk(filename):
    """Read vertices and polygons from a legacy ASCII VTK PolyData file.

    The file is scanned for the first line starting with ``POINTS`` and the
    first line starting with ``POLYGONS``; the second token on each of those
    header lines is taken as the number of vertices / faces.  Values are read
    as a whitespace-separated token stream, so files that put several points
    (or polygons) on one line are parsed the same way as files that put one
    per line.

    Parameters
    ----------
    filename : str
        Path of the VTK file to read.

    Returns
    -------
    v : numpy.ndarray
        Float array of shape ``(nVert, 3)`` with the vertex coordinates.
    f : numpy.ndarray
        Integer array with one row of vertex indices per polygon (the leading
        per-polygon vertex count is dropped).  For a triangle mesh the shape
        is ``(nFaces, 3)``.

    Raises
    ------
    ValueError
        If a ``POINTS`` or ``POLYGONS`` section is missing, if the file ends
        before the declared number of values has been read, or if a value
        cannot be converted to a number.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, 'r') as fid:
        lines = fid.readlines()

    def section_start(keyword):
        """Return (index of the first data line, declared item count)."""
        for idx, line in enumerate(lines):
            if line.startswith(keyword):
                return idx + 1, int(line.split()[1])
        raise ValueError(
            "{!r}: no {} section found".format(filename, keyword))

    def tokens_from(start):
        """Yield whitespace-separated tokens from line ``start`` onwards."""
        for idx in range(start, len(lines)):
            for token in lines[idx].split():
                yield token

    def take(tokens, count, convert, keyword):
        """Consume ``count`` tokens and convert each with ``convert``."""
        values = []
        for _ in range(count):
            token = next(tokens, None)
            if token is None:
                raise ValueError(
                    "{!r}: unexpected end of file in {} section".format(
                        filename, keyword))
            values.append(convert(token))
        return values

    b, nVert = section_start("POINTS")
    coords = take(tokens_from(b), 3 * nVert, float, "POINTS")
    v = np.array(coords, dtype=float).reshape(nVert, 3)

    b, nFaces = section_start("POLYGONS")
    tokens = tokens_from(b)
    f = []
    for _ in range(nFaces):
        # Each polygon is stored as "<n> i_1 ... i_n"; keep only the indices.
        (size,) = take(tokens, 1, int, "POLYGONS")
        f.append(take(tokens, size, int, "POLYGONS"))

    f = np.array(f)
    return v, f

### Make an edge list

In [3]:
def encode_onehot(labels):
    """One-hot encode a 1-D sequence of hashable labels.

    Parameters
    ----------
    labels : iterable
        Class label of every sample (e.g. a list or 1-D numpy array).

    Returns
    -------
    numpy.ndarray
        ``int32`` array of shape ``(n_samples, n_classes)``.  Column ``k``
        corresponds to the ``k``-th distinct label in sorted order, so the
        encoding is the same on every run.  If the labels cannot be sorted
        (mixed, unorderable types) columns follow first-appearance order.
    """
    labels = list(labels)
    try:
        # Iterating a bare set gives a hash-dependent (for strings: per-process
        # random) column order; sorting makes the encoding reproducible.
        classes = sorted(set(labels))
    except TypeError:
        classes = list(dict.fromkeys(labels))
    class_index = {c: i for i, c in enumerate(classes)}

    labels_onehot = np.zeros((len(labels), len(classes)), dtype=np.int32)
    cols = np.fromiter((class_index[label] for label in labels),
                       dtype=np.intp, count=len(labels))
    # Set a single 1 per row without building an identity matrix per class.
    labels_onehot[np.arange(len(labels)), cols] = 1
    return labels_onehot

## Mesh

In [4]:
path = '../data/MMRR-21-20/'

# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# that the feature columns come out in the same order on every run.
features = []
for file in sorted(glob.glob(os.path.join(path, "lh.*.txt"))):
    feat = np.genfromtxt(file, dtype=np.float32)
    features.append(feat)
# One row per file after stacking; transpose so each file becomes a column.
features = np.array(features)
features = np.transpose(features)

# Per-vertex labels, one-hot encoded.
labels_path = os.path.join(path, "label")
labels_data = np.genfromtxt(os.path.join(labels_path, "lh.label.txt"))
labels = encode_onehot(labels_data)

In [5]:
# Vertices (v) and faces (f) of the left-hemisphere white surface.
v, f = read_vtk(os.path.join(path, 'lh.white.vtk'))

In [6]:
# Every face contributes its three directed edges (0->1, 1->2, 2->0); the
# edges of all faces are stacked into one (3 * n_faces, 2) array.
unordered_edges = np.concatenate(
    [f[:, [src, dst]] for src, dst in ((0, 1), (1, 2), (2, 0))],
    axis=0,
)

In [7]:
# Sparse adjacency matrix with a 1 for every listed (row, col) edge; it is
# square with one row/column per entry of `labels`.
adj = sp.coo_matrix(
    (np.ones(len(unordered_edges)), tuple(unordered_edges.T)),    # data, (row, col)
    shape=(labels.shape[0],) * 2,
    dtype=np.float32,
)

In [17]:
# Sum of every feature column.
colsum = np.array(features.sum(axis=0))

In [18]:
# Reciprocal of each column sum as a flat vector (a zero sum yields inf here).
c_inv = np.power(colsum, -1).ravel()

In [19]:
# Replace infinite reciprocals (from zero column sums) with 0, in place.
np.putmask(c_inv, np.isinf(c_inv), 0.)

In [20]:
# Sparse diagonal matrix holding the reciprocal column sums.
c_mat_inv = sp.diags(c_inv)

In [22]:
# Display the shape of the diagonal scaling matrix built from c_inv.
c_mat_inv.shape

(3, 3)

In [82]:
def normalize(mx):
    """Standardize every column of a matrix to zero mean and unit variance.

    The per-column variance is computed as ``mean(a**2) - mean(a)**2``.

    Parameters
    ----------
    mx : scipy sparse matrix or array-like
        2-D input.  Sparse input is supported without modifying ``mx``;
        dense array-likes are accepted as well.

    Returns
    -------
    Dense 2-D result with the same shape as ``mx`` (centering destroys
    sparsity).  For a scipy sparse *matrix* input this is a ``numpy.matrix``,
    for dense input a ``numpy.ndarray``.

    Notes
    -----
    Columns with zero variance are only centered (they come out as all
    zeros) instead of being divided by zero, which would give NaN/inf.
    """
    if sp.issparse(mx):
        mean = mx.mean(axis=0)
        # Element-wise square; multiply() also copes with COO input that
        # stores duplicate entries, unlike squaring the raw .data array.
        mean_sq = mx.multiply(mx).mean(axis=0)
        dense = mx.todense()
    else:
        dense = np.asarray(mx)
        mean = dense.mean(axis=0, keepdims=True)
        mean_sq = np.square(dense).mean(axis=0, keepdims=True)

    # Rounding can push the variance of a (near-)constant column slightly
    # below zero; clamp so the square root stays real.
    var = np.maximum(mean_sq - np.square(mean), 0)
    std = np.sqrt(var)
    # Avoid 0/0 for constant columns: dividing the centered zeros by 1
    # leaves them at 0.
    std = np.where(std > 0, std, np.ones_like(std))
    return (dense - mean) / std

In [24]:
# Number of feature columns.
features.shape[1]

3

In [78]:
# `adj_squared` was never assigned in any cell of this notebook, so the
# expression only worked with leftover kernel state.  Build the element-wise
# square here so the cell runs on a fresh kernel.
adj_squared = adj.multiply(adj)
# Shape of the per-column variance, mean(a**2) - mean(a)**2.
(adj_squared.mean(axis=0) - np.square(adj.mean(axis=0))).shape

(1, 149927)

In [None]:
faces = f
# Stack the three directed edges of every face (0->1, 1->2, 2->0) into a
# single (3 * n_faces, 2) array.
unordered_edges = np.concatenate(
    [faces[:, [src, dst]] for src, dst in ((0, 1), (1, 2), (2, 0))],
    axis=0)


In [None]:
# Shape of the second (column-index) entry of every edge.
unordered_edges[:, 1].shape

In [None]:
# Square sparse adjacency matrix (one row/column per entry of `labels`) with
# a 1 at every (row, col) pair listed in `unordered_edges`.
adj = sp.coo_matrix(
    (np.ones(len(unordered_edges)), tuple(unordered_edges.T)),    # data, (row, col)
    shape=(labels.shape[0],) * 2,
    dtype=np.float32,
)

In [None]:
# Random 70 / 10 / 20 split of the node indices into train / val / test.
n_nodes = labels.shape[0]
all_indices = np.arange(n_nodes)

# random.sample() needs a sequence: passing a set raises TypeError on
# Python 3.11+.  Sampling from a range and filtering in index order also
# keeps the result reproducible for a given random seed.
idx_test = random.sample(range(n_nodes), int(n_nodes*0.2))
_test_lookup = set(idx_test)
idx_train_val = [i for i in range(n_nodes) if i not in _test_lookup]

idx_val = random.sample(idx_train_val, int(n_nodes*0.1))
_val_lookup = set(idx_val)
idx_train = [i for i in idx_train_val if i not in _val_lookup]

In [None]:
# Container type and size of the train, test and validation index sets.
for split in (idx_train, idx_test, idx_val):
    print(type(split), len(split))

In [None]:
# The three splits together must account for every node.
sum(len(split) for split in (idx_train, idx_test, idx_val)) == n_nodes

## Now, let's read meshes of all subjects

In [36]:
data_path = "/data/human/Mindboggle/DL/mesh/"
# Every sub-directory of data_path is one subject.  os.listdir returns
# entries in arbitrary order, so sort to get a stable subject list (and,
# with a fixed seed, reproducible random splits).
all_subjects = sorted(
    subject for subject in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, subject))
)
n_subjects = len(all_subjects)
print(n_subjects)

101


In [7]:
# Hold out 20% of the subjects for testing, then draw 70% (of all subjects)
# for training; the remainder is the validation set.
# Set differences over strings come back in a hash-dependent order that can
# change between runs, which makes the later random.sample irreproducible
# even with a fixed seed -- filter in the original list order instead.
subj_test = random.sample(all_subjects, int(n_subjects*0.2))
_test_lookup = set(subj_test)
subj_tv = [subject for subject in all_subjects if subject not in _test_lookup]
subj_train = random.sample(subj_tv, int(n_subjects*0.7))
_train_lookup = set(subj_train)
sub_val = [subject for subject in subj_tv if subject not in _train_lookup]

In [8]:
# Sizes of the test, train+val, val and train subject lists.
print(*(len(group) for group in (subj_test, subj_tv, sub_val, subj_train)))

20 81 11 70


In [None]:
for epoch in range(20):
    # Re-draw the train/validation partition of the non-test subjects.
    subj_train = random.sample(subj_tv, int(n_subjects*0.7))
    # Keep subj_tv's order for the validation list; a set difference would
    # return the subjects in a hash-dependent order.
    _train_lookup = set(subj_train)
    sub_val = [subject for subject in subj_tv if subject not in _train_lookup]

    # TODO: load data for each subject
    # TODO: run model


In [17]:
import time
from tqdm import tqdm

In [18]:
# Label directory path; presumably holds the per-subject label files, but it
# is not read by any cell shown here -- TODO confirm.
label_path = "/data/human/Mindboggle/DL/label/"

In [34]:
# List the files available for one example subject.
os.listdir('/data/human/Mindboggle/DL/mesh/Afterthought-1/')

['lh.white.vtk',
 'lh.sphere.vtk',
 'rh.white.vtk',
 'rh.sphere.vtk',
 'lh.curv.txt',
 'lh.sulc.txt',
 'lh.iH.txt',
 'rh.curv.txt',
 'rh.sulc.txt',
 'rh.iH.txt',
 'lh.thickness.txt',
 'rh.thickness.txt']

In [42]:
# Names of the features to load; each is read from "lh.<name>.txt" and
# becomes one column of the feature matrix, in this order.
feat_name = ['curv', 'iH', 'sulc']

In [51]:
data_path = '/data/human/Mindboggle/DL/'
mesh_path = os.path.join(data_path, "mesh")
subject = 'Afterthought-1'
subj_path = os.path.join(mesh_path, subject)
[v, f] = read_vtk(os.path.join(subj_path, 'lh.white.vtk'))    # Index for vertices starts from 0

# Load one column per entry of feat_name.  The file name contains no
# wildcard, so it is read directly: a missing file now raises instead of
# being skipped silently (which would drop a column and shift the others).
features = []
for name in feat_name:
    print(name)
    feat = np.genfromtxt(os.path.join(subj_path, "lh.{}.txt".format(name)),
                         dtype=np.float32)
    print(type(feat))
    features.append(feat)
# Stack to (n_features, n_values), transpose so each feature is a column,
# then store as a CSR sparse matrix.
features = np.array(features)
features = np.transpose(features)
features = sp.csr_matrix(features)

curv
<class 'numpy.ndarray'>
iH
<class 'numpy.ndarray'>
sulc
<class 'numpy.ndarray'>


In [52]:
# Shape of the feature matrix (rows x feature columns).
features.shape

(130921, 3)