In [1]:
import os, shlex
from subprocess import check_output
repo_rootdir = check_output(shlex.split("git rev-parse --show-toplevel")).strip().decode('ascii')
os.chdir(repo_rootdir)

from random import shuffle
import numpy as np
import ipywidgets as widgets
import yaml
from copy import deepcopy
import json
import re
import itertools
import numpy as np
import scipy

from scipy.spatial import procrustes as procrustes

import glob, os, json
import pandas as pd

from VTK.VTKMesh import VTKObject as Mesh
from cardiac_mesh import CardiacMesh
from sklearn.decomposition import PCA, IncrementalPCA as IPCA
import pickle
from config_parser import read_default_config

## Definitions of paths and constants

In [3]:
config = read_default_config()
config['nTraining'] = 3200
cardiac_data = pickle.load(open(config["preprocessed_data"], "rb"))

output_dir = "output/PCA__LV__%s_samples" % config['nTraining']

In [6]:
perf_dfs = []
n_comps = 16

# meshes_lst, ids_lst = flattened_partition(cardiac_data, config["nTraining"])
shuffled_ids = [x for x in range(len(cardiac_data.ids))]
shuffle(shuffled_ids)
    
flattened_meshes = cardiac_data.vertices[shuffled_ids,:,:].reshape(cardiac_data.N,-1)

for n in range(1, n_comps+1):
  
  print(n)
  
  #TODO: This doesn't have to be done again for each n, but I need to figure out how to do it.
  training_meshes = flattened_meshes[:config['nTraining'],:]
  pp = PCA(n).fit(training_meshes)    
    
  meshes_reduced = pp.transform(flattened_meshes)
  meshes_reconstructed = pp.inverse_transform(meshes_reduced)
  mse = ((flattened_meshes - meshes_reconstructed)**2).mean(axis=1)
  
  mse_shuffled = ((flattened_meshes - meshes_reconstructed[shuffled_ids,:])**2).mean(axis=1)
    
  perf_df = pd.DataFrame({
    "model_id": ["PCA__LV__{}_comps__{}_samples".format(str(n), str(config['nTraining']))] * cardiac_data.N,
    "subject_id": cardiac_data.ids,
    "subset": ["training"] * config['nTraining'] + ["test"] * (cardiac_data.N - config['nTraining']),
    "mse": mse,
    "mse_shuffled": mse_shuffled,
    "n": [n] * cardiac_data.N,      
  })
  
  perf_df.index = cardiac_data.ids
  perf_dfs.append(perf_df)
        

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16


In [15]:
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    
pickle.dump(pp, open("%s/PCA_model.pkl" % output_dir, "w"))

latent_space_df = pd.DataFrame(meshes_reduced, columns=["z"+str(i) for i in range(meshes_reduced.shape[1])])
latent_space_df['subset'] = pd.Series(["training"] * config['nTraining'] + ["test"] * (cardiac_data.N - config['nTraining']))
latent_space_df.index = cardiac_data.ids

perf_df_all = pd.concat(perf_dfs)
perf_df_all

perf_df_all.to_csv("%s/performance.csv" % output_dir, index_label="ID")
latent_space_df.to_csv("%s/latent_space.csv" % output_dir, index_label="ID")

TypeError: write() argument must be str, not bytes

In [12]:
output_dir

'output/PCA__LV__3200_samples'