# Exports retina ply cells to h5 format

The retina cell ply files are stored in `seungmount/research/nkem/share/e2198_meshes/` and the cell type classifications can be found in `/home/svenmd/Downloads/classification.csv` (the code below reads them from `classification.csv` inside the mesh folder). This Jupyter notebook reads each cell and stores it as an `h5` file.

Classifications are only available for <400 of the ~1000 cells. Cells with and without labels are stored in separate folders (`.../*labeled*/`, `.../*unlabeled*/`). Labels are stored under `str_label` (actual label) and `int_label` (mapping to a contiguous integer space (0-max)).

### Preparations

In [47]:
import csv
import h5py
import glob
import numpy as np
import os
import plyfile
import pandas as pd
import re
import time

# Home directory of the current user; the input paths below are built from it.
HOME = os.path.expanduser("~")

Defining paths:

In [10]:
# Folder holding one .ply mesh per retina cell plus the classification table.
ply_folder = HOME + "/seungmount/research/nkem/share/e2198_meshes/"

# glob returns matches in arbitrary order; sorting makes the (long-running)
# export process the cells in a reproducible order.
ply_paths = sorted(glob.glob(os.path.join(ply_folder, "*.ply")))

# The classification CSV lives next to the meshes, so derive its path from
# the mesh folder instead of repeating the folder literal.
class_path = os.path.join(ply_folder, "classification.csv")

Reading the classifications:

In [19]:
# Load the classification CSV as a DataFrame: no header row, indexed by its
# second column (which the csv-reader cell below parses as the cell id).
clf_table = pd.read_csv(class_path, index_col=1, header=None)

In [27]:
# Build a lookup from integer cell id to its cell type label.
# Each CSV row is expected to be `cell_type, cell_id`.
cell_type_dict = {}
# Use a context manager so the file handle is closed once parsing is done;
# newline="" is the mode the csv module asks for when reading.
with open(class_path, 'r', newline="") as class_file:
    reader = csv.reader(class_file)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            continue
        cell_type, cell_id = row
        cell_type_dict[int(cell_id)] = cell_type

In [35]:
# Sorted array of the distinct cell type labels that occur in the table.
u_cell_types = np.unique(list(cell_type_dict.values()))

# Assign every distinct label its position in that sorted array, giving
# integer labels in the range 0 .. len(u_cell_types) - 1.
cell_type_mapping = {
    type_name: type_index for type_index, type_name in enumerate(u_cell_types)
}

### Read plys and write h5s
This takes some time (~10h)

In [38]:
def read_ply(path):
    """Read a triangle mesh from a ply file.

    Args:
        path: Path of the ply file to read.

    Returns:
        A tuple ``(vertices, faces)``. ``vertices`` is a float32 array of
        shape (n_vertices, 3) holding the x, y, z coordinates; ``faces`` is
        an array built from the per-face ``vertex_indices`` lists.
    """
    m = plyfile.PlyData.read(path)

    # Select the coordinate fields by name rather than reinterpreting the
    # whole record buffer as float32: the buffer view is only correct when
    # the vertex element has exactly three float32 properties and would
    # silently scramble coordinates if normals, colors, etc. are present.
    vertex_data = m["vertex"].data
    vertices = np.stack(
        [vertex_data[axis] for axis in ("x", "y", "z")], axis=-1
    ).astype(np.float32, copy=False)

    faces = np.array(list(m["face"].data['vertex_indices']))

    return vertices, faces

In [None]:
def _write_cell_h5(h5_path, vertices, faces, cell_id, str_label=None, int_label=None):
    """Write one cell mesh, and its labels if given, to a gzip-compressed h5 file.

    Args:
        h5_path: Destination file; an existing file is overwritten.
        vertices: Vertex array of the mesh.
        faces: Face array of the mesh.
        cell_id: Integer id of the cell.
        str_label: Cell type label; when None no label datasets are written.
        int_label: Integer encoding of ``str_label``.
    """
    with h5py.File(h5_path, "w") as f:
        if str_label is not None:
            f.create_dataset("int_label", data=[int_label], compression="gzip")
            f.create_dataset("str_label", data=[str_label.encode('utf8')], compression="gzip")
        f.create_dataset("vertices", data=vertices, compression="gzip")
        f.create_dataset("faces", data=faces, compression="gzip")
        f.create_dataset("cell_id", data=[cell_id], compression="gzip")


# Output folders for cells with / without a known classification.
labeled_folder = "/usr/people/svenmd/seungmount/research/nick_and_sven/data/e2198_labeled_180619/"
unlabeled_folder = "/usr/people/svenmd/seungmount/research/nick_and_sven/data/e2198_unlabeled_180619/"

# Raw string avoids the invalid "\d" escape; compiled once outside the loop.
cell_id_pattern = re.compile(r"\d+")

time_start = time.time()
for i_cell_path, ply_path in enumerate(ply_paths):

    # Progress line with a linear ETA extrapolated from the cells done so far.
    if i_cell_path > 0:
        dt = time.time() - time_start
        eta = dt / i_cell_path * len(ply_paths) - dt
        print("%d / %d - dt = %.3fs - eta = %.3fs" % (i_cell_path, len(ply_paths), dt, eta), end='\r')

    # Take the id from the file name only: searching the whole path would
    # fall back to digits in a folder name (e.g. "e2198_meshes") whenever
    # the file name itself contains none.
    id_matches = cell_id_pattern.findall(os.path.basename(ply_path))
    if not id_matches:
        print("\nSkipping %s: no cell id found in file name" % ply_path)
        continue
    cell_id = int(id_matches[-1])

    vertices, faces = read_ply(ply_path)

    if cell_id in cell_type_dict:
        str_label = cell_type_dict[cell_id]
        int_label = cell_type_mapping[str_label]
        _write_cell_h5(labeled_folder + "%d_vertex_gt.h5" % cell_id,
                       vertices, faces, cell_id,
                       str_label=str_label, int_label=int_label)
    else:
        _write_cell_h5(unlabeled_folder + "%d_vertex_gt.h5" % cell_id,
                       vertices, faces, cell_id)

7 / 1041 - dt = 387.652s - eta = 57261.667s