# Ground Truth Data Import Notebook

The point of this notebook is to give the tools to import 4 different ground truth datasets:

1. Mishchenko: small but highly annotated 
2. Kasthuri: largest annotated volume
3. Janelia: super high isotropic resolution  
4. Eyewire: low resolution dataset

We show here how to handle the import script for each of these datasets.  
First, connect to the database. If the database does not exist at the given path, it is automatically created and initialized.

In [1]:
import sys; sys.path.insert(0, "../")
from database.models import DatabaseSession

# Location of the database file used by this notebook.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
database_path = "/Users/Jeremy/Documents/Neuro/mishchenko1.db"
# Session handle passed to every database call in the cells below
# (ds.begin() / ds.commit() bracket each write).
ds = DatabaseSession(database_path)

## Datasets

### Mishchenko

Required parameters:

In [2]:
import scipy.io
import numpy as np

# Descriptive fields handed to create_ground_truth_dataset() in the "Run Import" section.
dataset_name = "Mishchenko2"
source_url = '''https://www.dropbox.com/sh/feyvo74v4fikyph/AAB8av97edabKP_UJngXrRvIa?dl=0'''
reference_url = '''http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0008853'''
# The space after the first sentence keeps the two sentences from running together.
comments = ("A reconstruction of a part of the rat hippocampus. "
            "Very small volume but well annotated: synapses (PSD), cell types and cell regions.")
# Source data path on local machine
data_source_path = "/Users/Jeremy/Documents/Neuro/Mishchenko/PLOS2010raw.mat"

# Volume dimensions (x, y, z) and per-axis voxel dimensions (vx, vy, vz).
(x, y, z, vx, vy, vz) = (542, 813, 93, 8, 8, 50)
dim = (x, y, z)
vdim = (vx, vy, vz)

mat = scipy.io.loadmat(data_source_path)
typeMap = mat['typesmap'][0]

# Each row of 'synapses_ids' wraps a single id; unwrap it. The loop variable is
# not called `id` so the builtin of that name is not shadowed.
synpaseidsMap = [row[0] for row in mat['synapses_ids']]
synpasePartnersMap = mat['synpartners_ids']

# true_map: synapse id -> the matching row of 'synpartners_ids'.
# get_slice() uses element [0] of that row to relabel synapse pixels.
true_map = {}
for i, synapse_id in enumerate(synpaseidsMap):
    true_map[synapse_id] = synpasePartnersMap[i]
# Id 0 marks "no synapse"; give it an explicit neutral entry.
true_map[0] = (0, 0)

Required methods:

In [3]:
def get_cell_type(cell_id, type_map):
    """Return the cell-type label of a Mishchenko cell.

    Args:
        cell_id: key/index of the cell in ``type_map``.
        type_map: indexable giving the dataset's numeric type code for each cell id.

    Returns:
        "glia", "neuron" or "unknown". Type codes that have no entry in the
        lookup table are reported as "unknown" instead of raising ``KeyError``,
        so one unexpected code does not abort a whole import.
    """
    type_map_database = {4: "glia", 2: "neuron", 3: "neuron",
                         1: "unknown", 11: "unknown", 8: "unknown"}
    return type_map_database.get(type_map[cell_id], "unknown")

def get_region_type(cell_id, region_id, is_synapse, type_map):
    """Return the region label of a Mishchenko data point.

    Args:
        cell_id: key/index of the owning cell in ``type_map``.
        region_id: unused for this dataset; kept for the common callback signature.
        is_synapse: truthy when the point belongs to a synapse.
        type_map: indexable giving the dataset's numeric type code for each cell id.

    Returns:
        "synapse" when ``is_synapse`` is truthy (``type_map`` is not consulted);
        otherwise "dendrite", "axon" or "unknown" according to the cell's type
        code. Codes missing from the lookup table yield "unknown" instead of
        raising ``KeyError``.
    """
    if is_synapse:
        return "synapse"
    type_map_database = {4: "unknown", 2: "dendrite", 3: "axon",
                         1: "unknown", 11: "unknown", 8: "unknown"}
    return type_map_database.get(type_map[cell_id], "unknown")
    
def get_slice(k):
    """Build the three label images for slice ``k`` of the Mishchenko volume.

    Reads the notebook globals ``mat`` (the loaded .mat file) and ``true_map``
    (synapse id -> partner ids).

    Args:
        k: index of the slice inside ``mat['segmentation'][0]`` / ``mat['synapses'][0]``.

    Returns:
        A tuple ``(real_slice, synapse_slice, mito_slice)`` of transposed 2-D arrays:
        the segmentation (uint32) with every synapse pixel relabelled to the first
        partner id of its synapse, the synapse ids (uint16), and an all-zero
        mitochondria image (uint16). All three arrays share the same shape.

    Raises:
        KeyError: if a non-zero synapse id in the slice is missing from ``true_map``.
    """
    real_slice = np.array(mat['segmentation'][0][k], np.uint32)
    synapse_slice = np.array(mat['synapses'][0][k], np.uint16)

    # Relabel synapse pixels one synapse id at a time with a boolean mask rather
    # than visiting every pixel in a Python loop.
    for synapse_id in np.unique(synapse_slice):
        if synapse_id != 0:
            real_slice[synapse_slice == synapse_id] = true_map[synapse_id][0]

    # No mitochondria annotation is loaded for this dataset: return zeros with
    # the same shape and orientation as the other two arrays.
    mito_slice = np.zeros(real_slice.shape, np.uint16)
    return (np.transpose(real_slice), np.transpose(synapse_slice), np.transpose(mito_slice))

### Janelia

Required parameters:

In [None]:
import os

# Descriptive fields handed to create_ground_truth_dataset() in the "Run Import" section.
dataset_name = "Janelia"
source_url = "http://emdata.janelia.org/"
reference_url = "https://www.janelia.org/project-teams/fly-em/research/progress/progress-reconstruction/seven-column-connectome-fib-sem"
comments = "FIB-SEM, one long medulla column. No paper published with the data. Contains neurons and unidentified cells, no region annotation"

#Source data path on local machine
data_source_path = '/Users/Jeremy/Documents/Neuro/Janelia/images/'

# get_slice(k) treats files[k] as slice k, but os.listdir() returns names in
# arbitrary order, so the listing is sorted. The macOS ".DS_Store" metadata file
# is left out of the listing rather than deleted from the data directory.
# NOTE(review): lexicographic order equals slice order only if the file names
# are zero-padded -- confirm against the data.
files = sorted(entry for entry in os.listdir(data_source_path) if entry != ".DS_Store")

#Dataset info: volume dimensions (x, y, z) and per-axis voxel dimensions (vx, vy, vz)
(x, y, z, vx, vy, vz) = (2000, 2000, 6239, 8, 8, 8)
dim = (x, y, z)
vdim = (vx, vy, vz)
# This dataset carries no per-cell type information.
typeMap = {}

Required methods:

In [None]:
import cv2
import numpy as np

def get_cell_type(cell_id, type_map):
    """Label every Janelia cell as a neuron; both arguments are ignored."""
    cell_type = "neuron"
    return cell_type

def get_region_type(cell_id, region_id, is_synapse, type_map):
    """Report the region of every Janelia data point as unknown; all arguments are ignored."""
    region_type = "unknown"
    return region_type
    
def get_slice(k):
    """Load slice ``k`` of the Janelia volume.

    Reads the notebook globals ``data_source_path``, ``files``, ``x`` and ``y``.

    Args:
        k: index into ``files`` of the image to load.

    Returns:
        A tuple ``(real_slice, synapse_slice, mito_slice)`` of transposed arrays:
        the image cast to uint32, plus all-zero uint16 synapse and mitochondria
        images built from the ``(x, y)`` globals.

    Raises:
        IOError: if the image file cannot be read.
    """
    image_path = data_source_path + files[k]
    # Flag 0 asks OpenCV for a single-channel grayscale image.
    # NOTE(review): a grayscale read may limit label values to 8 bits -- confirm
    # this is adequate for the segmentation ids in these files.
    image = cv2.imread(image_path, 0)
    # cv2.imread reports failure by returning None instead of raising.
    if image is None:
        raise IOError("Could not read segmentation image: " + image_path)
    real_slice = image.astype(np.uint32)
    # This dataset has no synapse or mitochondria annotation.
    synapse_slice = np.zeros((x, y), np.uint16)
    mito_slice = np.zeros((x, y), np.uint16)
    return (np.transpose(real_slice), np.transpose(synapse_slice), np.transpose(mito_slice))

### Kasthuri

Required parameters:

In [None]:
import os

# Descriptive fields handed to create_ground_truth_dataset() in the "Run Import" section.
dataset_name = "Kasthuri"
source_url = "http://w.ocp.me/datum:kasthuri15"
reference_url = "http://www.cell.com/abstract/S0092-8674(15)00824-7"
comments = "Volume of mouse neocortex, neuron segments, mitochondrias and synapses annotated but no other regions."
# Source data path on local machine
source_path = '/Users/Jeremy/Documents/Neuro/Kasthuri/'
# This dataset carries no per-cell type information.
typeMap = {}

(mitos_path, synapses_path, segments_path)  = (source_path + 'kat11mito/',
                                               source_path + 'kat11synapses/',
                                               source_path + 'kat11segments/')

# get_slice(k) pairs files_segments[k], files_synapses[k] and files_mitos[k], so
# the three listings must be in the same, deterministic order: os.listdir()
# returns names in arbitrary order, hence the sort. The macOS ".DS_Store"
# metadata file is filtered out of each listed folder (deleting it from the
# parent folder does not keep it out of these listings).
# NOTE(review): lexicographic order equals slice order only if the file names
# are zero-padded -- confirm against the data.
files_synapses = sorted(entry for entry in os.listdir(synapses_path) if entry != ".DS_Store")
files_segments = sorted(entry for entry in os.listdir(segments_path) if entry != ".DS_Store")
files_mitos = sorted(entry for entry in os.listdir(mitos_path) if entry != ".DS_Store")

# Volume dimensions (x, y, z) and per-axis voxel dimensions (vx, vy, vz).
(x, y, z, vx, vy, vz) = (10752, 13312, 1849, 6, 6, 30)
dim = (x, y, z)
vdim = (vx, vy, vz)

Required methods:

In [None]:
import cv2
import numpy as np

def get_cell_type(cell_id, type_map):
    """Label every Kasthuri cell as a neuron; both arguments are ignored."""
    cell_type = "neuron"
    return cell_type

def get_region_type(cell_id, region_id, is_synapse, type_map):
    """Classify a Kasthuri data point.

    A synapse flag takes precedence; otherwise any non-zero ``region_id`` is a
    mitochondria, and everything else is unknown. ``cell_id`` and ``type_map``
    are not used.
    """
    in_mitochondria = region_id != 0
    if is_synapse:
        return "synapse"
    if in_mitochondria:
        return "mitochondria"
    return "unknown"

def _read_rotated(image_path, dtype):
    """Read an image as stored (flag -1), cast it to ``dtype`` and rotate it with np.rot90."""
    image = cv2.imread(image_path, -1)
    # cv2.imread reports failure by returning None instead of raising.
    if image is None:
        raise IOError("Could not read image: " + image_path)
    return np.rot90(image.astype(dtype))


def get_slice(k):
    """Load slice ``k`` of the Kasthuri volume.

    Reads the notebook globals ``segments_path``/``files_segments``,
    ``mitos_path``/``files_mitos`` and ``synapses_path``/``files_synapses``.

    Args:
        k: index of the slice in each of the three file listings.

    Returns:
        A tuple ``(real_slice, synapse_slice, mito_slice)``: the segmentation as
        uint32 and the synapse and mitochondria images as uint16, each rotated
        with ``np.rot90``.

    Raises:
        IOError: if any of the three image files cannot be read.
    """
    real_slice = _read_rotated(segments_path + files_segments[k], np.uint32)
    mito_slice = _read_rotated(mitos_path + files_mitos[k], np.uint16)
    synapse_slice = _read_rotated(synapses_path + files_synapses[k], np.uint16)
    return (real_slice, synapse_slice, mito_slice)

### Eyewire

Required parameters:

In [None]:
# Source archive on local machine and the raw volume file it contains.
tar_source_path = '/Users/Jeremy/Documents/Neuro/Eyewire/nature2014.tar.gz'
file_name = "volume.uint32_t.raw"
source_url = '''http://seunglab.org/data/'''
reference_url = '''http://www.nature.com/nature/journal/v509/n7500/abs/nature13240.html'''
# The "Run Import" cell reads `dataset_name`; without it the name of a
# previously selected dataset would be reused. `name` is kept as an alias.
dataset_name = "Eyewire"
name = dataset_name
comments = "A reconstruction of a part of the mouse's retina. The data is very sparse and the resolution quite low. No region annotation."

# Volume dimensions (x, y, z) and per-axis voxel dimensions (vx, vy, vz),
# exposed under the same names as the other dataset sections because the
# import loop reads `z` and `typeMap`.
(x, y, z, vx, vy, vz) = (2432, 10496, 6528, 33, 33, 46)
dim = (x, y, z)
vdim = (vx, vy, vz)
typeMap = {}
# NOTE(review): no Eyewire-specific get_slice / get_cell_type / get_region_type
# is defined in this section -- they must be provided before running the import.

## Run Import

Run the import by running the cells below, starting with creating a new ground truth dataset:

In [4]:
import database.methods.datasets as d

# Register the dataset selected above; the returned id is what the import loop
# attaches every ingested slice to.
ds.begin()
dataset_id = d.create_ground_truth_dataset(
    ds,
    dataset_name,
    dim,
    vdim,
    source_url,
    reference_url,
    comments,
)
ds.commit()

In [None]:
import database.methods.connectomics as c
import database.methods.conversion as cv
import ipywidgets as widgets
from IPython.display import display

# Half-open range [start_slice, end_slice) of slices to import. By default the
# whole volume of the selected dataset is imported; lower end_slice (or raise
# start_slice) to import only part of it.
start_slice = 0
end_slice = z

# Clamp once and use the same bounds for the progress bar and the loop, so the
# loop can never ask get_slice() for a slice outside the volume.
first_slice = max(0, start_slice)
last_slice = min(end_slice, z)

progressbar = widgets.FloatProgress(value=first_slice, min=first_slice,
                                    max=last_slice, step=1, description='Loading:',)
display(progressbar)

# All slices are written inside a single begin()/commit() pair.
# NOTE(review): if a slice fails mid-way the transaction is left open -- check
# whether DatabaseSession offers a rollback that should be called here.
ds.begin()
for k in range(first_slice, last_slice):
    (real_slice, synapse_slice, mito_slice) = get_slice(k)
    (data_points, cell_points) = cv.ingest_slice(ds, dataset_id, k,
        real_slice, synapse_slice, mito_slice, get_cell_type, get_region_type, typeMap)
    c.add_data_points(ds, data_points)
    c.add_cells(ds, cell_points)
    # Advance the bar only once the slice has actually been stored.
    progressbar.value = k + 1
ds.commit()