# Download benchmarking data from S3 with Neuroglancer


#### This notebook explains how to:
(1) Read benchmarking data from S3 via Neuroglancer 

(2) download raw benchmarking data to your local computer 

#### Quick notes on the benchmarking data:

In octree format, data is labled in folders, labeled test_1 through test_25 and validation_1 through validation_25. 

If when downloading, you get a reshape error, try first uploading segments and then re-uploading the volumes. 

Known issues with a few of the files: 

- test_9,test_10 - didnt seem to have good swc alignment

- test_24 - issues with the image

- validation_11 - seems to be a shift between swcs and the image


##### Define locations

In [1]:
from brainlit.utils import session
from brainlit.utils.Neuron_trace import NeuronTrace

#Can change to test_"1-25", validation_"1-25"
dest = "s3://open-neurodata/brainlit/benchmarking_data/validation_21" 
dest_segments = "s3://open-neurodata/brainlit/benchmarking_data/validation_21"

  self.schema["$schema"]


##### Create Neuroglancer session & download benchmarking volume

In [2]:
%%capture
sess = session.NeuroglancerSession(url=dest, url_segments=dest_segments, mip=0)  # create session object 
img, bounds, vertices = sess.pull_vertex_list(1, [1], 0, expand=True)  # get full benchmarking image

##### Download a specific .swc

In [3]:
seg_id = 1 # Can change

G_paths = sess.get_segments(seg_id, bounds, rounding = False)
G = G_paths[0]
paths = G_paths[1]

Downloading: 100%|████████████████████████████████████| 1/1 [00:00<00:00, 23.87it/s]
Downloading: 100%|████████████████████████████████████| 1/1 [00:00<00:00, 30.58it/s]


# ORIGINAL_SOURCE CloudVolume 0.65.5
# CREATURE 
# REGION
# FIELD/LAYER
# TYPE
# CONTRIBUTOR 
# REFERENCE
# RAW 
# EXTRAS 
# SOMA_AREA
# SHINKAGE_CORRECTION 
# VERSION_NUMBER 
# VERSION_DATE 2021-04-29T06:48:31.447474
# SCALE 1.0 1.0 1.0


1 0 104.875275 219.958252 -12.159082 1.000000 -1
32 0 142.883347 164.959061 69.841988 1.000000 1
35 0 142.883347 165.957108 69.841988 1.000000 32
37 0 143.877365 165.957108 69.841988 1.000000 35
38 0 143.877365 167.956421 69.841988 1.000000 37
48 0 145.865402 169.958984 67.841782 1.000000 38
56 0 146.859421 169.958984 66.841675 1.000000 48
60 0 147.879593 170.957031 65.841576 1.000000 56
59 0 147.879593 169.958984 64.841469 1.000000 60
65 0 148.873611 168.957703 63.841366 1.000000 59
75 0 150.861649 168.957703 61.841164 1.000000 65
84 0 151.881821 168.957703 61.841164 1.000000 75
71 0 149.867630 168.957703 61.841164 1.000000 84
58 0 147.879593 166.958389 59.840954 1.000000 71
54 0 146.859421 165.957108 59.840954 1.000000 58
55 0 146.859421 166.958389


[array([[104, 219, -12],
       [180, 109, 113],
       [179, 110, 113],
       [178, 111, 112],
       [177, 112, 112],
       [176, 112, 111],
       [176, 113, 110],
       [176, 113, 109],
       [177, 114, 108],
       [177, 115, 107],
       [177, 115, 104],
       [177, 116, 104],
       [177, 115, 105],
       [177, 115, 106],
       [174, 115, 106],
       [173, 116, 106],
       [171, 116, 106],
       [171, 117, 106],
       [170, 118, 106],
       [169, 119, 105],
       [167, 121, 105],
       [166, 121, 105],
       [164, 123, 103],
       [164, 123, 102],
       [165, 123, 102],
       [166, 122, 102],
       [165, 123, 103],
       [162, 123, 103],
       [161, 123, 102],
       [159, 125, 100],
       [159, 126,  99],
       [159, 125,  98],
       [159, 126,  98],
       [160, 126,  98],
       [160, 131,  98],
       [162, 133,  98],
       [164, 135,  96],
       [164, 135,  95],
       [165, 135,  95],
       [165, 139,  95],
       [167, 141,  95],
       [167, 1

##### Visualize with napari

In [6]:
import napari
with napari.gui_qt():
    viewer = napari.Viewer(ndisplay=3)
    viewer.add_image(img)
    viewer.add_shapes(data=paths, shape_type='path', edge_width=1.0, edge_color='blue', opacity=0.8)

# Download RAW benchmarking data

###### This will download the benchmarking data in .tif and .swc format to a local destination

In [7]:
import boto3
from botocore import UNSIGNED
from botocore.client import Config
import os
from pathlib import Path
import numpy as np
from skimage import io
from tqdm import tqdm

## Create directories

In [8]:
cwd = Path(os.path.abspath(''))
data_dir = os.path.join(cwd, "data")
print(f"Downloading segments to {data_dir}")
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

im_dir = os.path.join(data_dir, "sample-tif-location")
if not os.path.exists(im_dir):
    os.makedirs(im_dir)

swc_dir = os.path.join(data_dir, "sample-swc-location")
if not os.path.exists(swc_dir):
    os.makedirs(swc_dir)

Downloading segments to C:\Users\shrey\Documents\NDD\brainlit\docs\notebooks\utils\data


### Accessing .tif files

On mac/linux, we use os.path.join to construct the s3 path. However on windows you should set prefix to "brainlit/benchmarking_data/tif-files"

In [13]:
s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
bucket = s3.Bucket("open-neurodata")
prefix = os.path.join("brainlit", "benchmarking_data", "tif-files")
#use this for windows
#prefix = "brainlit/benchmarking_data/tif-files" 
im_count = 0
for _ in bucket.objects.filter(Prefix=prefix):
    im_count += 1
for i, im_obj in enumerate(tqdm(bucket.objects.filter(Prefix=prefix))):
    if im_obj.key[-4:] == '.tif':
        im_name = os.path.basename(im_obj.key)
        im_path = os.path.join(im_dir, im_name)
        bucket.download_file(im_obj.key, im_path)

0it [00:00, ?it/s]


The below code can visualize a specified .tif file.

In [14]:
import napari

file_name = "validation_21-gfp.tif" # Can change to any image (test 1-25, validation 1-25)

im_file = Path(im_dir) / file_name
im = io.imread(im_file, plugin="tifffile") 
    
with napari.gui_qt():
    viewer = napari.Viewer(ndisplay=3)
    viewer.add_image(im)

### Accessing .swc files

Again, on windows you need to make the variable called prefix a string

In [16]:
s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
bucket = s3.Bucket("open-neurodata")
prefix = "brainlit/benchmarking_data/Manual-GT" #use this for windows
#prefix = os.path.join("brainlit", "benchmarking_data", "Manual-GT")
swc_count = 0
for _ in bucket.objects.filter(Prefix=prefix):
    swc_count += 1
for i, swc_obj in enumerate(tqdm(bucket.objects.filter(Prefix=prefix))):
    if swc_obj.key[-4:] == '.swc':
        idx = swc_obj.key.find('Manual-GT')
        swc_name = swc_obj.key[idx:]
        swc_path = os.path.join(swc_dir, swc_name)
        dir = os.path.dirname(swc_path)
        if not os.path.exists(dir):
            os.makedirs(dir)
        bucket.download_file(swc_obj.key, swc_path)

601it [01:12,  8.26it/s]


### Aligning and visualizing images & swcs

In [17]:
from brainlit.utils.benchmarking_params import brain_offsets, vol_offsets, scales, type_to_date
from brainlit.utils.Neuron_trace import NeuronTrace
from pathlib import Path
import numpy as np
from skimage import io
import napari

In [18]:
im_dir = Path(im_dir)
swc_base_path = Path(swc_dir) / "Manual-GT"
gfp_files = list(im_dir.glob("**/*-gfp.tif"))

In [19]:
for im_num, im_path in enumerate(gfp_files):
    
    print(f"Image {im_num+1}/{len(gfp_files)}")
    print(im_path)
    
    f = im_path.parts[-1][:-8].split("_")
    image = f[0]
    date = type_to_date[image]
    num = int(f[1])

    scale = scales[date]
    brain_offset = brain_offsets[date]
    vol_offset = vol_offsets[date][num]
    im_offset = np.add(brain_offset, vol_offset)

    lower = int(np.floor((num - 1) / 5) * 5 + 1)
    upper = int(np.floor((num - 1) / 5) * 5 + 5)
    dir1 = date + "_" + image + "_" + str(lower) + "-" + str(upper)
    dir2 = date + "_" + image + "_" + str(num)
    swc_path = swc_base_path / dir1 / dir2
    swc_files = list(swc_path.glob("**/*.swc"))
    im = io.imread(im_path, plugin="tifffile")
    print(f"Image shape: {im.shape}")

    paths_total = []
    for swc_num, swc in enumerate(swc_files):
        if "0" in swc.parts[-1]:
            # skip the bounding box swc
            continue

        swc_trace = NeuronTrace(path=str(swc))
        paths = swc_trace.get_paths()
        swc_offset, _, _, _ = swc_trace.get_df_arguments()
        offset_diff = np.subtract(swc_offset, im_offset)
       
        for path_num, p in enumerate(paths):
            pvox = (p + offset_diff) / (scale) * 1000
            paths_total.append(pvox)
            
    with napari.gui_qt():
        viewer = napari.Viewer(ndisplay=3)
        viewer.add_image(np.swapaxes(im,0,2))
        viewer.add_shapes(data=paths_total, shape_type='path', edge_width=1.0, edge_color='blue', opacity=0.8)
        

Image 1/50
C:\Users\shrey\Documents\NDD\brainlit\docs\notebooks\utils\data\sample-tif-location\test_1-gfp.tif
Image shape: (100, 330, 330)
