# Soma Detection Analysis of Whole-Brain Light-Sheet Images

## 1. Before using this notebook

### 1a. Install brainlit, and other packages that this notebook uses
### 1b. Write images to s3 using CloudReg
    - e.g. python -m cloudreg.scripts.create_precomputed_volumes --s3_input_paths /mnt/NAS/SmartSPIM_Data/2022_03_02/20220302_14_40_04_8529_destriped_DONE/Ex_561_Em_600_stitched --s3_output_paths  s3://smartspim-precomputed-volumes/2022_03_02/8529/Ch_561_v2  --voxel_size 1.83 1.83 2 --num_procs 24 --resample_iso False
### 1c. Make point annotations in neuroglancer to identify subvolumes for validation (and possibly training)
    - instructions: https://neurodata.io/help/neuroglancer-pt-annotations/
    ,
    {
    "type":"pointAnnotation",
    "name": "soma_val",
    "points": []
    },
    {
    "type":"pointAnnotation",
    "name": "nonsoma_val",
    "points":[]
    }
### 1d. Update soma_data.py file

### \* Inputs \*

In [None]:
from brainlit.preprocessing import removeSmallCCs
from brainlit.BrainLine.data.soma_data import brain2paths, brain2centers
from brainlit.BrainLine.analyze_results import SomaDistribution
from brainlit.BrainLine.util import (
    json_to_points,
    find_atlas_level_label,
    fold,
    setup_atlas_graph,
    get_atlas_level_nodes,
    download_subvolumes,
)
from brainlit.BrainLine.apply_ilastik import ApplyIlastik, ApplyIlastik_LargeImage
from brainlit.BrainLine.parse_ara import *
import xml.etree.ElementTree as ET
from cloudreg.scripts.transform_points import NGLink
from brainlit.BrainLine.imports import *

%gui qt5

In [None]:
# Resolve the brainlit checkout as the fourth ancestor of the current working directory.
brainlit_path = Path(os.path.abspath(""))
brainlit_path = brainlit_path.parents[3]
print(f"Path to brainlit: {brainlit_path}")

# Sanity-check every entry of brain2paths: each sample needs a "base" path and a
# "val_info" entry whose soma / non-soma annotation layers can be fetched.
# "train_info" is optional but gets the same check when present.
# NOTE: the loop variable keeps its original name `id` (which shadows the
# builtin) because notebook cells share a single global namespace.
for id in brain2paths.keys():
    sample = brain2paths[id]
    if "base" not in sample or "val_info" not in sample:
        print(f"Sample {id}: Does not conform to desired format")
        continue

    base = sample["base"]
    if "http" in base:
        print(f"Sample {id}: http in basepath, which may cause write errors")

    for info_key, kind in (("val_info", "validation"), ("train_info", "training")):
        if info_key not in sample:
            continue
        try:
            url = sample[info_key]["url"]
            # Fetch the annotation state once and look up both layers in it.
            points_by_layer = json_to_points(url)
            for layer_key in ("somas_layer", "nonsomas_layer"):
                layer = sample[info_key][layer_key]
                pts = points_by_layer[layer]
        except Exception:
            # Best-effort check: report the problem and keep validating the other
            # samples. KeyboardInterrupt / SystemExit are deliberately not caught.
            print(f"Sample {id}: Error finding {kind} annotations with {info_key}")

## 2. Download benchmark data

### \*Inputs\*

In [None]:
# Which sample to pull benchmark subvolumes for, and where to put them.
brain = "887"  # sample ID, used as a key into brain2paths
soma_data_dir = "/Users/thomasathey/Documents/mimlab/mouselight/ailey/detection_soma/"  # directory that receives the training/validation data
dataset_to_save = "val"  # either "train" or "val"

# Names of the three image layers.
antibody_layer, background_layer, endogenous_layer = (
    "Ch_647",
    "Ch_561",
    "Ch_488",
)

### Setup paths

In [None]:
# Validate the download inputs against brain2paths before fetching anything.
layer_names = [antibody_layer, background_layer, endogenous_layer]

if brain not in brain2paths:
    raise ValueError(f"brain {brain} not an entry in brain2paths in soma_data.py file")

if (
    dataset_to_save not in ["train", "val"]
    or f"{dataset_to_save}_info" not in brain2paths[brain]
):
    raise ValueError(f"{dataset_to_save}_info not in brain2paths[{brain}].keys()")

# Check the layers of the *selected* sample. Previously this reused `base` and
# `id` left over from the last iteration of an earlier cell's loop, so it could
# probe (and report on) a different sample than `brain`.
base = brain2paths[brain]["base"]
for layer in layer_names:
    try:
        CloudVolume(base + layer)
    except Exception:
        # Best-effort probe: report the missing layer and continue with the rest.
        print(f"Sample {brain}: Layer {layer} not found in {base}")

### Download data

In [None]:
# Pass the inputs chosen above to brainlit's download_subvolumes helper, tagging
# the request as soma data.
# NOTE(review): presumably this writes the annotated subvolumes under
# soma_data_dir — confirm against download_subvolumes.
download_subvolumes(
    soma_data_dir,
    brain_id=brain,
    layer_names=layer_names,
    dataset_to_save=dataset_to_save,
    object_type="soma",
)

## 3. View downloaded data (optional)

### \*Inputs\*

In [None]:
# `fname` is the HDF5 file opened for viewing; `scale` is passed to napari's
# add_image for every channel.
fname = "/Users/thomasathey/Documents/mimlab/mouselight/ailey/detection_soma/brainr2/val/891_4202_1717_pos.h5"  # path to file for viewing
scale = [1.8, 1.8, 2]  # voxel size in microns

In [None]:
# Pull the three channels of the "image_3channel" dataset out of the file while
# it is still open; indexing the dataset copies each channel into memory.
with h5py.File(fname, "r") as f:
    pred = f.get("image_3channel")
    image_fg, image_bg, image_endo = (
        pred[channel, :, :, :] for channel in range(3)
    )

# Show each channel as its own 3D napari layer, with a micron scale bar.
viewer = napari.Viewer(ndisplay=3)
viewer.add_image(image_fg, scale=scale)
viewer.add_image(image_bg, scale=scale)
viewer.add_image(image_endo, scale=scale)
scale_bar = viewer.scale_bar
scale_bar.visible = True
scale_bar.unit = "um"

## 4. Apply ilastik to validation data

You can do this programmatically (below), or you can use the ilastik GUI (which is sometimes faster)

### \* Inputs \*

In [None]:
# Settings handed to ApplyIlastik in the following cell.
model = "_rabies_pix_3ch"  # suffix interpolated into the project file name below
project_path = f"/Users/thomasathey/Documents/mimlab/mouselight/ailey/detection_soma/matt_soma{model}.ilp"  # path to ilastik model to be used
ilastik_path = (
    "/Applications/ilastik-1.4.0b21-OSX.app/Contents/ilastik-release/run_ilastik.sh"
)
# brains_path is assigned twice in a row; only the second value takes effect.
brains_path = "/Users/thomasathey/Documents/mimlab/mouselight/ailey/detection_soma/"
brains_path = "/Users/thomasathey/Documents/mimlab/mouselight/brainlit_parent/"
brains = ["test"]  # [brain]

In [None]:
# Build an ApplyIlastik helper from the inputs above, then call process_somas()
# followed by move_results().
# NOTE(review): the first keyword is spelled `ilastk_path` (no second "i") —
# confirm this matches the ApplyIlastik signature in the installed brainlit.
applyilastik = ApplyIlastik(
    ilastk_path=ilastik_path,
    project_path=project_path,
    brains_path=brains_path,
    brains=brains,
)
applyilastik.process_somas()
applyilastik.move_results()

### \*Inputs (if relevant)\*
- Identify files that contain two somas in the variable below. Since voxel coordinates are likely to be unique across samples, the file names below do not include sample IDs.

In [None]:
# Probability files whose subvolume holds two somas rather than one: the
# validation cells count two expected positives for every file named here.
doubles = [
    "3972_1636_1575_pos_Probabilities.h5",
    "2867_4336_1296_pos_Probabilities.h5",
    "2607_1845_1309_pos_Probabilities.h5",
    "2101_3397_1747_pos_Probabilities.h5",
    "2011_3452_1911_pos_Probabilities.h5",
    "2113_3353_1727_pos_Probabilities.h5",
    # NOTE(review): the "?" in the next entry looks like a typo; as written it
    # only matches a file whose name literally contains "?" — confirm.
    "1968_3472_?1784_pos_Probabilities.h5",
]  # 8446

## 5. Check Results

### Validation

In [None]:
# Sweep the probability threshold over the validation subvolumes of `brain`,
# compute precision / recall / F-score at each threshold, and plot the
# precision-recall curve with the best-F-score operating point highlighted.
recalls = []
precisions = []
fscores = []

files_dir = f"/Users/thomasathey/Documents/mimlab/mouselight/ailey/detection_soma/brain{brain}/val/"
onlyfiles = [
    f for f in os.listdir(files_dir) if os.path.isfile(os.path.join(files_dir, f))
]
test_files = [f for f in onlyfiles if "Probabilities" in f]  # "probabilities"
print(test_files)

size_thresh = 500  # connected components must exceed this area to count as a detection

thresholds = list(np.arange(0.0, 1.0, 0.02))

for threshold in thresholds:
    tot_pos = 0
    tot_neg = 0
    true_pos = 0
    false_pos = 0
    for filename in tqdm(test_files, disable=True):
        # Files listed in `doubles` are counted as two expected somas.
        newpos = 2 if filename in doubles else 1

        fname = files_dir + filename
        # Read the first channel into memory and close the file right away so
        # handles do not pile up across the threshold sweep.
        with h5py.File(fname, "r") as f:
            pred = f.get("exported_data")
            pred = pred[0, :, :, :]
        mask = pred > threshold
        labels = measure.label(mask)
        props = measure.regionprops(labels)

        if "pos" in filename:
            num_detected = 0
            tot_pos += newpos
            for prop in props:
                if prop["area"] > size_thresh:
                    # The first `newpos` large components are true positives;
                    # any further ones are false positives.
                    if num_detected < newpos:
                        true_pos += 1
                        num_detected += 1
                    else:
                        false_pos += 1
        elif "neg" in filename:
            tot_neg += 1
            for prop in props:
                if prop["area"] > size_thresh:
                    false_pos += 1

    # Without any positive samples recall is undefined; report 0 rather than
    # failing with ZeroDivisionError.
    recall = true_pos / tot_pos if tot_pos else 0
    recalls.append(recall)
    if true_pos + false_pos == 0:
        precision = 0
    else:
        precision = true_pos / (true_pos + false_pos)
    precisions.append(precision)
    if precision == 0 and recall == 0:
        fscore = 0
    else:
        fscore = 2 * precision * recall / (precision + recall)
    fscores.append(fscore)
    print(
        f"threshold: {threshold}: precision: {precision}, recall: {recall}, f-score: {fscore} for {tot_pos} positive samples in {len(test_files)} images"
    )

# Named `pr_data` rather than `dict` so the builtin stays usable in later cells
# (e.g. `fontdict=dict(...)`).
pr_data = {
    "Recall": recalls,
    "Precision": precisions,
    "F-score": fscores,
    "Threshold": thresholds,
}
df = pd.DataFrame(pr_data)
max_fscore = df["F-score"].max()
# First row reaching the maximum F-score, i.e. the lowest such threshold.
best_row = df.loc[df["F-score"] == max_fscore].iloc[0]
best_threshold = float(best_row["Threshold"])
best_rec = float(best_row["Recall"])
best_prec = float(best_row["Precision"])
sns.set(font_scale=2)

plt.figure(figsize=(8, 8))
sns.lineplot(data=df, x="Recall", y="Precision", estimator=np.amax, ci=False)
plt.scatter(
    best_rec,
    best_prec,
    c="r",
    label=f"Max f-score: {max_fscore:.2f} thresh:{best_threshold:.2f}",
)
plt.xlim([0, 1.1])
plt.ylim([0, 1.1])
plt.title(f"Brain {brain} Validation: {tot_pos}+ {tot_neg}-")
plt.legend()

### If results above are not adequate, improve model and try again

In my case, I identify more subvolumes from the sample at hand using the same process as for the validation data, add them to the model as training data, and retrain.

### Examine best threshold

In [None]:
def _show_in_napari(
    im_fname, title, mask, mask_name=None, highlight=None, highlight_name=None
):
    """Open a 3D napari viewer on one subvolume together with its detection mask.

    Args:
        im_fname: path of the HDF5 file holding the "image_3channel" dataset.
        title: layer name given to the first image channel.
        mask: thresholded prediction, added as a labels layer.
        mask_name: optional name for the mask layer.
        highlight: optional second labels layer (a single component).
        highlight_name: name for the `highlight` layer.

    Returns:
        The napari viewer that was created.
    """
    # Copy the three channels into memory, then close the file.
    with h5py.File(im_fname, "r") as im_file:
        im = im_file.get("image_3channel")
        channels = [im[c, :, :, :] for c in range(3)]

    viewer = napari.Viewer(ndisplay=3)
    viewer.add_image(channels[0], name=title)
    viewer.add_image(channels[1], name="bg")
    viewer.add_image(channels[2], name="endo")
    if mask_name is None:
        viewer.add_labels(mask)
    else:
        viewer.add_labels(mask, name=mask_name)
    if highlight is not None:
        viewer.add_labels(highlight, name=highlight_name)
    return viewer


# Re-run detection at the best threshold and open a viewer for every error
# (false positive or false negative) so it can be inspected by eye.
for filename in tqdm(test_files, disable=True):
    print(f"*************File: {filename}*********")
    newpos = 2 if filename in doubles else 1

    # Slicing off the last 17 characters and appending ".h5" gives the image file name.
    im_fname = files_dir + filename[:-17] + ".h5"
    fname = files_dir + filename
    with h5py.File(fname, "r") as f:
        pred = f.get("exported_data")
        pred = pred[0, :, :, :]
    mask = pred > best_threshold
    labels = measure.label(mask)
    props = measure.regionprops(labels)

    if "pos" in filename:
        num_detected = 0
        tot_pos += newpos
        for prop in props:
            area = prop["area"]
            if area > size_thresh:
                print(f"area of detected object: {area}")
                if num_detected < newpos:
                    true_pos += 1
                    num_detected += 1
                else:
                    print(f"Soma false positive Area: {area}")
                    viewer = _show_in_napari(
                        im_fname,
                        filename,
                        mask,
                        highlight=labels == prop["label"],
                        highlight_name=f"soma false positive area: {area}",
                    )
                    false_pos += 1
        if num_detected == 0:
            print(f"Soma false negative")
            viewer = _show_in_napari(
                im_fname, filename, mask, mask_name="Soma false negative"
            )
    elif "neg" in filename:
        for prop in props:
            area = prop["area"]
            if area > size_thresh:
                print(f"Nonsoma false positive Area: {area}")
                viewer = _show_in_napari(
                    im_fname,
                    filename,
                    mask,
                    highlight=labels == prop["label"],
                    highlight_name=f"nonsoma false positive area: {area}",
                )
                false_pos += 1

### Paper figure for all validation

In [None]:
# Precision-recall curves for every validated sample on one figure, with each
# sample's best-F-score operating point overlaid as a scatter marker.
brains = [
    "8607",
    "8606",
    "8477",
    "8531",
    "8608",
    "8529",
    "8557",
    "8555",
    "8446",
    "8454",
    "887",
]

recalls = []
precisions = []
brain_ids = []

best_precisions = []
best_recalls = []
best_fscores = {}

for brain_id in tqdm(brains, desc="Computing validation..."):
    # Two samples are stored under a directory name that differs from their ID.
    if brain_id == "8557":
        brain_name = "r1"
    elif brain_id == "8555":
        brain_name = "r2"
    else:
        brain_name = brain_id

    files_dir = f"/Users/thomasathey/Documents/mimlab/mouselight/ailey/detection_soma/brain{brain_name}/val/"
    onlyfiles = [
        f for f in os.listdir(files_dir) if os.path.isfile(os.path.join(files_dir, f))
    ]
    test_files = [f for f in onlyfiles if "Probabilities" in f]

    # Reset the per-sample optimum so a sample whose F-score never exceeds 0
    # does not inherit (or fail on) values left over from a previous sample.
    best_fscore = 0
    best_prec = 0
    best_recall = 0

    size_thresh = 500

    thresholds = list(np.arange(0.0, 1.0, 0.02))

    for threshold in thresholds:
        tot_pos = 0
        true_pos = 0
        false_pos = 0
        for filename in tqdm(test_files, disable=True):
            # Files listed in `doubles` are counted as two expected somas.
            newpos = 2 if filename in doubles else 1

            fname = files_dir + filename
            # Read the first channel into memory and close the file right away.
            with h5py.File(fname, "r") as f:
                pred = f.get("exported_data")
                pred = pred[0, :, :, :]
            mask = pred > threshold
            labels = measure.label(mask)
            props = measure.regionprops(labels)

            if "pos" in filename:
                num_detected = 0
                tot_pos += newpos
                for prop in props:
                    if prop["area"] > size_thresh:
                        if num_detected < newpos:
                            true_pos += 1
                            num_detected += 1
                        else:
                            false_pos += 1
            elif "neg" in filename:
                for prop in props:
                    if prop["area"] > size_thresh:
                        false_pos += 1

        # Without any positive samples recall is undefined; use 0 rather than
        # failing with ZeroDivisionError.
        recall = true_pos / tot_pos if tot_pos else 0
        recalls.append(recall)
        if true_pos + false_pos == 0:
            precision = 1
        else:
            precision = true_pos / (true_pos + false_pos)

        precisions.append(precision)
        if precision == 0 and recall == 0:
            fscore = 0
        else:
            fscore = 2 * precision * recall / (precision + recall)

        if fscore > best_fscore:
            best_fscore = fscore
            best_prec = precision
            best_recall = recall

        brain_ids.append(brain_id)
    best_fscores[brain_id] = best_fscore
    best_precisions.append(best_prec)
    best_recalls.append(best_recall)


# Turn each per-threshold sample ID into its legend label.
brain_ids = [
    brain_id + f" - Max F-score: {best_fscores[brain_id]:.2f}"
    for brain_id in brain_ids
]

data = {"Sample": brain_ids, "Recall": recalls, "Precision": precisions}
df = pd.DataFrame(data=data)

sns.set(rc={"figure.figsize": (10, 7)})
sns.set(font_scale=2)
sns.lineplot(data=df, x="Recall", y="Precision", hue="Sample", estimator="max", ci=None)
sns.scatterplot(x=best_recalls, y=best_precisions)

## 6. Make Annotation layers

### Transformed layers

In [None]:
# The atlas annotation volume supplies the geometry (resolution, offset, size)
# for the new layers.
atlas_vol = CloudVolume(
    "precomputed://https://open-neurodata.s3.amazonaws.com/ara_2016/sagittal_10um/annotation_10um_2017"
)

# Register an empty "<layer>_transformed" image layer next to each source layer.
# Original note: axon_mask is transformed into an image because nearest
# interpolation doesn't work well after downsampling.
for layer in (antibody_layer, background_layer):
    layer_path = brain2paths[brain]["base"] + layer + "_transformed"
    info_kwargs = {
        "num_channels": 1,
        "layer_type": "image",
        "data_type": "uint16",  # channel images might be 'uint8'
        "encoding": "raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        "resolution": atlas_vol.resolution,  # voxel scaling, in nanometers
        "voxel_offset": atlas_vol.voxel_offset,
        "chunk_size": [32, 32, 32],  # in voxels
        "volume_size": atlas_vol.volume_size,
    }
    info = CloudVolume.create_new_info(**info_kwargs)
    vol_mask = CloudVolume(layer_path, info=info)
    vol_mask.commit_info()

## 7. Apply ilastik to whole image

### \* Inputs \*
You can use the notebook code below or the `soma_detect_image.py` script.

In [None]:
# Sample to process and the names of its three image layers.
brain = "test"
antibody_layer, background_layer, endogenous_layer = (
    "antibody",
    "background",
    "endogenous",
)

threshold = 0.28  # threshold to use for ilastik

# Working directories (cluster alternative for data_dir: "/data/tathey1/matt_wright/brainr_temp/").
data_dir = "/Users/thomasathey/Documents/mimlab/mouselight/brainlit_parent/brainr_temp/"  # temporary subvolumes for segmentation
results_dir = "/Users/thomasathey/Documents/mimlab/mouselight/brainlit_parent/brainr_results/"  # coordinates of soma detections

# Ilastik is run in "headless mode", which needs the executable and the project file.
# Cluster alternatives:
#   "/data/tathey1/matt_wright/ilastik/ilastik-1.4.0rc5-Linux/run_ilastik.sh"
#   "/data/tathey1/matt_wright/ilastik/soma_model/matt_soma_rabies_pix_3ch.ilp"
ilastik_path = "/Applications/ilastik-1.4.0b21-OSX.app/Contents/ilastik-release/run_ilastik.sh"  # path to ilastik executable
ilastik_project = "/Users/thomasathey/Documents/mimlab/mouselight/ailey/detection_soma/matt_soma_rabies_pix_3ch.ilp"  # path to ilastik project

max_coords = [3072, 4352, 1792]  # use -1 to process a whole dimension
ncpu = 1  # number of cores to use for detection (e.g. 16)
chunk_size = [256, 256, 256]  # alternative: [256, 256, 300]

In [None]:
# Run the ilastik model over the whole image of `brain`, then call
# collect_results for the same sample.
layer_names = [antibody_layer, background_layer, endogenous_layer]

ilastik_largeimage = ApplyIlastik_LargeImage(
    ilastik_path=ilastik_path,
    ilastik_project=ilastik_project,
    results_dir=results_dir,
    ncpu=ncpu,  # honour the `ncpu` input instead of a hard-coded 1
)
ilastik_largeimage.apply_ilastik_parallel(
    brain_id=brain,
    layer_names=layer_names,
    threshold=threshold,
    data_dir=data_dir,
    chunk_size=chunk_size,
    max_coords=max_coords,
)
# Collect results for the sample that was just processed, not a hard-coded "test".
ilastik_largeimage.collect_results(brain_id=brain)

## 8. Register volume and transform data to atlas space using CloudReg

### 8a. You need to find an initial affine alignment using cloudreg.scripts.registration.get_affine_matrix. For example: 

A link to the ARA parcellation is:

`precomputed://https://open-neurodata.s3.amazonaws.com/ara_2016/sagittal_10um/annotation_10um_2017`

And some Python commands to help with affine alignment are:

```
from cloudreg.scripts.registration import get_affine_matrix
get_affine_matrix([1,1,1], [15,0,0], "PIR", "RAI", 1.15, "precomputed://https://open-neurodata.s3.amazonaws.com/ara_2016/sagittal_10um/annotation_10um_2017")
```

### 8b. Run registration using cloudreg.scripts.registration. For example:

```
python -m cloudreg.scripts.registration -input_s3_path precomputed://s3://smartspim-precomputed-volumes/2023_01_20/MPRRabies/Ch_561 --output_s3_path precomputed://s3://smartspim-precomputed-volumes/2023_01_20/MPRRabies/atlas_to_target --atlas_s3_path https://open-neurodata.s3.amazonaws.com/ara_2016/sagittal_50um/average_50um --parcellation_s3_path https://open-neurodata.s3.amazonaws.com/ara_2016/sagittal_10um/annotation_10um_2017 --atlas_orientation PIR -orientation RPI --rotation 0 0 0 --translation 0 0 0 --fixed_scale 1.07 -log_s3_path precomputed://s3://smartspim-precomputed-volumes/2023_01_20/MPRRabies/atlas_to_target --missing_data_correction True --grid_correction False --bias_correction True --regularization 5000.0 --iterations 3000 --registration_resolution 100
```

### 8c. Transform data to atlas space using CloudReg

### Soma coordinates

```
python -m cloudreg.scripts.transform_points --target_viz_link https://viz.neurodata.io/?json_url=https://json.neurodata.io/v1?NGStateID=6ti276yAxXF_Rw --atlas_viz_link https://ara.viz.neurodata.io/?json_url=https://json.neurodata.io/v1?NGStateID=HvyNDGaPsd1wyg --affine_path /mnt/NAS/Neuroglancer\ Data/  --velocity_path /mnt/NAS/Neuroglancer\ Data/  --transformation_direction atlas
```

or

```
python -m cloudreg.scripts.transform_points --target_viz_link https://viz.neurodata.io/?json_url=https://json.neurodata.io/v1?NGStateID=05Fhxt5VBT-_1A --atlas_viz_link https://ara.viz.neurodata.io/?json_url=https://json.neurodata.io/v1?NGStateID=HvyNDGaPsd1wyg --affine_path /cis/home/tathey/MPRRabies_Ch_561_registration/downloop_1_A.mat   --velocity_path /cis/home/tathey/MPRRabies_Ch_561_registration/downloop_1_v.mat  --transformation_direction atlas
```

This will produce a neuroglancer link with the transformed soma coordinates, which should be added to `soma_data.py` under the `somas_atlas_url` key. Then the code below, or `soma_brainrender.py`, can be used to visualize the data.

### Image

```
python -m cloudreg.scripts.transform_data --target_layer_source precomputed://s3://smartspim-precomputed-volumes/2022_09_20/887/Ch_647 --transformed_layer_source precomputed://s3://smartspim-precomputed-volumes/2022_09_20/887/Ch_647_transformed --affine_path /cis/home/tathey/887_Ch_561_registration/downloop_1_A.mat  --velocity_path /cis/home/tathey/887_Ch_561_registration/downloop_1_v.mat
```

## 9. View results in brain space

### \*Inputs\*

In [None]:
# Samples to display, and how to draw them; these values are passed to
# SomaDistribution and its plotting methods in the following cells.
brain_ids = [
    "8557",
    "8555",
    "8607",
    "8606",
    "8477",
    "8531",
    "8608",
    "8529",
    "8454",
    "MPRRabies",
]
colors = {
    "tph2 vglut3": "blue",
    "tph2 gad2": "red",
    "gad2 vgat": "green",
}  # colors for different genotypes
# Passed as `symbols=` to napari_coronal_section.
# NOTE(review): presumably marker symbols — confirm how they are assigned.
symbols = ["o", "+", "^", "vbar"]
# Passed as `fold_on=` to napari_coronal_section.
fold_on = False

In [None]:
# Build a SomaDistribution for the chosen samples and call its
# napari_coronal_section method with z=1000 and the display inputs above.
sd = SomaDistribution(brain_ids=brain_ids)
sd.napari_coronal_section(
    z=1000, subtype_colors=colors, symbols=symbols, fold_on=fold_on
)

In [None]:
# Rebuild the SomaDistribution and call brainrender_somas, colouring by genotype.
sd = SomaDistribution(brain_ids=brain_ids)
sd.brainrender_somas(subtype_colors=colors)

## 10. Display bar charts

In [None]:
# Allen atlas region IDs to be shown in the bar chart. Commented-out entries are
# regions that can be switched back on.
# see: https://connectivity.brain-map.org/projection/experiment/480074702?imageId=480075280&initImage=TWO_PHOTON&x=17028&y=11704&z=3
regions = [
    688,  # cerebral cortex
    95,  # agranular insular area
    714,  # orbital area
    698,  # olfactory areas
    1089,  # hippocampal formation
    # 583, # claustrum
    477,  # striatum
    # 803, # pallidum
    351,  # bed nuclei of stria terminalis
    # 703, #cortical subplate
    1097,  # hypothalamus
    157,  # periventricular zone
    515,  # medial preoptic nucleus
    290,  # hypothalamic lateral zone
    331,  # mammillary body
    797,  # zona incerta
    549,  # thalamus
    186,  # lateral habenula
    519,  # cerebellar nuclei
    846,  # dentate nucleus
    726,  # dentate gyrus
    313,  # midbrain
    4,  # inferior colliculus (was 157, which is the periventricular zone already listed above)
    1052,  # pedunculopontine
    128,  # midbrain reticular nucleus
    214,  # red nucleus
    1065,  # hindbrain
    867,  # parabrachial nucleus
    701,  # vestibular nuclei
    972,  # prelimbic
    44,  # infralimbic
]

# Custom composite allen regions where key is region name and value is list of
# allen regions.
composite_regions = {
    "Amygdalar Nuclei": [131, 295, 319, 780],
    # lateral (615), compact (374) and reticular (381) parts; 374 was listed twice.
    "Substantia Nigra": [615, 374, 381],
    "Superior Colliculus": [294, 302],
}

brain_ids = [
    "8557",
    "8555",
    "8607",
    "8606",
    "8477",
    "8531",
    "8608",
    "8529",
    "8454",
    "8446",
    "MPRRabies",
]
# SomaDistribution for the samples listed in brain_ids; the following cells
# overwrite some of its attributes and call region_barchart on it.
sd = SomaDistribution(brain_ids=brain_ids)


In [None]:
# Load each sample's pickled quantification dictionary and install the results
# on `sd` directly.
# NOTE: pickle.load can execute arbitrary code — only open files you trust.
id_to_regioncounts = {}
atlas_points = {}

for brain_id in brain_ids:
    pkl_file = f"/Users/thomasathey/Documents/mimlab/mouselight/ailey/detection_soma/wholebrain_results/quantification_dict_{brain_id}.pickle"
    with open(pkl_file, "rb") as handle:
        q_dict = pickle.load(handle)
    id_to_regioncounts[brain_id] = q_dict

    # Zero-column placeholder with one row per counted soma.
    total_somas = sum(q_dict.values())
    atlas_points[brain_id] = np.zeros((total_somas, 0))

sd.atlas_points = atlas_points
sd.id_to_regioncounts = id_to_regioncounts
sd.region_graph = sd._setup_regiongraph()

In [None]:
# Call region_barchart for the selected regions and composite regions, passing
# region 872 as normalize_region.
# NOTE(review): presumably 872 is the region counts are normalised by — confirm.
sd.region_barchart(regions, composite_regions=composite_regions, normalize_region=872)

### Compare distributions with PCA

In [None]:
# Project each sample's regional soma distribution onto two principal
# components and scatter-plot the result, coloured by genotype.
# NOTE(review): this cell indexes `brains` by sample ID (`brains[brain]`) and
# reads "Brain ID", "Region" and "Percent of Total Somas (%)" columns from `df`.
# The `brains` list and the `df` built in the validation cells of this notebook
# do not have that shape, so both presumably need to be redefined before this
# cell is run — confirm.
X = []
brain_ids = []
genotypes = []

for i, brain in enumerate(brains):
    print(brain)
    region_order = list(df.loc[df["Brain ID"] == brain]["Region"])

    # Every sample must list its regions in the same order so that the rows of X
    # are comparable column by column.
    if i == 0:
        standard_region_order = region_order
    elif standard_region_order != region_order:
        raise ValueError(f"Different region order for brain {brain}")

    distrib = list(df.loc[df["Brain ID"] == brain]["Percent of Total Somas (%)"])
    X.append(distrib)

    brain_ids.append(brain)
    genotypes.append(brains[brain])

# One row per sample, one column per region.
X = np.array(X)

pca = PCA(n_components=2)
X_2 = pca.fit_transform(X)

df_pca = {
    "PC 1": X_2[:, 0],
    "PC 2": X_2[:, 1],
    "Genotype": genotypes,
    "Brain ID": brain_ids,
}
df_pca = pd.DataFrame(data=df_pca)

fig, ax = plt.subplots(1, 1, figsize=(10, 10))
sns.scatterplot(data=df_pca, x="PC 1", y="PC 2", hue="Genotype", ax=ax)


# Label every point with its sample ID, nudged by 0.03 on both axes.
for i in range(df_pca.shape[0]):
    plt.text(
        x=df_pca["PC 1"][i] + 0.03,
        y=df_pca["PC 2"][i] + 0.03,
        s=df_pca["Brain ID"][i],
        fontdict=dict(color="black", size=20),
    )

plt.title(
    f"Input Distribution PCA with Explained Variance: {pca.explained_variance_ratio_}"
)

## Create local volume

In [None]:
# Open the source volume that the example subvolume is cut from in the next cell.
vol = CloudVolume(
    "precomputed://s3://smartspim-precomputed-volumes/2022_03_10/8531/Ch_647_iso"
)

In [None]:
# Cut one 256 x 256 x 256 block out of the source volume: x block 11, y block 16,
# z block 6, counting in units of 256 voxels.
subvol = vol[256 * 11 : 256 * 12, 256 * 16 : 256 * 17, 256 * 6 : 256 * 7]

In [None]:
# Describe a new single-channel uint16 image layer that copies its geometry
# (resolution, offset, chunk size, volume size) from the source volume `vol`.
info = CloudVolume.create_new_info(
    num_channels=1,
    layer_type="image",
    data_type="uint16",  # Channel images might be 'uint8'
    encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
    resolution=vol.resolution,  # Voxel scaling, units are in nanometers
    voxel_offset=vol.voxel_offset,
    chunk_size=vol.chunk_size,  # units are voxels
    volume_size=vol.volume_size,  # e.g. a cubic millimeter dataset
)

# Create the layer at a local file:// path with compression disabled, then
# commit its info.
vol_ex = CloudVolume(
    "precomputed://file:///Users/thomasathey/Documents/mimlab/mouselight/brainlit_parent/brainlit/brainlit/BrainLine/data/example",
    info=info,
    compress=False,
)
vol_ex.commit_info()

In [None]:
# Write the block into the local volume at the same index range it was read from.
vol_ex[256 * 11 : 256 * 12, 256 * 16 : 256 * 17, 256 * 6 : 256 * 7] = subvol

In [None]:
# Scratch calculation: the upper z bound used in the slices above (1792).
256 * 7

In [None]:
# Open the "endogenous" layer under the local example directory, passing
# fill_missing=True.
# NOTE(review): presumably fill_missing makes chunks that were never written
# read back as zeros instead of raising — confirm against the CloudVolume docs.
vol = CloudVolume(
    "precomputed://file:///Users/thomasathey/Documents/mimlab/mouselight/brainlit_parent/brainlit/brainlit/BrainLine/data/example/endogenous",
    fill_missing=True,
)