Python notebook to calculate mean images for a subject. The mean image will be saved in the dataset for the subject.

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each
# cell runs, so edits to library source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [12]:
import glob
from pathlib import Path
import pickle

import pandas as pd
import pyspark

from janelia_core.dataprocessing.dataset import ROIDataset
from janelia_core.dataprocessing.image_stats import std_through_time

## Parameters go here

In [6]:
# All user-tunable settings for this notebook, collected in a single dictionary.
ps = {
    # Excel file listing where each dataset is saved
    'data_loc_file': r'A:\projects\keller_vnc\data\experiment_data_locations.xlsx',

    # Additional path pieces needed to locate each dataset
    'dataset_folder': 'extracted',
    # NOTE(review): this is a raw string, so the value contains two backslashes after
    # the drive letter - confirm that is intended.
    'dataset_base_folder': r'K:\\SV4',

    # Row of the dataset-locations table that identifies the dataset to process
    'dataset_row': 0,
}

## Load the dataset

In [10]:
# Read in dataset locations
def c_fcn(str):
    """Return the given string with every single-quote character removed.

    Used as a cell converter when reading the dataset-locations spreadsheet.

    Args:
        str: The string to clean.

    Returns:
        A copy of the input with all "'" characters stripped out.
    """
    # NOTE: the parameter name shadows the built-in `str` inside this function; it is
    # kept as-is so the function's signature does not change.
    quote, nothing = "'", ""
    return str.replace(quote, nothing)
# Strip single quotes from the spreadsheet cells as they are read.
# NOTE(review): the integer converter keys are 0 and 1 while usecols selects columns
# 1 and 2 - confirm the converters are applied to the intended columns.
converters = {0:c_fcn, 1:c_fcn}

data_locs = pd.read_excel(ps['data_loc_file'], header=1, usecols=[1, 2], converters=converters)

# Build a glob pattern for the pickled dataset of the row selected by ps['dataset_row']
dataset_path = (Path(ps['dataset_base_folder']) / data_locs['Main folder'][ps['dataset_row']] /
                data_locs['Subfolder'][ps['dataset_row']] / Path(ps['dataset_folder']) / '*.pkl')

# Sort the matches so the chosen file is deterministic when several match, and fail
# with an explicit message (rather than a bare IndexError) when none do.
matching_files = sorted(glob.glob(str(dataset_path)))
if not matching_files:
    raise FileNotFoundError('No dataset file found matching: ' + str(dataset_path))
dataset_file = matching_files[0]

# NOTE: pickle.load can execute arbitrary code; only load dataset files from a trusted source.
with open(dataset_file, 'rb') as f:
    dataset = ROIDataset.from_dict(pickle.load(f))

## Create a spark context

In [13]:
# Configure Spark to run locally with 20 worker threads and the memory limits below.
conf = pyspark.SparkConf().setMaster('local[20]').setAll([
    ('spark.executor.memory', '10g'), ('spark.driver.memory','400g'), ('spark.driver.maxResultSize', '300g')])

# Use getOrCreate so re-running this cell reuses the live context instead of raising
# because a SparkContext already exists. Note that an existing context keeps the
# configuration it was originally created with.
sc = pyspark.SparkContext.getOrCreate(conf=conf)

## Generate mean image

In [26]:
# Collect the 'file' entry of every image record in the dataset
image_files = [img_entry['file'] for img_entry in dataset.ts_data['imgs']['vls']]

# Compute the image statistics over all of those files using the Spark context
image_stats = std_through_time(images=image_files, sc=sc)

Processing 5416 images with spark.
Done processing 5416 images.


## Save the dataset with the image stats in it

In [36]:
# Attach the computed statistics to the dataset
dataset.stats = image_stats

# Write to a temporary file first and only then replace the original. Opening the
# original directly with 'wb' would truncate it immediately, so a failure during
# serialization would destroy the only copy of the dataset.
# The '.tmp' suffix keeps a leftover temporary file from matching the '*.pkl' pattern
# used to locate the dataset.
tmp_dataset_file = Path(dataset_file + '.tmp')
with open(tmp_dataset_file, 'wb') as f:
    pickle.dump(dataset.to_dict(), f)
tmp_dataset_file.replace(dataset_file)