In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import h5py as h5
import os
import time
import glob
import multiprocessing as mp

import custom

---
# Elevated-plus maze

In [None]:
# Root of the shared data folder. Every raw-data location below is derived
# from it, so relocating the data only requires changing this one line.
data_dir = 'D:/Dropbox (Stuber Lab)'

# Elevated-plus maze (EPM) inputs. The pieces are joined with a literal '/'
# so the resulting glob patterns are byte-identical to the strings that were
# previously hard-coded here.
epm_dir = data_dir + '/We PNOC-ing/Latest PNOC Data/miniscope/PNOC_EPM'
behav_source = epm_dir + '/PNOC_Behavior/*.xlsx'  # glob pattern for the behavior files
trace_source = epm_dir + '/PNOC_Traces/*.txt'     # glob pattern for the calcium trace files
del_epm = 'EPM/del_epm.csv'                       # CSV listing the cells to delete

# Output HDF5 file that receives the 'behav' and 'neural' tables
h5_outfile = 'data/epm.h5'

In [None]:
# Analysis parameters

n_cores = 6                # worker processes used for the parallel behavior import
exp_dur = 10 * 60 * 1000   # end of the common time base (= 600000)
bin_size = 200             # step of the common time base (same units as exp_dur)

## Import data

### Import behavioral data

In [None]:
# Import data

def import_behav(filename):
    """Load one behavior file and label it with its subject and epoch.

    The base name of ``filename`` (extension removed) must consist of exactly
    three underscore-separated fields. The second and third fields are used as
    subject and epoch; the first is ignored.

    Returns a ``((subj, epoch), data)`` pair, where ``data`` is the object
    returned by ``custom.etho_extract(filename)`` with its index multiplied
    by 1000.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    name_fields = stem.split('_')
    _prefix, subj, epoch = name_fields

    data = custom.etho_extract(filename)
    # Rescale the index by 1000 (presumably seconds -> milliseconds; confirm
    # against custom.etho_extract)
    data.index = data.index * 1000

    return (subj, epoch), data

# Import every behavior file in parallel
behav_files = glob.glob(behav_source)
if not behav_files:
    # Without this guard the zip(*...) unpacking below fails with an opaque
    # "need more than 0 values to unpack" error when the pattern matches nothing
    raise ValueError('No behavior files match %r' % (behav_source,))

p = mp.Pool(processes=n_cores)
try:
    # One (subj, epoch) key and one data table per file, in matching order
    exps, behav_import = zip(*p.map(import_behav, behav_files))
finally:
    # Always release the worker processes, even if an import raised
    p.close()
    p.join()

In [None]:
# The behavioral data carry an extra 10 s at the start: shift every table's
# index back by 10000 so that time zero lines up.
# Note: this modifies the tables in place, so re-running the cell shifts again.
for data in behav_import:
    data.index = data.index - 10000

### Downsample data

In [None]:
# Resample every animal onto a common time base and merge into one table

ts = np.arange(0, exp_dur, bin_size)
subjs = [subj for subj, _epoch in exps]

# Per-animal resampled tables, keyed by subject
dfs = {}
for subj, data in zip(subjs, behav_import):
    # Aggregate the raw samples onto `ts` with np.nanmean
    data_ds = custom.resample(data, data.index, ts, method=np.nanmean)
    subj_df = pd.DataFrame(data_ds, columns=data.columns, index=ts)
    subj_df.columns.names = ['feature']
    subj_df.index.names = ['timestamp']
    dfs[subj] = subj_df

# Single table with (subject, feature) columns, ordered by subject
behav_df = pd.concat(
    dfs, axis=1, names=['subject', 'feature']
).sort_index(axis=1, level=0)

### Import calcium imaging data
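Each session contains 1499 or 1500 frames. **TODO:** confirm this against the time axis assigned below, which has `exp_dur / frame_dur` = 3000 entries.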

In [None]:
# Parameters

frame_dur = 200  # spacing between consecutive imaging frames on the neural_df time axis

In [None]:
# Load every trace file into a DataFrame keyed by subject
trace_files = glob.glob(trace_source)

trace_import = {}
for trace_path in trace_files:
    # The subject ID is the second underscore-separated field of the file name
    subject = os.path.basename(trace_path).split('_')[1]
    # Transpose the comma-separated matrix so that rows become frames
    trace_import[subject] = pd.DataFrame(np.loadtxt(trace_path, delimiter=',').T)

In [None]:
# Merge all subjects into one table with (subject, neuron) columns

# Time stamp of every imaging frame; assigning it as the index below requires
# the table to have exactly one row per entry
frame_times = np.arange(0, exp_dur, frame_dur)

neural_df = pd.concat(trace_import, axis=1)
neural_df.index = frame_times
neural_df.columns.names = ['subject', 'neuron']

### Downsample data (if necessary)

In [None]:
# Target time base for the resampled traces
ts = np.arange(0, exp_dur, bin_size)

# Aggregate the traces onto `ts` with np.nanmean. The source table is
# `neural_df`; the previously referenced `neural_df_orig` is not defined
# anywhere in this notebook and raised a NameError.
data_ds = custom.resample(neural_df, neural_df.index, ts, method=np.nanmean)

In [None]:
# Wrap the resampled values in a DataFrame on the new time base. The data
# argument must be `data_ds` (the output of custom.resample); passing
# `neural_df` here would merely reindex the original table and discard the
# resampling result.
neural_df = pd.DataFrame(data_ds, columns=neural_df.columns, index=ts)
neural_df.index.name = 'timestamp'

### Clean data

In [None]:
# Remove bad data

# Import the file listing the cells to delete and reshape it into one
# (subject, neuron) row per listed cell:
#   - subtract 1 from every entry (presumably 1-based numbering in the CSV
#     versus 0-based neuron labels; confirm against the file)
#   - unstack() turns the table into a Series indexed by (column name, row),
#     and dropna() discards the empty entries
#   - the row level is dropped, the values are cast to int, and the remaining
#     index (the CSV column names) becomes the 'subject' column
delete_import = (
    pd.read_csv(del_epm, delimiter=',')
    .sub(1)
    .unstack()
    .dropna()
    .reset_index(level=-1, drop=True)
    .astype(int)
    .reset_index()
)
delete_import.columns = ['subject', 'neuron']

# Delete cells. `.values` replaces `DataFrame.as_matrix()`, which is
# deprecated and no longer exists in current pandas; both return the same array.
neural_df_cleaned = neural_df.drop(
    [tuple(row) for row in delete_import.values], axis=1
)

### Save data

In [None]:
# Write both tables to the output HDF5 file under the keys 'behav' and 'neural'
with pd.HDFStore(h5_outfile) as store:
    store.put('behav', behav_df)
    store.put('neural', neural_df_cleaned)

---
<a id='headfixed'></a>
# Headfixed exposure
Create behavioral file with `pupilize`

In [None]:
frame_dur = 200   # value for the -b option of organize_behav.py
threshold = 225   # value for the -t option of organize_behav.py

# Root of the shared data folder.
# NOTE(review): `data_dir` was used below without being defined anywhere in
# this notebook. The value is inferred from the absolute EPM paths at the top
# of the notebook, which share this prefix - confirm it is correct.
data_dir = 'D:/Dropbox (Stuber Lab)'

# Input files
del_tmt = os.path.join(data_dir, '2P Data/del_hf-tmt.csv')       # cells to delete
raw_data_tmt = os.path.join(data_dir, '2P Data/PNOC_Behavior')   # raw behavior folder
# Calcium trace files (the closing parenthesis of glob.glob was missing)
ca_files = glob.glob(os.path.join(data_dir, '2P Data/PNOC_HFTMT/PNOC_Traces/*.txt'))

# Output files
h5_out = os.path.join(data_dir, '2P Data/headfixed.h5')
# Temporary file written by organize_behav.py; deleted after it has been merged
h5_out_tmt = os.path.join(data_dir, 'We PNOC-ing/Latest PNOC Data/hf-data-tmt.h5')

## Create behavioral data

In [None]:
# Run the external organize_behav.py script on the raw behavior folder and
# write its result to h5_out_tmt. IPython expands each $name from the notebook
# namespace, so the names must match the Python variables exactly
# (`threshold` was previously misspelled as `treshold`).
!"organize_behav.py" -n 7 -t "$threshold" -b "$frame_dur" -o "$h5_out_tmt" "$raw_data_tmt"

In [None]:
# Combine datasets

# Copy the behavioral table from the temporary TMT file into the main output
# file, renaming the index labels 'ctrl' -> 'h2o' and 'stim' -> 'odor' on the
# way. The table is read, renamed and written exactly once; `df_behav` was
# previously stored without ever being defined.
with pd.HDFStore(h5_out) as hf, pd.HDFStore(h5_out_tmt, 'r') as hf_tmt:
    df_behav = hf_tmt['behav'].rename(index={'ctrl': 'h2o', 'stim': 'odor'})
    hf['behav'] = df_behav

# Remove individual dataset files (only reached if the copy above succeeded)
os.remove(h5_out_tmt)

## Import neural data

In [None]:
# Parameters
frames_per_epoch = 1505  # every epoch is NaN-padded to this many frames when the traces are assembled

**TODO:** Check the frame counts listed below against the new files.

In [None]:
# Frames recorded in each epoch, one entry per epoch in recording order.
# Keys are the file-name fields that follow the leading data-type field.
frame_ct = dict([
    ('J31_TMT_A_P1', [1505, 1505, 1505]),
    ('J50_TMT_A_P1', [1505, 1505, 1505]),
    ('J51_TMT_A_P1', [1504, 1504, 1504]),
    ('J52_TMT_A_P1', [1505, 1505, 1505]),
    ('J53_TMT_B_P1', [1505, 1505, 1505]),
    ('J55_TMT_B_P1', [1505, 1505, 1505]),
])

In [None]:
# Import data

# Key each trace matrix by the tuple of underscore-separated fields of its
# file name (extension removed)
ca_import = {}
for ca_path in ca_files:
    name_fields = os.path.splitext(os.path.basename(ca_path))[0].split('_')
    ca_import[tuple(name_fields)] = np.loadtxt(ca_path, delimiter=',')

In [None]:
# Create dataframe for calcium traces

# Create new dictionary with key for each neuron
ca_data = {}
# .items() works on both Python 2 and 3 (.iteritems() exists only on Python 2)
for exp, exp_data in ca_import.items():
    n_cells, n_frames = exp_data.shape
    # Frames recorded in each epoch of this file. frame_ct keys omit the
    # leading 'data type' field of the file name.
    epoch_frames = frame_ct['_'.join(exp[1:])]

    # Fail early, naming the file, if frame_ct does not describe this recording
    if n_frames != sum(epoch_frames):
        raise ValueError(
            'Frame count mismatch for %s: file has %d frames, frame_ct lists %d'
            % ('_'.join(exp), n_frames, sum(epoch_frames))
        )
    if max(epoch_frames) > frames_per_epoch:
        raise ValueError(
            'Epoch of %d frames in %s exceeds frames_per_epoch (%d)'
            % (max(epoch_frames), '_'.join(exp), frames_per_epoch)
        )

    # Split the recording at the epoch boundaries (all cumulative counts but
    # the last), then right-pad every epoch with NaN to frames_per_epoch so
    # that all recordings share one time axis
    epoch_split = np.split(exp_data, np.cumsum(epoch_frames)[:-1], axis=1)
    epoch_split_new = [
        np.concatenate([epoch, np.full((n_cells, frames_per_epoch - nf), np.nan)], axis=1)
        for epoch, nf in zip(epoch_split, epoch_frames)
    ]
    traces_new = np.concatenate(epoch_split_new, axis=1)

    # One entry per neuron: file-name fields + neuron number -> padded trace
    for n, cell_data in enumerate(traces_new):
        ca_data[exp + (n, )] = cell_data

# Create dataframe (tuple keys become the levels of the column MultiIndex)
neural_df = pd.DataFrame(ca_data)

# Format columns: name the levels in file-name order, then reorder and sort
col_names = ['data type', 'subject', 'experiment', 'order', 'plane', 'neuron']
col_order = ['data type', 'experiment', 'subject', 'plane', 'order', 'neuron']
neural_df.columns.names = col_names
neural_df = neural_df.reorder_levels(col_order, axis=1)
neural_df = neural_df.sort_index(axis=1)

# Format index: (epoch, time) with one time stamp per frame. The frame spacing
# is `frame_dur`; the previously used `frame_period` is not defined anywhere.
neural_df.index = pd.MultiIndex.from_product(
    [['base', 'h2o', 'odor'], np.arange(frames_per_epoch) * frame_dur],
    names=['epoch', 'time']
)

In [None]:
# Remove bad data

# Read the list of cells to delete and rebuild its column labels as a
# MultiIndex: the first element of each label followed by the '_'-separated
# parts of the second element.
# NOTE(review): indexing x[0] / x[1] presumes every column label is a 2-tuple
# (i.e. a two-row CSV header), but read_csv as called here parses a single
# header row, which yields plain strings - confirm against the CSV file.
delete_import = pd.read_csv(del_tmt, delimiter=',')
delete_import.columns = pd.MultiIndex.from_tuples(
    [[x[0], ] + x[1].split('_') for x in delete_import.columns]
)

# Cells to delete from TMT dataset
# One tuple per non-NaN entry: the column label plus the entry minus one
# (presumably converting 1-based numbers to 0-based neuron labels - confirm).
to_delete = [
    col + (int(x) - 1, )
    for col in delete_import for x in delete_import[col]
    if not np.isnan(x)
]

# Temporarily move the 'data type' and 'order' levels out of the column index
# so the remaining column levels can be matched against `to_delete`.
temp = neural_df.T.reset_index(['data type', 'order']).T
# Drop the listed cells
temp = temp.drop(to_delete, axis=1)
# Put the two levels back and restore the level order and sorting
temp = temp.T.set_index(['data type', 'order'], append=True).T
temp = temp.reorder_levels(col_order, axis=1)
temp = temp.sort_index(axis=1)
# Drop every row that contains a NaN and cast the values to float
neural_df = temp.dropna(axis=0).astype(float)

### Save data

In [None]:
# Store the cleaned calcium traces in the output HDF5 file under the key 'neural'
with pd.HDFStore(h5_out) as store:
    store.put('neural', neural_df)