In [1]:
from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)
# Hide code by default when the notebook is exported as HTML: the injected
# script toggles the `.input_area` and `.prompt` elements, but only when the
# page body does NOT have the `notebook_app` class.
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)

# Add a "Toggle code" button that shows/hides the same `.input_area` and
# `.prompt` elements; intended for the HTML export version.
di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''', raw=True)


In [2]:
# all modules necessary for this nb
import os
import sys
import pickle

import numpy as np
import pylab as pl
from sklearn.covariance import EmpiricalCovariance
from sklearn.cluster import KMeans, AffinityPropagation
from sklearn.metrics import silhouette_score as clust_score
from sklearn.preprocessing import StandardScaler
from scipy import stats as sstats

# setting parameters for default matplotlib plots
%matplotlib inline

In [3]:
# NOTE(review): 'fabiostyle' is not a built-in matplotlib style name; presumably
# a custom style sheet that must be installed on this machine -- confirm.
pl.style.use('fabiostyle')
from ipywidgets import interact

# The local helper modules imported below are looked up through this extra
# search path, so the append has to run before those imports.
# TODO: replace the hard-coded relative path.
sys.path.append('../../../../../code/')  # to be replaced!

import utils as ut
import plots as pt

In [4]:
# A double percent sign (%%) marks a cell magic; in this case, the magic switches this cell to JavaScript.

In [5]:
# Name of this notebook; used as the last component of the autorestore folder path.
NOTEBOOK_NAME = 'preprocessing'

In [6]:
from pickleshare import PickleShareDB

# Per-notebook folder, under the current working directory, that backs the PickleShare store.
autorestore_folder = os.path.join(os.getcwd(), 'autorestore', NOTEBOOK_NAME)
db = PickleShareDB(autorestore_folder)
import sys
# NOTE(review): the star import hides which names come from `workspace`; this
# notebook uses at least `load_workspace` (below) and `save_workspace` (last cell).
from workspace import *
import IPython
ip = IPython.get_ipython()

# this will restore all the saved variables. ignore the errors listed.
load_workspace(ip, db)

# use `save_workspace(db)` to save variables at the end

In [7]:
# Folder (relative to the notebook) holding the input files read in the following cells.
data_folder = '../data'

In [8]:
# Load the text-file inputs from `data_folder`. Every matrix read with
# np.loadtxt is transposed right after loading.
traces = np.transpose(np.loadtxt(os.path.join(data_folder, 'C.txt')))  # denoised traces
traces_raw = np.transpose(np.loadtxt(os.path.join(data_folder, 'C_raw.txt')))
areas = ut.load_spatial_footprints_A(os.path.join(data_folder, 'A.txt'))
events = np.transpose(np.loadtxt(os.path.join(data_folder, 'S.txt')))
# dff = np.loadtxt(os.path.join(data_folder, 'C_df.txt')).T
# mean_image, contours = ut.load_spatial_footprints(os.path.join(data_folder, 'Coor.mat'),
#                                                   os.path.join(data_folder, 'Cnn.txt'),
#                                                   key='coor')

In [9]:
# Read the behavior log and collect the distinct event labels
# (second field of every entry) that occur in it.
filename = os.path.join(data_folder, 'behavior_codes.txt')
behavior = ut.read_behavior(filename)
event_labels = [entry[1] for entry in behavior]
events_list = np.unique(event_labels)

In [10]:
# Build the frame time axis from the 'absoluteTime' attribute of every
# <Frame> element in the acquisition XML file.

import xml.etree.ElementTree as ET
xmlfile = os.path.join(data_folder, 'tseries.xml')
print("I infer the time axis from:\n" + xmlfile)
tree = ET.parse(xmlfile)
root = tree.getroot()

# unfortunately we miss the first frame
time_ax = np.array([float(frame.attrib['absoluteTime'])
                    for frame in root.iter('Frame')])



I infer the time axis from:
../data/tseries.xml


In [11]:
# sync times
# Put both clocks on their own zero: behavior timestamps become relative to
# the first value parse_behavior returns for 'BEGIN' (presumably the first
# BEGIN timestamp -- confirm in utils), and the frame time axis is shifted so
# that its first sample is 0.
# start_2p has to be read before `behavior` is rebased on the next line.
start_2p = ut.parse_behavior(behavior, 'BEGIN')[0]
behavior = [[float(b[0])-start_2p, b[1]] for b in behavior]
time_ax -= time_ax[0]  # in-place shift of the array

In [12]:
# Compare the number of trace samples with the number of frame timestamps.
print(len(traces))
print(len(time_ax))

9230
18460


In [13]:
# Whole number of frame timestamps available per trace sample.
ratio = time_ax.shape[0] // traces.shape[0]
print(ratio)

2


In [14]:
# Keep every `ratio`-th timestamp so the time axis matches the trace sampling.
# NOTE: this rebinds time_ax; re-running the cell subsamples it again.
time_ax = time_ax[::ratio] # use this if video was averaged and need to adjust xml output to match

In [15]:
# After subsampling, both lengths are expected to agree.
print(len(traces))
print(len(time_ax))

9230
9230


In [16]:
# Cut the time axis to at most len(traces) samples.
time_ax = time_ax[0:len(traces)] # use this if any video frames were truncated (often needed when a video is averaged)

In [17]:
# Traces, events and the time axis should all have the same length here.
print(len(traces))
print(len(events))
print(len(time_ax))

9230
9230
9230


In [18]:
# make sure presentations are correct in timing.
b = [b[1] for b in behavior]
print b
        

['BEGIN', 'TONE_PR', 'PRE-REWARD', 'LICK', 'LICK', 'LICK', 'LICK', 'END', 'LICK', 'LICK', 'BEGIN', 'TONE_PR', 'PRE-REWARD', 'LICK', 'LICK', 'LICK', 'END', 'BEGIN', 'TONE_PR', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'PRE-REWARD', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'END', 'BEGIN', 'TONE_PR', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'PRE-REWARD', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'END', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'BEGIN', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'TONE_PR', 'LICK', 'PRE-REWARD', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'END', 'BEGIN', 'TONE_PR', 'LICK', 'PRE-REWARD', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'END', 'BEGIN', 'TONE_PR', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'PRE-REWARD', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'LICK', 'END', 'BEGIN', 'TONE_PR', 'LICK', 'LIC

In [19]:
# Remove the pre-reward phase: find the last 'BEGIN' that precedes the first
# conditioning tone, drop all frames and behavior entries before it, and
# restart the behavior clock at that BEGIN.
# This cell rebinds time_ax, events, traces and behavior to trimmed versions.
# NOTE(review): traces_raw is NOT trimmed here -- confirm nothing downstream
# indexes it with the trimmed time axis.
behavior = ut.sync_behavior_to_xml(time_ax, behavior)

# Timestamp of the first conditioning tone; stays 0 when the log has none.
# (The first match is taken explicitly; the former `start_t < 1` test let a
# tone earlier than 1 s be overwritten by a later one.)
start_t = 0
for b in behavior:
    if b[1] in ('TONE_CSM', 'TONE_SH', 'TONE_RW'):
        start_t = b[0]
        break

# Index (b_c_r) and time (r_begin) of the last BEGIN before that tone.
# b_c_r stays 0 and r_begin stays [] when no such BEGIN exists.
r_begin = []
b_c_r = 0
for idx, b in enumerate(behavior):
    if b[1] == 'BEGIN' and b[0] < start_t:
        r_begin = b[0]
        b_c_r = idx
#print r_begin
print(b_c_r)

time_ax_begin = behavior[b_c_r][0]

# Index of the frame closest to time_ax_begin (first one on ties). A frame
# whose time equals time_ax_begin exactly has distance 0 and is selected; the
# former loop skipped that case and picked a neighbouring frame instead.
# As before, the index stays 0 unless the closest frame is less than 5 s away.
final_count = 0
frame_offsets = np.abs(time_ax - time_ax_begin)
if frame_offsets.size:
    closest = int(np.argmin(frame_offsets))
    if frame_offsets[closest] < 5:
        final_count = closest

print(time_ax_begin)

time_ax = time_ax[final_count:]
events = events[final_count:]
traces = traces[final_count:]
behavior = behavior[b_c_r:]

# Restart the behavior clock at the first remaining entry.
b_sub = behavior[0][0]
behavior = [[b[0] - b_sub, b[1]] for b in behavior]
#behavior = ut.sync_behavior_to_xml(time_ax, behavior)
print(behavior)

[ 0.98628429  0.98637168  0.98642034  0.98645235  0.98647501  0.98649199
  0.98650523  0.98646356  0.98652435  0.98653402  0.98654012  0.98654298
  0.98654796  0.98655465  0.98650916  0.9865632   0.98656479  0.98656877
  0.98657399  0.98657637  0.98658174  0.98658787  0.98659011  0.98659117
  0.98659326  0.98659662  0.98660376  0.98660629  0.9866095   0.98661387
  0.98661702  0.98662227  0.98662188  0.98662377  0.98662813  0.98662949
  0.9866352   0.98663815  0.98658903  0.98664142  0.98664664  0.98664744
  0.98665242  0.9866551   0.98665749  0.98666026  0.98666533  0.98666786
  0.9866678   0.98666904  0.98667376  0.98667791  0.98668171  0.9866826
  0.98668693  0.98669082  0.98664019  0.98669218  0.98669571  0.98674962
  0.98664682  0.98670355  0.98670532  0.98665643  0.9867108   0.9867136
  0.98671823  0.98672118  0.98672507  0.98672475  0.98672752  0.98672946
  0.98672964  0.98673085  0.98673373  0.98678989  0.98673957  0.9867439
  0.98674785  0.98674682  0.98674656  0.98674827  0.98

In [20]:
# Number of samples currently in `events`.
len(events)

8519

In [21]:
# Number of samples currently in `time_ax`; should match len(events).
len(time_ax)

8519

In [22]:
# Shift the time axis so that its first sample is at 0.
time_ax=time_ax-time_ax[0]

In [23]:
# Display the time axis.
time_ax

array([  0.00000000e+00,   2.67775176e-01,   5.35550352e-01, ...,
         2.74915254e+03,   2.74942039e+03,   2.74968825e+03])

In [24]:
# -----------------------------------------------------------
# Trial timing in seconds, relative to a single cycle and centered on tone
# onset (t = 0).
CYCLE_START = -5  # seconds; the cycle window opens before tone onset
CS_DURATION = 2  # seconds
DELAY = 2  # seconds
AFTER_DELAY_DURATION = 4  # seconds
US_START = 4  # NOTE(review): equals CS_DURATION + DELAY; presumably US onset after tone onset -- confirm
CYCLE_DURATION = abs(CYCLE_START) + CS_DURATION + DELAY + AFTER_DELAY_DURATION

# -----------------------------------------------------------
# Absolute times taken from the arduino (behavior) file: tone onsets, and tone
# offsets obtained by passing CS_DURATION as `offset`. The `odor_*` names are
# kept as they are even though the parsed events are the TONE_* labels.
odor_CSm_ons = ut.parse_behavior(behavior, 'TONE_CSM')
odor_CSm_offs = ut.parse_behavior(behavior, 'TONE_CSM', offset=CS_DURATION)
odor_rw_ons = ut.parse_behavior(behavior, 'TONE_RW')
odor_rw_offs = ut.parse_behavior(behavior, 'TONE_RW', offset=CS_DURATION)
odor_sh_ons = ut.parse_behavior(behavior, 'TONE_SH')
odor_sh_offs = ut.parse_behavior(behavior, 'TONE_SH', offset=CS_DURATION)
rewards = np.r_[ut.parse_behavior(behavior, 'REWARD')]
licks = np.r_[ut.parse_behavior(behavior, 'LICK')]

# -----------------------------------------------------------
# Start and end of every cycle in absolute time; the offset is ADDED, with its sign.
# NOTE(review): an earlier comment described this pattern as matching lines
# that begin with 'R' or 'S'; check ut.parse_behavior for what 'TONE_*'
# actually matches.
cycles_starts = ut.parse_behavior(behavior, 'TONE_*', offset=CYCLE_START)
cycles_ends = ut.parse_behavior(behavior, 'TONE_*', offset=CYCLE_DURATION+CYCLE_START)

# One (start, end) row per cycle. list() makes this work whether zip returns
# a list or an iterator.
cycles = np.array(list(zip(cycles_starts, cycles_ends)))

# Number of trailing cycles to drop, e.g. when the last cycle is too short.
# Only the magnitude is used, so 1 and -1 both drop the last cycle.
# (Previously a positive value kept only the FIRST n cycles.)
cycle_subtract = 0
n_drop = abs(cycle_subtract)
if n_drop:
    cycles = cycles[:max(len(cycles) - n_drop, 0)]
print('we are subtracting off this many cycles')
print(cycle_subtract)
# -----------------------------------------------------------
# which trials are a.p. and which reward
# One boolean per row of `cycles`: True when a tone of the given type starts
# inside [start, end). The masks are built from `cycles` itself so their
# length always matches it, including when trailing cycles were dropped.
is_rewardt = [any((t >= s) and (t < e) for t in odor_rw_ons) for s, e in cycles]
is_CSmt = [any((t >= s) and (t < e) for t in odor_CSm_ons) for s, e in cycles]
is_sht = [any((t >= s) and (t < e) for t in odor_sh_ons) for s, e in cycles]


we are subtracting off this many cycles
0


In [25]:
# For every cycle, record whether its reward was collected: True when at
# least one lick falls within 2 s after the first reward delivered inside the
# cycle, False when the cycle has no reward or no such lick.
# (The previous version compared the INDEX of the first matching lick with 0,
# so a collecting lick that happened to be licks[0] was reported as False.)
is_rewarded = []
for s, e in cycles:
    rewards_in_cycle = rewards[(rewards >= s) & (rewards < e)]
    if rewards_in_cycle.size == 0:
        is_rewarded.append(False)
        continue
    r = rewards_in_cycle[0]
    later_licks = licks - r
    is_rewarded.append(bool(np.any((later_licks >= 0) & (later_licks < 2))))

In [26]:
# Per-cycle lick counts in several windows of the trial, plus summary
# statistics and trial classifications derived from them.
licks = np.r_[ut.parse_behavior(behavior, 'LICK')]

# Window edges in seconds w.r.t. cycle start (tone onset is -CYCLE_START
# seconds after the cycle starts).
bs_on = -CYCLE_START - DELAY              # baseline window opens
cs_on = -CYCLE_START                      # tone onset / baseline window closes
cs_off = cs_on + CS_DURATION              # tone offset
tc_off = cs_off + DELAY                   # end of the delay
us_off = tc_off + AFTER_DELAY_DURATION    # end of the after-delay period

licks_bs = 1.*ut.compute_licks_during(licks, cycles, start=bs_on, end=cs_on)
licks_cs = 1.*ut.compute_licks_during(licks, cycles, start=cs_on, end=cs_off)
licks_tc = 1.*ut.compute_licks_during(licks, cycles, start=cs_off, end=tc_off)
licks_cs_tc = 1.*ut.compute_licks_during(licks, cycles, start=cs_on, end=tc_off)
licks_tc_us = 1.*ut.compute_licks_during(licks, cycles, start=cs_off, end=us_off)
licks_cs_tc_us = 1.*ut.compute_licks_during(licks, cycles, start=cs_on, end=us_off)

# Baseline lick rate: count divided by the length of the baseline window.
# (The previous divisor, DELAY+AFTER_DELAY_DURATION, is the length of the
# tc_us window, not of the baseline window.)
lickrates_bs = licks_bs / float(cs_on - bs_on)

# Trial-type masks as boolean arrays, so indexing below is boolean selection
# regardless of how the installed numpy treats lists of Python bools.
rw_mask = np.asarray(is_rewardt, dtype=bool)
csm_mask = np.asarray(is_CSmt, dtype=bool)
sh_mask = np.asarray(is_sht, dtype=bool)

# Divisions by zero are expected (e.g. cycles without any lick);
# np.nan_to_num cleans up the result, so silence the floating-point warnings.
with np.errstate(divide='ignore', invalid='ignore'):
    lick_ratios = np.nan_to_num(1.*(licks_tc_us-licks_bs)/(licks_tc_us+licks_bs))
    lick_di = np.nan_to_num(1.*(np.mean(licks_tc[rw_mask]-licks_bs[rw_mask]) -
                                np.mean(licks_tc[csm_mask]-licks_bs[csm_mask]))
                            / np.sqrt(0.5*(np.std(licks_tc)**2+np.std(licks_bs)**2)))

good_lick_trials = (licks_bs+licks_tc_us) >= 5

# "err" trials: high lick ratio with more than 4 licks in total.
# "corr" trials: no lick at all in the tc_us window.
is_errCSmt = (lick_ratios > 0.8) & ((licks_tc_us+licks_bs) > 4) & csm_mask
print(is_errCSmt.sum())

is_errsht = (lick_ratios > 0.8) & ((licks_tc_us+licks_bs) > 4) & sh_mask
print(is_errsht.sum())

is_corrCSmt = (licks_tc_us == 0) & csm_mask
print(is_corrCSmt.sum())

is_corrsht = (licks_tc_us == 0) & sh_mask
print(is_corrsht.sum())

print(licks_bs)

1
3
38
28
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  2.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  2.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  3.  0.  0.  0.  0.  0.  0.]




In [27]:
# Time axis of a single cycle, built by the project helper from the full time
# axis, the cycle boundaries, and the cycle duration/start constants.
time_ax_single = ut.extract_single_cycle_time_ax(time_ax, cycles,
                                                 cycle_duration=CYCLE_DURATION, cycle_start=CYCLE_START)

In [28]:
# Display the single-cycle time axis.
time_ax_single

array([-5.        , -4.73214247, -4.46428494, -4.19642742, -3.92856989,
       -3.66071236, -3.39285483, -3.1249973 , -2.85713978, -2.58928225,
       -2.32142472, -2.05356719, -1.78570966, -1.51785214, -1.24999461,
       -0.98213708, -0.71427955, -0.44642202, -0.1785645 ,  0.08929303,
        0.35715056,  0.62500809,  0.89286562,  1.16072314,  1.42858067,
        1.6964382 ,  1.96429573,  2.23215326,  2.50001078,  2.76786831,
        3.03572584,  3.30358337,  3.5714409 ,  3.83929842,  4.10715595,
        4.37501348,  4.64287101,  4.91072854,  5.17858606,  5.44644359,
        5.71430112,  5.98215865,  6.25001618,  6.5178737 ,  6.78573123,
        7.05358876,  7.32144629,  7.58930382,  7.85716134])

In [29]:
# Persist the notebook variables into the PickleShare store `db`; modules and
# other objects that cannot be stored are reported and skipped.
save_workspace(db)

Could not store variable 'ET'. Skipping...
Could not store variable 'di'. Skipping...
Could not store variable 'pickle'. Skipping...
Could not store variable 'os'. Skipping...
Could not store variable 'IPython'. Skipping...
Could not store variable 'pt'. Skipping...
Could not store variable 'sstats'. Skipping...
Could not store variable 'pl'. Skipping...
Could not store variable 'ut'. Skipping...
Could not store variable 'ip'. Skipping...
Could not store variable 'np'. Skipping...
Could not store variable 'sys'. Skipping...
