In [1]:
import scipy as sp
import scipy.io
import os
import numpy as np
import pandas as pd
import glob
import csv
import random as rand
from tqdm import tnrange, tqdm_notebook
from collections import Iterable
import matplotlib.pylab as plt
import random as rand
from ipywidgets import *
from scipy import stats
import importlib
import sys
sys.path.append(os.getcwd()+'/../')
from utils import utils
from utils import utils, zscores
from utils import plotting_utils as pu
from tqdm import tqdm
import warnings

import cmocean
cmap = cmocean.cm.thermal
colors = cmap

# from utils import auc_methods as ama
# import matplotlib.patches as patches
# from matplotlib import gridspec
# from sklearn.metrics import roc_curve, auc
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import label_binarize
# from matplotlib_venn import venn2

%load_ext autoreload
%autoreload 2
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
plt.close('all')

font = {'family' : 'Arial',
        'weight' : 'normal',
        'size'   : 6}

mpl.rc('font', **font)
mpl.rc('xtick', labelsize=6) 
mpl.rc('ytick', labelsize=6)
mpl.rc('axes', labelsize=6)

mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
size_mult = 1

  from collections import Iterable


In [2]:
# Close anything left open, then switch every text element to a 16 pt size.
plt.close('all')

font = dict(family='Arial', weight='normal', size=16)
mpl.rc('font', **font)

# Tick/axis label sizes and the PDF/PS font embedding type, set in one go.
mpl.rcParams.update({
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'axes.labelsize': 16,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

In [3]:
# Folder with the saved data files, resolved relative to the working directory.
DATA_DIR = os.path.abspath('../data')

# Load the already-processed tables; both files are read with the 'fixed' key.
log_df = pd.read_hdf(os.path.join(DATA_DIR, 'log_df_processed_02-28-2019.h5'), 'fixed')
unit_key_df = pd.read_hdf(os.path.join(DATA_DIR, 'unit_key_df_processed_02-28-2019.h5'), 'fixed')

In [4]:
# Pull out the Hit/Miss trials of a single example unit.
pos_trial = 'Touch Stim Hit'
target_unit = log_df['uni_id'] == '07903-22-164t2'
target_trials = log_df['trial_label'].isin(['Touch Stim Hit', 'Touch Stim Miss'])

example = log_df.loc[target_unit & target_trials]

# Boolean label per trial: True where the trial is a hit.
labels = example['trial_label'].eq(pos_trial).values

# One row per trial. Columns 39:59 are the first 500ms after stim onset;
# dividing by 40 goes from per-second firing rate to raw spike count.
spikes = np.vstack(example['spike_counts(stim_aligned)'].values)[:, 39:59] / 40


In [5]:
def cacl_mean_diff(labels, values, shuff = True):
    """Difference between the grand means of the positive and negative rows.

    Parameters
    ----------
    labels : array-like of bool (or 0/1), length n_rows
        Row membership; truthy entries mark the positive group.
    values : array-like, shape (n_rows, n_cols)
        Data matrix. Each group's mean is taken over all of its rows and columns.
    shuff : bool, default True
        If True, randomly permute ``labels`` (via ``np.random.permutation``)
        before splitting, giving one draw from the null distribution.

    Returns
    -------
    float
        ``mean(values[positive]) - mean(values[negative])``.
    """
    # Coerce to a real boolean mask. Without this a plain list breaks on `~`,
    # and 0/1 integer labels are silently treated as row indices rather than
    # as a mask, which picks the wrong rows.
    labels = np.asarray(labels, dtype=bool)
    values = np.asarray(values)

    if shuff:
        labels = np.random.permutation(labels)

    pos_mean = np.mean(values[labels, :])
    neg_mean = np.mean(values[~labels, :])
    return pos_mean - neg_mean
                    

def mean_permutation_test(labels, values, num_iter = 1000):
    """One-sided permutation test on the difference of group means.

    The observed statistic is ``cacl_mean_diff(labels, values, shuff=False)``.
    The null distribution is built from ``num_iter`` calls with shuffled labels.

    Parameters
    ----------
    labels : boolean array, length n_rows
        True marks rows of the positive group.
    values : array, shape (n_rows, n_cols)
        Data matrix passed through to ``cacl_mean_diff``.
    num_iter : int, default 1000
        Number of label shuffles.

    Returns
    -------
    p_val : float
        Fraction of shuffled statistics that are >= the observed statistic.
        This can be exactly 0.0, which only means "fewer than 1 in ``num_iter``".
    real_value : float
        Observed (unshuffled) mean difference.
    shuff_values : list of float
        The ``num_iter`` shuffled mean differences.
    """
    real_value = cacl_mean_diff(labels, values, shuff = False)
    shuff_values = [cacl_mean_diff(labels, values, shuff = True) for _ in range(num_iter)]
    # Compare as an array explicitly instead of relying on list-vs-scalar
    # reflected comparison.
    p_val = np.mean(np.asarray(shuff_values) >= real_value)
    return p_val, real_value, shuff_values

def permutation_test_compare_baseline(unit_key_df, trial_type, time_window, num_iter = 10000,
                                      bin_size = 0.025, stim_onset_bin = 39):
    """Permutation test of a post-stimulus window against an equal-length baseline.

    The arrays stored in ``unit_key_df[trial_type]`` are stacked into one
    matrix (one row per entry). The test window is compared with the
    same-length window that ends at stimulus onset, using
    ``mean_permutation_test`` on the pooled rows.

    Parameters
    ----------
    unit_key_df : DataFrame
        Must contain the column ``trial_type`` holding one array per row.
    trial_type : str
        Column name, e.g. ``'Touch Stim Hit(z_score)'``.
    time_window : sequence of two floats
        ``[start, stop]`` in seconds relative to stimulus onset.
    num_iter : int, default 10000
        Number of label shuffles.
    bin_size : float, default 0.025
        Bin width in seconds.
    stim_onset_bin : int, default 39
        Column index of the bin at stimulus onset.

    Returns
    -------
    tuple
        ``(p_val, test_stat, shuffled_stats)`` from ``mean_permutation_test``.

    Raises
    ------
    ValueError
        If the window is longer than the baseline available before onset.
    """
    # Seconds -> whole bins. The small epsilon stops exact multiples of the
    # bin size (e.g. 0.15 / 0.025, which evaluates just below 6 in floating
    # point) from being floored one bin short.
    offsets = np.floor(np.asarray(time_window, dtype=float)/bin_size + 1e-9).astype(int)
    start_ind = offsets[0] + stim_onset_bin
    stop_ind = offsets[1] + stim_onset_bin
    window_dur = stop_ind - start_ind

    # A longer window would make the baseline slice start negative and
    # silently wrap around to the end of the array.
    if window_dur > stim_onset_bin:
        raise ValueError(
            f"time_window spans {window_dur} bins but only {stim_onset_bin} baseline bins "
            "precede stimulus onset"
        )

    # Stack once and slice twice.
    all_bins = np.vstack(unit_key_df[trial_type].values)
    test_period = all_bins[:, start_ind:stop_ind]
    baseline_period = all_bins[:, stim_onset_bin-window_dur:stim_onset_bin]

    pooled_periods = np.vstack([test_period, baseline_period])
    pooled_period_labels = np.concatenate([[True]*test_period.shape[0], [False]*baseline_period.shape[0]])

    results = mean_permutation_test(pooled_period_labels,pooled_periods, num_iter = num_iter)

    # The printed "sem" divides the std over all entries of the window by
    # sqrt(number of rows).
    print(f"   mean z-score: {test_period.mean():.3f}, sem: {test_period.std()/np.sqrt(test_period.shape[0]):.2f}")
    print(f"   pval:{results[0]}, test_stat: {results[1]:.2f}, n_iter: {len(results[2])}\n")

    return results
     

def permutation_test_compare_trials(unit_key_df, trial_type1, trial_type2, time_window, num_iter = 10000,
                                    bin_size = 0.025, stim_onset_bin = 39):
    """Permutation test comparing two trial types within one time window.

    The arrays in ``unit_key_df[trial_type1]`` and ``unit_key_df[trial_type2]``
    are stacked (one row per entry) and cut to ``time_window``. Their rows are
    then pooled and passed to ``mean_permutation_test``, with ``trial_type1``
    as the positive group.

    Parameters
    ----------
    unit_key_df : DataFrame
        Must contain both trial-type columns, each holding one array per row.
    trial_type1, trial_type2 : str
        Column names, e.g. ``'Touch Stim Hit(z_score)'``.
    time_window : sequence of two floats
        ``[start, stop]`` in seconds relative to stimulus onset.
    num_iter : int, default 10000
        Number of label shuffles.
    bin_size : float, default 0.025
        Bin width in seconds.
    stim_onset_bin : int, default 39
        Column index of the bin at stimulus onset.

    Returns
    -------
    tuple
        ``(p_val, test_stat, shuffled_stats)`` from ``mean_permutation_test``.
    """
    # Seconds -> whole bins. The small epsilon stops exact multiples of the
    # bin size from being floored one bin short by floating-point error.
    offsets = np.floor(np.asarray(time_window, dtype=float)/bin_size + 1e-9).astype(int)
    start_ind = offsets[0] + stim_onset_bin
    stop_ind = offsets[1] + stim_onset_bin

    # Separate names for the sliced arrays so the string parameters are not shadowed.
    type1_vals = np.vstack(unit_key_df[trial_type1].values)[:, start_ind:stop_ind]
    type2_vals = np.vstack(unit_key_df[trial_type2].values)[:, start_ind:stop_ind]

    pooled_periods = np.vstack([type1_vals, type2_vals])
    pooled_period_labels = np.concatenate([[True]*type1_vals.shape[0], [False]*type2_vals.shape[0]])

    # Run the test once only; a second identical call would double the runtime
    # for a result that is thrown away.
    results = mean_permutation_test(pooled_period_labels,pooled_periods, num_iter = num_iter)

    # The element-wise difference needs both matrices to have the same shape.
    print(f"   mean z-score difference: {(type1_vals - type2_vals).mean():.2f}, sem: {(type1_vals - type2_vals).std()/np.sqrt(type1_vals.shape[0]):.2f}")
    print(f"   pval:{results[0]}, test_stat: {results[1]:.2f}, n_iter: {len(results[2])}\n")
    return results
    

### Statistics for elevated activity over baseline for different trial types

In [6]:
# Hit vs. Miss z-scores, 0-500 ms after stimulus onset.
print('hits vs miss 0-500ms')
hit_vs_miss_500ms = permutation_test_compare_trials(
    unit_key_df,
    trial_type1='Touch Stim Hit(z_score)',
    trial_type2='Touch Stim Miss(z_score)',
    time_window=[0, 0.5],
)


hits vs miss 0-500ms
   mean z-score difference: 0.98, sem: 0.08
   pval:0.0, test_stat: 0.98, n_iter: 10000



In [7]:
# --- Hit vs. Miss, full 0-500 ms window ---
print('hits vs miss 0-500ms')
hit_vs_miss_500ms = permutation_test_compare_trials(
    unit_key_df,
    trial_type1='Touch Stim Hit(z_score)',
    trial_type2='Touch Stim Miss(z_score)',
    time_window=[0, 0.5],
)

# --- Each touch trial type vs. its own baseline, 0-500 ms ---
print("hits vs baseline 0-500ms")
hit_vs_baseline_500ms = permutation_test_compare_baseline(
    unit_key_df, trial_type='Touch Stim Hit(z_score)', time_window=[0, 0.5])

print("miss vs baseline 0-500ms")
miss_vs_baseline_500ms = permutation_test_compare_baseline(
    unit_key_df, trial_type='Touch Stim Miss(z_score)', time_window=[0, 0.5])

# --- Hits split into an early (0-150 ms) and a late (150-500 ms) window ---
print("hits vs baseline: 0-150ms")
hit_vs_baseline_150ms = permutation_test_compare_baseline(
    unit_key_df, trial_type='Touch Stim Hit(z_score)', time_window=[0, 0.15])
print("hits vs baseline: 150-500ms")
hit_vs_baseline_150_500ms = permutation_test_compare_baseline(
    unit_key_df, trial_type='Touch Stim Hit(z_score)', time_window=[0.15, 0.5])

# --- Misses split the same way ---
print("miss vs baseline: 0-150ms")
miss_vs_baseline_150ms = permutation_test_compare_baseline(
    unit_key_df, trial_type='Touch Stim Miss(z_score)', time_window=[0, 0.15])
print("miss vs baseline: 150-500ms")
miss_vs_baseline_150_500ms = permutation_test_compare_baseline(
    unit_key_df, trial_type='Touch Stim Miss(z_score)', time_window=[0.15, 0.5])

# --- Visual-stimulus FA / CR columns (labelled "Touch block" in the printout), 0-500 ms ---
print("Touch block FA vs baseline 0-500ms")
FA_vs_baseline_500ms = permutation_test_compare_baseline(
    unit_key_df, trial_type='Visual Stim FA(z_score)', time_window=[0, 0.5])

print("Touch block CR vs baseline 0-500ms")
CR_vs_baseline_500ms = permutation_test_compare_baseline(
    unit_key_df, trial_type='Visual Stim CR(z_score)', time_window=[0, 0.5])




hits vs miss 0-500ms
   mean z-score difference: 0.98, sem: 0.08
   pval:0.0, test_stat: 0.98, n_iter: 10000

hits vs baseline 0-500ms
   mean z-score: 1.182, sem: 0.08
   pval:0.0, test_stat: 1.29, n_iter: 10000

miss vs baseline 0-500ms
   mean z-score: 0.202, sem: 0.05
   pval:0.0016, test_stat: 0.09, n_iter: 10000

hits vs baseline: 0-150ms
   mean z-score: 0.876, sem: 0.09
   pval:0.0, test_stat: 0.97, n_iter: 10000

hits vs baseline: 150-500ms
   mean z-score: 1.314, sem: 0.08
   pval:0.0, test_stat: 1.42, n_iter: 10000

miss vs baseline: 0-150ms
   mean z-score: 0.399, sem: 0.06
   pval:0.0, test_stat: 0.28, n_iter: 10000

miss vs baseline: 150-500ms
   mean z-score: 0.118, sem: 0.04
   pval:0.463, test_stat: 0.00, n_iter: 10000

Touch block FA vs baseline 0-500ms
   mean z-score: 0.502, sem: 0.06
   pval:0.0, test_stat: 0.44, n_iter: 10000

Touch block CR vs baseline 0-500ms
   mean z-score: -0.037, sem: 0.03
   pval:0.6829, test_stat: -0.01, n_iter: 10000



In [12]:
# 'Touch Stim CR' z-scores vs. baseline over the first 150 ms.
# NOTE(review): the variable name ends in `_500ms`, but the window tested here is 0-150 ms.
print("Visual block CR vs baseline 0-150ms")
tactile_CR_vs_baseline_500ms = permutation_test_compare_baseline(
    unit_key_df,
    trial_type='Touch Stim CR(z_score)',
    time_window=[0, 0.15],
)


Visual block CR vs baseline 0-150ms
   mean z-score: 0.35, sem: 0.06
   pval:0.0, test_stat: 0.41, n_iter: 10000



In [17]:
# 'Visual Stim CR' z-scores vs. baseline over the first 150 ms.
# NOTE(review): the variable name ends in `_500ms`, but the window tested here is 0-150 ms.
Visual_CR_vs_baseline_500ms = permutation_test_compare_baseline(
    unit_key_df,
    trial_type='Visual Stim CR(z_score)',
    time_window=[0, 0.15],
)


   mean z-score: -0.049, sem: 0.03
   pval:0.2895, test_stat: 0.01, n_iter: 10000



In [18]:
# 'Visual Stim Hit' z-scores vs. baseline over the first 150 ms.
# Stored under its own name so it does not overwrite the 'Visual Stim CR'
# result, and labelled in the output like the other comparisons.
print("Visual Stim Hit vs baseline 0-150ms")
Visual_Hit_vs_baseline_150ms = permutation_test_compare_baseline(unit_key_df, 'Visual Stim Hit(z_score)', [0,0.15])


   mean z-score: -0.050, sem: 0.03
   pval:0.0209, test_stat: 0.04, n_iter: 10000



In [19]:
# 'Visual Stim Miss' z-scores vs. baseline over the first 150 ms.
# Stored under its own name so it does not overwrite the 'Visual Stim CR'
# result, and labelled in the output like the other comparisons.
print("Visual Stim Miss vs baseline 0-150ms")
Visual_Miss_vs_baseline_150ms = permutation_test_compare_baseline(unit_key_df, 'Visual Stim Miss(z_score)', [0,0.15])


   mean z-score: -0.044, sem: 0.04
   pval:0.6491, test_stat: -0.01, n_iter: 10000



In [None]:
# Per-unit permutation test against baseline, collected into a dict keyed by unit.
# NOTE(review): `compare_to_baseline_unit_generator` and `permutation_test` are not
# defined in the visible part of this notebook, so this cell fails on a fresh kernel.
# NOTE(review): the printed label and the result name say "FA", but the generator is
# called with 'Touch Stim Miss' and a single unit id -- confirm which was intended.
print("Touch block FA vs baseline")

# NOTE(review): `touch_blocks` is not used below; the generator receives the full `log_df`.
touch_blocks = log_df[log_df['block_type'] == 'Whisker']
unit_gen = compare_to_baseline_unit_generator(log_df, ['07903-22-164t2'], 'Touch Stim Miss', [0,0.5], stim_length = 'long')
# Units for which the generator yields `labels is None` are skipped.
all_pvals_FA_baseline = {unit:permutation_test(labels, spikes) for unit, labels, spikes in unit_gen if labels is not None}


### Statistics for comparing activity between trial types

In [None]:
# Per-unit Hit-vs-Miss permutation test, collected into a dict keyed by unit.
# NOTE(review): `compare_trials_unit_generator` is not defined in the visible part of
# this notebook; it is assumed to yield (unit, labels, spikes) triples -- confirm.
unit_gen = compare_trials_unit_generator(log_df,unit_key_df['uni_id'], 'Touch Stim Hit', 'Touch Stim Miss',[0, 0.5], stim_length = 'long')

# `mean_permutation_test` returns (p_val, test_stat, shuffled_stats). Keep only
# the p-value so the dict really holds p-values and can be histogrammed later.
# Units without labels are skipped, as in the baseline comparison cell.
all_pvals_hit_miss_post_stim = {
    unit: mean_permutation_test(labels, spikes)[0]
    for unit, labels, spikes in unit_gen
    if labels is not None
}

In [None]:
def cum_dist(arr, bins):
    """Cumulative histogram of ``arr``.

    Returns a two-element list ``[cumulative_counts, bin_edges]``, where
    ``bin_edges`` is what ``np.histogram`` returns (one element longer than
    the counts).
    """
    counts, edges = np.histogram(arr, bins = bins)
    return [np.cumsum(counts), edges]

In [None]:
# Number of units that received a Hit-vs-Miss test result.
len(all_pvals_hit_miss_post_stim)

In [None]:
# NOTE(review): `all_pvals_hit_baseline_stim_period` is not assigned anywhere in the
# visible part of this notebook; this cell relies on state from elsewhere.
len(all_pvals_hit_baseline_stim_period)

In [None]:
# Four-panel figure: cumulative distribution of per-unit p-values for each comparison.
# NOTE(review): most of the `all_pvals_*` dicts used here are not assigned in the
# visible part of this notebook; they must exist before this cell is run.

P_VALUE_BINS = np.arange(0, 1.001, 0.001)   # histogram bin edges for the CDFs
ALPHA = 0.05                                # significance threshold marked on every panel


def _group_pvals(group):
    """Return the p-values held in ``group`` (a dict keyed by unit) as a float array.

    Entries may be bare p-values or ``(p_val, test_stat, shuffled_stats)`` tuples as
    returned by ``mean_permutation_test``; for tuples only the leading p-value is kept.
    """
    return np.array([v[0] if isinstance(v, tuple) else v for v in group.values()], dtype=float)


def _plot_pval_cdfs(ax, groups, line_colors, labels, y_positions, title):
    """Draw one cumulative p-value curve per group on ``ax``.

    Each curve is annotated with its label and the fraction of p-values below ALPHA.
    """
    for group, c, label, y_pos in zip(groups, line_colors, labels, y_positions):
        pvals = _group_pvals(group)
        frac_below_a = np.mean(pvals < ALPHA)
        counts, edges = cum_dist(pvals, P_VALUE_BINS)
        # Plot against the left bin edges, normalised by the number of units.
        ax.plot(edges[:-1], counts/len(group), color = c)
        ax.text(.65, y_pos, label + f' ({frac_below_a:.2f})', color = c, transform = ax.transAxes)
    ax.set_title(title, pad = 20)


fig, axes = plt.subplots(4,1, figsize = (7,18))

compare_to_baseline_vals = [
    all_pvals_hit_baseline,
    all_pvals_miss_baseline,
    all_pvals_FA_baseline,
    all_pvals_CR_baseline
]

compare_to_baseline_vals_stim = [
    all_pvals_hit_baseline_stim_period,
    all_pvals_miss_baseline_stim_period
]

compare_to_baseline_vals_post_stim = [
    all_pvals_hit_baseline_post_stim_period,
    all_pvals_miss_baseline_post_stim_period
]

# Panel 1: every trial type against baseline, full window.
_plot_pval_cdfs(axes[0], compare_to_baseline_vals, ['C0', 'k', 'C2', 'C3'],
                ['Hit', 'Miss', 'FA', 'CR'], [0.4, 0.3, 0.2, 0.1],
                'Elevated above baseline (0-500ms)')

# Panel 2: Hit vs. Miss (single curve, no annotation).
counts, edges = cum_dist(_group_pvals(all_pvals_hit_miss_post_stim), P_VALUE_BINS)
axes[1].plot(edges[:-1], counts/len(all_pvals_hit_miss_post_stim), '-', color = 'blue')
axes[1].set_title('Activity in "Hit" > activity in "Miss" (0-500ms)', pad = 20)

# Panels 3 and 4: Hit / Miss against baseline in the early and late windows.
_plot_pval_cdfs(axes[2], compare_to_baseline_vals_stim, ['C0', 'k'], ['Hit', 'Miss'], [0.4, 0.3],
                'Elevated above baseline (0-150ms)')
_plot_pval_cdfs(axes[3], compare_to_baseline_vals_post_stim, ['C0', 'k'], ['Hit', 'Miss'], [0.4, 0.3],
                'Elevated above baseline (150-500ms)')
axes[3].set_xlabel('p-value')

# Shared axis formatting.
for ax in axes:
    ax.set_ylabel('Cumulative fraction\nof neurons')
    ax.set_ylim(0,1)
    ax.set_xlim(0,1)
    ax.axvline(ALPHA, linestyle = '--', color = 'k')
    ax.set_xticks([ALPHA,0.5,1])

fig.subplots_adjust(hspace = .35, left = 0.3)
fig.savefig('permutation_test_fig2.png')

In [None]:
# Scratch permutation test on the example unit's `labels` / `spikes`.
# NOTE(review): `calc_euc_dist` is not defined anywhere in the visible part of this
# notebook (presumably a Euclidean-distance statistic, judging by its name), so this
# cell fails on a fresh kernel -- restore the helper or drop the cell.
# NOTE(review): `pos_mean` / `neg_mean` (per-column group means) are not used by the
# lines below.
pos_mean = np.mean(spikes[labels, :], axis = 0)
neg_mean = np.mean(spikes[~labels, :], axis = 0)

real_value = calc_euc_dist(labels, spikes, shuff = False)
shuff_values = [calc_euc_dist(labels, spikes, shuff = True) for i in range(10000)]
# Fraction of the 10000 shuffled statistics that are >= the observed one.
np.mean(shuff_values >= real_value)

In [None]:

# Display the observed (unshuffled) statistic computed in the previous cell.
real_value

In [None]:
# Largest statistic obtained across the shuffled runs.
np.max(shuff_values)

In [None]:
# List the columns available in the example-unit trial table.
example.columns

In [None]:
# NOTE(review): where `labels` is built earlier in this notebook it is taken via
# `.values`, i.e. a NumPy array, which has no `.isin` method -- this looks like
# leftover scratch that would raise AttributeError; confirm and remove.
labels.isin(['Touch Stim Hit', 'Touch Stim Miss'])

In [None]:
# Display `spikes` stacked into a single 2-D array (scratch inspection).
np.vstack(spikes)