# A Non-Parametric Bayesian Method for Inferring Hidden Causes
<a href="http://cocosci.berkeley.edu/tom/papers/ibpuai.pdf">Wood, F., Griffiths, T.L., Ghahramani, Z., 2006.<br />
In: Proceedings of the Conference on Uncertainty in Artificial Intelligence.</a>

### Requirements
* #### You need to install the module future; importing manually from \_\_future\_\_ is optional
* #### For hdf data import you also need pytables, which is not installed by default with Anaconda

### Batch execution
* #### ```batch_animal=msaxxyy_z jupyter nbconvert Bayesian.ipynb --to=html --execute --ExecutePreprocessor.timeout=-1 --output=xxyy_z_report.html```

In [None]:
#from future.utils import PY3
import future
from __future__ import (absolute_import, division,
                        print_function) #, unicode_literals)
import pandas as pd
import numpy as np
import time, os, warnings, imp, itertools
import IPython.display as disp
display = disp.display
import matplotlib as mpl, matplotlib.pyplot as plt
import scipy.stats as stats
zscore, describe = stats.mstats.zscore, stats.describe
import datetime
dt, td = datetime.datetime, datetime.timedelta

%matplotlib inline

In [None]:
import ca_lib as la
imp.reload(la)

In [None]:
from os import environ
# Animal ID handed over through the environment by a batch (nbconvert) run;
# stays None when the variable is not set, i.e. in interactive use.
batch_animal = environ.get('batch_animal')

## Load files

In [None]:
basedir = '../_share/Losonczi/'

# Show the folders available in the database
display(os.listdir(basedir))

# Pick the animal: a value supplied by the batch environment wins,
# otherwise the hard-coded interactive choice below is used.
if batch_animal is not None:
    # No frame rate is known for a batch run
    animal, FPS = batch_animal, None
else:
    animal, FPS = 'msa0216_4', 8
    # Alternative recordings (swap the line above for one of these):
    #animal, FPS = 'msa0316_1', 8
    #animal, FPS = 'msa0316_3', 8
    #animal, FPS = 'msa0316ag_1', 8
    #animal, FPS = 'msa0915_1', 30
    #animal, FPS = 'msa0915_2', 30
    #animal, FPS = 'msa1215_1', 30

print('selecting', animal)

# Show the content of the selected animal's folder
mydir = os.path.join(basedir, animal)
os.listdir(mydir)

In [None]:
# Load the recordings of the selected animal (trials and ROIs)
data = la.load_files(mydir)
# Only compare frame rates when an expected FPS was given
fps_mismatch = (FPS is not None) and (data.FPS != FPS)
if fps_mismatch:
    warnings.warn('FPS indication might be wrong.')
print(data.raw.shape, '\n', data.trials, '\n', data.rois)

## Display

In [None]:
# Post-Learning sessions may reuse a session_num, hence the additional
# index day_num; msa0316_1 is an example.
# Pre-Learning and Learning seem to use session_num as documented.
traits = data.experiment_traits
display(traits.head())
# Only the rows flagged in the 'day_leap' column
display(traits[traits['day_leap']])

## Save for matlab

In [None]:
# Scratch check: shape of the trait table after conversion to a NumPy record array
np.array(data.experiment_traits.to_records()).shape

In [None]:
# Scratch check: are ndarray and recarray the very same class?
np.ndarray == np.recarray

In [None]:
import scipy.io as sio
def cellarray(df, index, dropna_axis=None, fillna_axis=None, fillna_method=None):
    '''Split a DataFrame with MultiIndex into a 1D cellarray.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that is sliced with ``df.loc[key]`` for every key in `index`.
    index : sequence
        Keys to slice by; one cell is produced per key, in this order.
    dropna_axis : int or str, optional
        If given, labels along this axis that contain only NaN are dropped
        from each slice.
    fillna_axis : int or str, optional
        If given, gaps of each slice are filled along this axis following
        `fillna_method`, and whatever is still missing is set to 0.
    fillna_method : {'ffill', 'pad', 'bfill', 'backfill'}, optional
        Direction of the fill. With None only the 0 fill is applied.

    Returns
    -------
    numpy.ndarray
        1D object array of length ``len(index)``; element i holds the
        ``.values`` of the i-th slice.

    Raises
    ------
    ValueError
        If a fill is requested with an unsupported `fillna_method`.
    '''
    import warnings
    ca = np.empty(shape=len(index), dtype=object)
    for i, key in enumerate(index):
        tmp = df.loc[key]
        if dropna_axis is not None:
            tmp = tmp.dropna(axis=dropna_axis, how='all')
        if fillna_axis is not None:
            # fillna(method=...) is deprecated in recent pandas releases;
            # the dedicated ffill/bfill methods perform the same fill.
            if fillna_method in ('ffill', 'pad'):
                tmp = tmp.ffill(axis=fillna_axis)
            elif fillna_method in ('bfill', 'backfill'):
                tmp = tmp.bfill(axis=fillna_axis)
            elif fillna_method is not None:
                raise ValueError('Unknown fillna_method: %r' % (fillna_method,))
            tmp = tmp.fillna(value=0)
        # A Series slice has no columns attribute, hence getattr
        has_multiindex = (isinstance(tmp.index, pd.MultiIndex) or
                          isinstance(getattr(tmp, 'columns', None), pd.MultiIndex))
        if has_multiindex:
            warnings.warn('Matrix in a cell has multiindex.')
        ca[i] = tmp.values
    return ca

In [None]:
### Select active ROIs and prepare them for output
# Trial IDs: first level of the row MultiIndex
ix = data.mirow.levels[0]
# Fill rate of spiking per row (unstacking with fill_value=0 gives a full table)
mea = data.spike.unstack('time', fill_value=0).mean(axis=1)
# Keep ROIs that are present in almost all frames of almost all trials
# (mask_roi) and that are active (mean above 0.02)
is_active = mea > 0.02
keep = data.mask_roi & is_active
# Labels of the ROIs to keep
rois = data.mask_roi[keep].index
# Statistics
print('Keep %d ROIs out of %d.' % (len(rois), len(data.mask_roi)))
# Complete (trial, ROI) grid used as reindexing target
ref = pd.MultiIndex.from_product((ix, rois))
def prep(df, fill_value=None):
    '''Reindex (fill in the gaps) and split DataFrame to cellarray.

    The table is aligned to the module-level (trial, ROI) grid `ref` and to
    the columns `data.icol`, then cut into one matrix per trial ID in `ix`.
    Inside each matrix the all-NaN columns are dropped, the remaining gaps
    are forward-filled along axis 0 and anything left is set to 0
    (see `cellarray`).

    Parameters
    ----------
    df : pandas.DataFrame
        Table to align and split.
    fill_value : optional
        Value handed to ``DataFrame.reindex`` for the cells the reindexing
        creates. The default None keeps the previous behaviour.

    Returns
    -------
    numpy.ndarray
        1D object array with one matrix per trial ID.
    '''
    # Forward the caller's choice instead of a hard-coded None
    df = df.reindex(index=ref, columns=data.icol, fill_value=fill_value)
    return cellarray(df, ix, dropna_axis=1, fillna_axis=0, fillna_method='ffill')

# Collect everything that goes into the MATLAB file:
# one cell array per signal ...
w = {}
w['transients'] = prep(data.spike)
w['filtered'] = prep(data.filtered)
w['raw'] = prep(data.raw)
w['mask'] = prep(data.mask)
# ... plus the labels of the three axes
w['trials'] = data.mirow.levels[0].values.astype(str)
w['rois'] = rois.values.astype(str)
w['frames'] = data.icol.values
# The file is named after the animal
sio.savemat(animal + '.mat', w)

In [None]:
include_w = True
# Categoric features as (column, value) pairs; the 'port' feature is
# optional and sits between the context features and the airpuff
cat_features = [('context', 'CS+'), ('context', 'CS-')]
if include_w:
    cat_features.append(('port', 'W+'))
cat_features.append(('puffed', 'A+'))
# Ordinal features as (column, list of allowed values) pairs
ord_features = []

## Bayesian inference

#### Prep data

In [None]:
def create_features(cat_list, list_ord, data):
    '''Build a boolean feature table from categoric columns.

    Parameters
    ----------
    cat_list : iterable of (column, criterion)
        Each pair yields one feature that is True where
        ``data[column] == criterion``.
    list_ord : list
        Ordinal feature definitions. Currently not used; kept so that the
        signature stays stable.
    data : pandas.DataFrame
        Table holding the referenced columns.

    Returns
    -------
    pandas.DataFrame
        One boolean column per pair, named ``'<position>_<column>'`` and
        indexed like `data`. Without any pair the result has no columns.
    '''
    features = []
    for col, (column, criterion) in enumerate(cat_list):
        feat = data.loc[:, column] == criterion
        feat.name = '%d_%s' % (col, column)
        features.append(feat)
    if not features:
        # pd.concat refuses an empty list; return an empty table instead
        return pd.DataFrame(index=data.index)
    return pd.concat(features, axis=1)

In [None]:
from collections import Counter
# Number of trials per learning epoch
e = Counter(data.experiment_traits['learning_epoch'])
# Cumulative trial counts in epoch order, starting from 0
ev = np.cumsum([0, e['Pre-Learning'], e['Learning'], e['Post-Learning']])
e, ev

In [None]:
# Boolean feature table: one column per entry of cat_features
cf = create_features(cat_features,ord_features,data.experiment_traits)
cf

In [None]:
# Overall fraction of True entries in the feature table
p = cf.values.mean()
p

#### Init model

In [None]:
import BayesianHiddenCause as bc
imp.reload(bc)

In [None]:
# Model set up with p, the mean of the feature table.
# NOTE(review): the meaning of the second argument (3) is defined in
# BayesianHiddenCause and is not visible here - confirm before changing it.
bba = bc.BernoulliBetaAssumption(p, 3)

In [None]:
# Hand over the features as 0/1 integers, transposed so that each row is a
# feature and each column corresponds to one row of cf.
bba.observe(cf.astype(int).T)

In [None]:
# NOTE(review): the argument 5 is opaque from this notebook - see
# Gibbs_prepare in BayesianHiddenCause for its meaning.
bba.Gibbs_prepare(5)

In [None]:
# Model side: Z, Y and the estimate returned by Px()
fig = bc.plot_matrix_product(
    'i (observabes)', bba.Z, 'Z',
    't (trials)', bba.Y, 'Y',
    'k (causes)', bba.Px(), 'X estimated')
fig.suptitle('Estimate')
# Data side: empty placeholders for Z and Y, the observed features as X
observed = cf.values.astype(int).T
fig = bc.plot_matrix_product(
    'i (observabes)', np.array([[]]), 'Z',
    't (trials)', np.array([[]]), 'Y',
    'k (causes)', observed, 'X observed')
fig.suptitle('Original')

#### Iterate

In [None]:
# Run 100 Gibbs iterations ahead of the sampling cell below
n_iterations = 100
for i in range(n_iterations):
    bba.Gibbs_iterate()

In [None]:
# Take 1000 snapshots of Z, 10 Gibbs iterations apart, and record every
# row of Z.T as a tuple
links = []
for snapshot in range(1000):
    for step in range(10):
        bba.Gibbs_iterate()
    links.extend(tuple(col) for col in bba.Z.T)

In [None]:
from collections import Counter
# How often each distinct tuple occurred among the recorded snapshots
c = Counter(links)
c

In [None]:
def simulate(learner, test_samples, given_i):
    '''Predict the learner's response to the test samples.

    Only the features marked True in given_i are handed to the learner as
    known. Every binary latent state of length learner.K is visited; the
    entries of the learner's Bernoulli parameters that are NOT marked in
    given_i are averaged over the states, weighted by the probability the
    learner assigns to each state.

    Returns the weighted average (weighted sum divided by total weight).
    '''
    weighted_sum = 0
    total_weight = 0
    # Enumerate all 2**K combinations of the latent causes
    for state in itertools.product([0, 1], repeat=learner.K):
        latent = np.array(state)
        # Probability of this latent state given the known features
        log_weight = learner.logP_y_XZ(latent, X=test_samples, given_i=given_i)
        weight = np.exp(log_weight)
        # Bernoulli parameters of the observables under this latent state
        bernoulli = learner.P_x_YZ(Y=latent[:, np.newaxis])
        # The response: the parameters of the features that were not given
        response = bernoulli[~given_i]
        # Accumulate
        total_weight += weight
        weighted_sum += weight * response
    # Average response over all latent states
    return weighted_sum / total_weight

In [None]:
# Recall the (column, value) feature definitions and their order
cat_features

In [None]:
# Given variables: all but US (the 'puffed' column).
# Built with a list comprehension: on Python 3 map() returns an iterator,
# which np.array would wrap into a 0-d object array instead of a boolean mask.
given_i = np.array([feature[0] != 'puffed' for feature in cat_features])
# Decay of past experiences
decay_time = np.inf

# Define a well established set of samples where we want to know the behavior
# V0
#test_samples = None
# V1 (CS+, W+), same, (CS+, W-), same, (CS-, W+), same
#test_samples = np.array([[1,0,1,1],[1,0,1,0],[1,0,0,1],[1,0,0,0],[0,1,1,0],[0,1,1,1]]).T
#test_names = ['CS+, W+', 'same', 'CS+, W-', 'same', 'CS-, W+', 'same']
# V2 (CS+, W+), (CS+, W-), (CS-, W+), (CS-, W-) OR (CS+), (CS-)
if include_w:
    test_samples = np.array([[1,0,1,1],[1,0,0,1],[0,1,1,0],[0,1,0,1]]).T
    test_names = ['CS+, W+', 'CS+, W-', 'CS-, W+', 'CS-, W-']
else:
    test_samples = np.array([[1,0,1],[0,1,1]]).T
    test_names = ['CS+', 'CS-']

responses = [] # np.empty(shape=(0,len(test_samples)))
# Train a fresh learner with the first ntrials trials (equivalent weights)
# and record its response to the test samples
for ntrials in range(1, len(cf)):
    learner = bc.BernoulliBetaAssumption(p, 3, decay_time=decay_time)
    # Features are rows and trials are columns after the transpose
    learner.observe(cf.astype(int).T.iloc[:, :ntrials])
    learner.Gibbs_prepare(5)
    for i in range(0, 100):
        learner.Gibbs_iterate()
    resp = simulate(learner, test_samples, given_i)
    responses.append(resp)
    print(ntrials, resp)

In [None]:
# Plot whether the animal should expect the US based on its previous experiences
fig = plt.figure()
ax = fig.gca()
# One x position per recorded response, one curve per test sample
trial_axis = list(range(len(responses)))
response_curves = np.concatenate(responses)
lines = ax.plot(trial_axis, response_curves)
ax.set_xlabel('Trial ID')
ax.set_ylabel('P(expect airpuff)')
# Vertical markers at the cumulative epoch counts
ax.vlines(ev, 0, 1)
plt.legend(test_names, loc='upper left')
#### Convert and save most frequent vectors

In [None]:
# Index levels are named after the feature values (second item of each pair)
level_names = [feature[1] for feature in cat_features]
# One column (named after the animal) holding the count of every tuple
constellations = pd.DataFrame(c, index=[animal]).T
constellations.index.names = level_names
# Rebuild the index with boolean flags in place of the 0/1 entries
flags = np.array(constellations.index.tolist()).astype(bool).T
constellations.index = pd.MultiIndex.from_arrays(flags, names=level_names)
constellations

In [None]:
# Everything that gets stored for this animal, keyed by table name
result = dict(constellations=constellations)

In [None]:
# Hand the result dictionary to the project's HDF helper; the file name
# is derived from the animal ID
la.store_to_hdf('baydb_'+animal+'.h5', result)