# Demo to replicate the one-shot learning behaviour demonstrated by rats using a fully neural implementation of schemas

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import sys
import time
sys.path.append("C:/Users/Razer/PycharmProjects/schema4one/examples/")
from demo_utils import run_hebagent_multiplepa_expt, get_default_hp, Maze, ResACAgent
import pandas as pd

## Initialize hyperparameters

In [2]:
# Start from the project defaults for the '6pa' task, then override selected entries.
hp = get_default_hp(task='6pa')

# Trial length limits
hp['time'] = 600  # maximum time for each training trial
hp['probetime'] = 60  # maximum time for each probe trial

# Learning settings
hp['alr'] = 0.00005  # actor learning rate
hp['clr'] = 0.0002  # critic learning rate
hp['taug'] = 3000  # time constant for the reward prediction error (RPE)
hp['Rval'] = 5  # reward value
hp['stochlearn'] = True  # True: learn associations in one shot via node perturbation

# Control and display settings
hp['usenmc'] = True  # True: use the Neural Motor Controller
hp['contbeta'] = 0.8  # beta weighting between Actor and Schema when choosing movement direction
hp['render'] = False  # True: visualise movement trial by trial

# Run label assembled from the settings above; the last field is the current
# time.monotonic() reading, appended as a per-run tag.
exptname_fields = (
    hp['contbeta'], hp['ach'], hp['glr'], hp['stochlearn'], hp['clr'],
    hp['taug'], hp['alr'], hp['Rval'], hp['tstep'], time.monotonic(),
)
hp['exptname'] = '6pa_res_{}cb_{}ach_{}glr_{}sl_{}clr_{}tg_{}alr_{}R_{}dt_{}'.format(*exptname_fields)
print(hp['exptname'])


6pa_res_0.8cb_5e-05ach_7.5e-06glr_Truesl_0.0002clr_3000tg_5e-05alr_5R_20dt_181364.437


## Initialize Environments
- OPA: 20 sessions - 17 training, 3 probes
- 2NPA, 6NPA or NM: 2 sessions - 1 training, 1 probe

In [3]:
# Session lists handed to the experiment runner through its `noreward` argument.
trainprobesess = [2, 9, 16]  # used by the 20-session runs
oneshotprobesess = [2]  # used by the 2-session runs

# Maze configuration names; the evaluation plot indexes this list to rebuild each maze.
envtypes = ['train', 'opa', '2npa', '6npa', 'nm', '6nm']

# Labels for the bar charts and trajectory-grid rows in the plotting cells.
probenames = ['PS1', 'PS2', 'PS3']
evalprobenames = ['OPA', '2NPA', '6NPA', 'NM']
nmprobenames = ['NM1', 'NM2', 'NM3', '6NPANM']

# Maze environment shared by every experiment in this notebook.
env = Maze(hp)

## Initialize neural agent with neural motor controller

In [4]:
# Build the agent from the hyperparameters in `hp`; this same `agent` object is
# passed to every run_hebagent_multiplepa_expt call in the notebook.
agent = ResACAgent(hp=hp)

## Train Neural agent on OPA for 20 sessions

In [None]:
# 20 sessions on the 'train' maze, starting without preloaded weights
# (useweight=None); the sessions in trainprobesess are passed as `noreward`.
(mpalatency, mpapath, mpaweights,
 mpavisitratio, mpalearned) = run_hebagent_multiplepa_expt(
    mtype='train',
    env=env,
    agent=agent,
    sessions=20,
    noreward=trainprobesess,
    useweight=None,
)

train env created. Training ...
C4 | S 90 | Dgr 0 | Recall Goal [-0.02 -0.14  0.98]
C5 | S 132 | Dgr 0 | Recall Goal [-0.04 -0.01  1.11]
C1 | S 53 | Dgr 0 | Recall Goal [-0.07 -0.03  1.02]
C3 | S 34 | Dgr 0 | Recall Goal [-0.05 -0.1   1.07]
C2 | S 131 | Dgr 0 | Recall Goal [0.06 0.01 1.09]
C6 | S 120 | Dgr 0 | Recall Goal [ 0.01 -0.26  1.02]
############## train Session 1/20, Avg Steps 4705.8, ##############
C5 | S 60 | Dgr 10.5 | Recall Goal [-0.07 -0.06  0.82]
C2 | S 60 | Dgr 5.44 | Recall Goal [ 0.07 -0.1   0.74]
C6 | S 60 | Dgr 0 | Recall Goal [-0.07 -0.19  0.78]


In [None]:
# Learning curve over the training run
plt.figure()
plt.plot(mpalatency)

# Visit ratio for each probe session as a bar chart
df = pd.DataFrame(mpavisitratio, index=probenames)
ax = df.plot(kind='bar', legend=False)
ax.axhline(y=100/6, color='red', linestyle='--')  # chance performance 1 of 6 correct targets
ax.set_ylim(0, 100)
ax.set_ylabel('Visit Ratio')

# Trajectory grid: one row per probe session, one column per cue
col = ['b', 'g', 'r', 'y', 'm', 'k']
env.make('train')
half_width = env.au / 2  # axes span [-au/2, au/2] in both directions
f, axs = plt.subplots(nrows=3, ncols=6, figsize=(12, 8))
for ax in axs.flat:
    ax.set_xticks([])
    ax.set_yticks([])

for probe_idx in range(3):
    for cue_idx in range(6):
        trajectory = mpapath[probe_idx, cue_idx]
        ax = axs[probe_idx, cue_idx]

        # Column titles on the top row only, row labels on the first column only
        if probe_idx == 0:
            ax.set_title(f"Cue {cue_idx+1}")
        if cue_idx == 0:
            ax.set_ylabel(probenames[probe_idx])

        ax.axis((-half_width, half_width, -half_width, half_width))
        ax.set_aspect('equal', adjustable='box')

        # Draw the six reward locations above the path (zorder 3 vs 2)
        for rew_idx, rew_colour in enumerate(col):
            ax.add_artist(plt.Circle(env.rlocs[rew_idx], env.rrad, color=rew_colour, zorder=3))

        ax.plot(trajectory[:, 0], trajectory[:, 1], col[cue_idx], alpha=0.5, zorder=2)
plt.tight_layout()
plt.show()

## Use trained agent to learn new PAs in one-shot

In [None]:
# Control condition: 2 sessions on the 'opa' maze (one training, one probe per
# oneshotprobesess), starting from the weights returned by the 20-session run.
(opalatency, opapath, opaweights,
 opavisitratio, opalearned) = run_hebagent_multiplepa_expt(
    mtype='opa',
    env=env,
    agent=agent,
    sessions=2,
    noreward=oneshotprobesess,
    useweight=mpaweights,
)

In [None]:
# 2NPA condition: 2 sessions on the '2npa' maze (one training, one probe per
# oneshotprobesess), starting from the weights returned by the 20-session run.
(npa2latency, npa2path, npa2weights,
 npa2visitratio, npa2learned) = run_hebagent_multiplepa_expt(
    mtype='2npa',
    env=env,
    agent=agent,
    sessions=2,
    noreward=oneshotprobesess,
    useweight=mpaweights,
)

In [None]:
# 6NPA condition: 2 sessions on the '6npa' maze (one training, one probe per
# oneshotprobesess), starting from the weights returned by the 20-session run.
(npa6latency, npa6path, npa6weights,
 npa6visitratio, npa6learned) = run_hebagent_multiplepa_expt(
    mtype='6npa',
    env=env,
    agent=agent,
    sessions=2,
    noreward=oneshotprobesess,
    useweight=mpaweights,
)

In [None]:
# NM condition: 2 sessions on the 'nm' maze (one training, one probe per
# oneshotprobesess), starting from the weights returned by the 20-session run.
(nmlatency, nmpath, nmweights,
 nmvisitratio, nmlearned) = run_hebagent_multiplepa_expt(
    mtype='nm',
    env=env,
    agent=agent,
    sessions=2,
    noreward=oneshotprobesess,
    useweight=mpaweights,
)

In [None]:
# One-shot evaluation: stack the visit ratios of the four conditions and plot them
evaldigrate = np.concatenate([opavisitratio, npa2visitratio, npa6visitratio, nmvisitratio])
print(evaldigrate)

df2 = pd.DataFrame(evaldigrate, index=evalprobenames)
ax = df2.plot(kind='bar', legend=False)
ax.axhline(y=100/6, color='red', linestyle='--')  # chance performance 1 of 6 correct targets
ax.set_ylim(0, 100)
ax.set_ylabel('Visit Ratio')


# Trajectory grid: one row per condition (OPA, 2NPA, 6NPA, NM), one column per cue
col = ['b', 'g', 'r', 'y', 'm', 'k']
allevalpaths = [opapath, npa2path, npa6path, nmpath]
npa2cues = [7, 2, 3, 4, 5, 8]
# Cue numbers used in the panel titles of each row
row_cue_numbers = [range(1, 7), npa2cues, range(11, 17), range(11, 17)]
f, axs = plt.subplots(nrows=4, ncols=6, figsize=(12, 8))
for ax in axs.flat:
    ax.set_xticks([])
    ax.set_yticks([])

for cond_idx in range(4):
    # envtypes[0] is 'train', so the evaluation mazes start at index 1
    env.make(envtypes[cond_idx+1])
    cond_paths = allevalpaths[cond_idx]
    for cue_idx in range(6):
        trajectory = cond_paths[0, cue_idx]
        ax = axs[cond_idx, cue_idx]
        ax.set_title(f"Cue {row_cue_numbers[cond_idx][cue_idx]}")

        if cue_idx == 0:
            ax.set_ylabel(evalprobenames[cond_idx])

        half_width = env.au / 2
        ax.axis((-half_width, half_width, -half_width, half_width))
        ax.set_aspect('equal', adjustable='box')

        # Draw the six reward locations above the path (zorder 3 vs 2)
        for rew_idx, rew_colour in enumerate(col):
            ax.add_artist(plt.Circle(env.rlocs[rew_idx], env.rrad, color=rew_colour, zorder=3))

        ax.plot(trajectory[:, 0], trajectory[:, 1], col[cue_idx], alpha=0.5, zorder=2)
plt.tight_layout()
plt.show()



## Introduce trained agents to a New Maze (Remap place cells) for 20 sessions followed by 2 sessions with 6 new PAs

In [None]:
# New-maze training: 20 sessions on the 'nm' maze with the same probe schedule
# as the original training run, starting from the weights it returned.
(nm20latency, nm20path, nm20weights,
 nm20visitratio, nm20learned) = run_hebagent_multiplepa_expt(
    mtype='nm',
    env=env,
    agent=agent,
    sessions=20,
    noreward=trainprobesess,
    useweight=mpaweights,
)

In [None]:
# 6NM condition: 2 sessions on the '6nm' maze (probe per oneshotprobesess),
# starting from the weights returned by the 20-session new-maze run.
(nm6latency, nm6path, nm6weights,
 nm6visitratio, nm6learned) = run_hebagent_multiplepa_expt(
    mtype='6nm',
    env=env,
    agent=agent,
    sessions=2,
    noreward=oneshotprobesess,
    useweight=nm20weights,
)

In [None]:
# Plot relearning in the new maze and the one-shot performance that follows.

# Learning curve over the 20-session new-maze run
plt.figure()
plt.plot(nm20latency)

# Visit ratios: the three new-maze probes followed by the 6-new-PA probe
nmvisitratios = np.concatenate([nm20visitratio, nm6visitratio])
df3 = pd.DataFrame(nmvisitratios, index=nmprobenames)
# Bind the Axes returned by DataFrame.plot. Without this, the calls below act
# on whatever `ax` was left over from an earlier cell and the chance line,
# y-limits and y-label never reach this bar chart.
ax = df3.plot(kind='bar', legend=False)
ax.axhline(y=100/6, color='red', linestyle='--')  # chance performance 1 of 6 correct targets
ax.set_ylim(0, 100)
ax.set_ylabel('Visit Ratio')


# Trajectory grid: rows 0-2 are the new-maze probes, row 3 is the 6-new-PA probe
col = ['b', 'g', 'r', 'y', 'm', 'k']
allnmpaths = [nm20path, nm6path]
nm6cues = [7, 8, 9, 10, 17, 18]
f, axs = plt.subplots(nrows=4, ncols=6, figsize=(12, 8))
for ax in axs.flatten():
    ax.set_xticks([])
    ax.set_yticks([])

for p in range(4):
    # Rebuild the maze that matches this row so env.rlocs / env.au are current
    if p == 3:
        env.make('6nm')
    else:
        env.make('nm')

    for c in range(6):
        ax = axs[p, c]
        if p == 3:
            # Last row: the single probe of the 2-session '6nm' run
            ax.set_title(f"Cue {nm6cues[c]}")
            path = allnmpaths[1]
            K = path[0, c]
        else:
            # Rows 0-2: probe p of the 20-session 'nm' run
            ax.set_title(f"Cue {c+11}")
            path = allnmpaths[0]
            K = path[p, c]

        if c == 0:
            ax.set_ylabel(nmprobenames[p])

        ax.axis((-env.au / 2, env.au / 2, -env.au / 2, env.au / 2))
        ax.set_aspect('equal', adjustable='box')

        # Draw the six reward locations above the path (zorder 3 vs 2)
        for r in range(6):
            circle = plt.Circle(env.rlocs[r], env.rrad, color=col[r], zorder=3)
            ax.add_artist(circle)

        ax.plot(K[:, 0], K[:, 1], col[c], alpha=0.5, zorder=2)
plt.tight_layout()
plt.show()