# Demo notebook: how to run models on static mouse datasets

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and set it to mode 2, so edited modules are
# re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2 

In [2]:
import os
import datajoint as dj

# Connection settings are taken from environment variables rather than hard-coded.
for setting, env_var in (('database.host', 'DJ_HOST'),
                         ('database.user', 'DJ_USER'),
                         ('database.password', 'DJ_PASS')):
    dj.config[setting] = os.environ[env_var]

dj.config['enable_python_native_blobs'] = True
dj.config['display.limit'] = 200

# Short tag that selects the schema (alternative tag: "test").
name = 'iclr'
dj.config['schema_name'] = "konstantin_nnsysident_{}".format(name)

In [334]:
import torch
import numpy as np
import pickle 
import pandas as pd
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 10)
from collections import OrderedDict
# The ABC aliases (Iterable, Mapping, ...) were removed from `collections` in
# Python 3.10; `collections.abc` provides them on every Python 3 version.
from collections.abc import Iterable
import matplotlib.pyplot as plt
import matplotlib
import re
import seaborn as sns

import nnfabrik
from nnfabrik.main import *
from nnfabrik import builder
from nnfabrik.utility.hypersearch import Bayesian
from dataport.bcm.static import PreprocessedMouseData

from nnsysident.tables.experiments import *
from nnsysident.tables.bayesian import *
from nnsysident.datasets.mouse_loaders import static_shared_loaders
from nnsysident.datasets.mouse_loaders import static_loaders
from nnsysident.datasets.mouse_loaders import static_loader
from nnsysident.tables.scoring import OracleScore, OracleScoreTransfer

In [4]:
def find_number(text, c):
    """Return the integer that directly follows the literal prefix `c` in `text`.

    Parameters
    ----------
    text : str
        String to search, e.g. ``"neuron_n=50, image_n=100"``.
    c : str
        Prefix that must immediately precede the digits. It is matched
        literally: regex metacharacters in it are escaped.

    Returns
    -------
    int or None
        The number following `c`, or ``None`` if `c` followed by digits does
        not occur in `text`.

    Raises
    ------
    ValueError
        If `c` followed by digits occurs more than once in `text`.
    """
    # re.escape keeps characters such as '.', '+' or '(' in the prefix from
    # being interpreted as regex syntax, which would match unintended text
    # (or silently add capture groups).
    matches = re.findall(r'%s(\d+)' % re.escape(c), text)
    if not matches:
        return None
    if len(matches) > 1:
        raise ValueError('More than one number found..')
    return int(matches[0])

In [None]:
# Find the best hyperparameters among the Bayesian transfer runs

# Join the trained-model table with its model, dataset and trainer tables and
# keep only the se2d_fullSXF entries.
# (A further restriction that was tried: 'trainer_hash = "3c6008284286683e7ce19e9e1269f507"')
bayesian_runs = (TrainedModelBayesianTransfer() * ModelBayesian * DatasetBayesian * Trainer
                 & 'model_fn = "nnsysident.models.models.se2d_fullSXF"')
data = pd.DataFrame(bayesian_runs.fetch())

# Expand every config dict into its own columns; the original dict columns are kept.
for config_column in ('dataset_config', 'model_config', 'trainer_config'):
    data = pd.concat([data, data[config_column].apply(pd.Series)], axis=1)

data = data.loc[data['multi_match_n'] == 1000]

# Replace checkpoint file names by short, readable labels.
checkpoint_labels = {
    'ed4a5d0bf35948b7ea65ee48bb5b5d93.pth.tar': '4factorized',
    'd99820ef586bbebcfce36d7bc89877f3.pth.tar': '11gaussian',
    'f6cf96b1ed6bdbec448acbd2742f1c05-4gaussian.pth.tar': '4gaussian',
    '7df6320a324cb7463fd504df5559f456 - 4factorized-shared.pth.tar': '4factorized-shared',
    'ed4a5d0bf35948b7ea65ee48bb5b5d93-4factorized.pth.tar': '4factorized',
}
data = data.replace({'transfer_state_dict': checkpoint_labels})

import hiplot as hip
from nnsysident.tables.bayesian import *

# Columns shown in the parallel-coordinates plot ('multi_match_n' and 'neuron_n' are left out).
cols = ['score', 'gamma_readout', 'share_features', 'image_n', 'transfer_state_dict', 'normalize']
one_exp_h = data.set_index('score', drop=False).copy()
hip.Experiment.from_dataframe(one_exp_h[cols]).display(force_full_width=False)

In [486]:
# Report how many TrainedModel entries are populated for each experiment (seeds 1-5).
experiment_names = (
    'Real, Direct, se2d_fullSXF, 4-set',
    'Real, Direct, share_features=True, se2d_fullSXF, 4-set',
)
for experiment_name in experiment_names:
    restriction = (Experiments.Restrictions
                   & 'seed in (1,2,3,4,5)'
                   & 'experiment_name="{}"'.format(experiment_name))
    TrainedModel.progress(restriction)

TrainedModel         Completed 450 of 450 (100.0%)   2020-09-04 20:14:42
TrainedModel         Completed 0 of 150 (0.0%)   2020-09-04 20:14:42


In [485]:
# Display the schema's jobs table (one row per job, with status, host and timestamp).
schema.jobs

table_name  className of the table,key_hash  key hash,"status  if tuple is missing, the job is available",key  structure containing the key,error_message  error message returned if failed,error_stack  error stack if failed,user  database user,host  system hostname,pid  system process id,connection_id  connection_id(),timestamp  automatic timestamp
__trained_model,2a2338fa23238a34afbc3cec3b3d99a6,reserved,=BLOB=,,=BLOB=,konstantin@134.2.168.88,c48b9d79549b,1,4020831,2020-09-04 20:14:13
__trained_model,2cbc56f42456984eb7169064835acd0b,reserved,=BLOB=,,=BLOB=,konstantin@134.2.168.88,ec9bd4f46cc8,1,4020821,2020-09-04 20:14:03
__trained_model,3254c0cef0f9ca4aea2dc7f3d4b1bf3e,reserved,=BLOB=,,=BLOB=,konstantin@134.2.168.88,c48ef30ff49d,1,4020848,2020-09-04 20:14:32
__trained_model,68ae70ca2d13a63e2705fe072800c5d6,reserved,=BLOB=,,=BLOB=,konstantin@134.2.168.88,c99b73489d6f,1,4020812,2020-09-04 20:13:53
__trained_model,78abc1c5914b76d2157517cd6bfb0dc9,reserved,=BLOB=,,=BLOB=,konstantin@134.2.168.88,c0707607ca0f,1,4020797,2020-09-04 20:13:36
__trained_model,8ec91ac38cf68bc1531e066221f410ed,reserved,=BLOB=,,=BLOB=,konstantin@134.2.168.88,6fafc51d03a7,1,4020787,2020-09-04 20:13:25
__trained_model,93ecca9fe42a6bf0b42f7745ee4cb39d,reserved,=BLOB=,,=BLOB=,konstantin@134.2.168.88,dc816b27535b,1,4020840,2020-09-04 20:14:23


In [480]:
# Assemble the (dataset, model, trainer) combinations that make up one experiment.
model_name = 'se2d_fullSXF'
paths = ['data/static22564-2-12-preproc0.zip',
         'data/static22564-2-13-preproc0.zip',
         'data/static22564-3-8-preproc0.zip',
         'data/static22564-3-12-preproc0.zip']

experiment_name = 'Real, Direct, share_features=True, {}, 4-set'.format(model_name)
experiment_comment = 'Directly trained on real data with {} and static_shared_loaders of the 4-set. Varying number of neurons and images.'.format(model_name)
fabrikant_name = 'kklurz'
detach_core = False
print('detach_core = {}'.format(detach_core))

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_shared_loaders'
trainer_fn = 'nnsysident.training.trainers.standard_trainer'

# --- Datasets -------------------------------------------------------------
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# Keep only datasets built from exactly the files listed in `paths`.
# The whole list is compared (order-insensitively); checking just the first
# entry and the length would also accept datasets that differ in the other files.
dataset = dataset.loc[[sorted(dataset_paths) == sorted(paths) for dataset_paths in dataset['paths']]]

dataset = dataset.loc[(dataset['multi_match_n'].isin([500]))]
dataset = dataset.loc[(dataset['multi_match_base_seed'].isin([1,2,3,4,5])) & (dataset['image_base_seed'] == 1)] # maybe comment here
#dataset = dataset.loc[dataset['exclude_neuron_n'].isnull()] # maybe comment here

# --- Models ---------------------------------------------------------------
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
# neuron_n / image_n are parsed out of the free-text model comment ("neuron_n=<int>", "image_n=<int>").
for arg in ['neuron_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)

model = model.loc[model['share_features'] == True] # maybe comment here
#model = model.loc[~model['grid_mean_predictor'].isnull()]
# Reassign instead of using inplace=True: `model` is a filtered selection, and
# in-place edits on such a selection can trigger SettingWithCopyWarning.
model = model.rename(columns={'neuron_n': 'multi_match_n'})
# Models whose comment says 1000 are mapped to 500 so that they merge with the
# multi_match_n == 500 datasets selected above.
# NOTE(review): confirm that this pairing is intended.
model = model.replace({'multi_match_n': {1000: 500}})

# --- Trainer --------------------------------------------------------------
trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1)
trainer = trainer.loc[(trainer['detach_core'] == detach_core)]
# Exactly one trainer must remain; the message reports the count because zero
# matches fail this check as well.
assert len(trainer) == 1, 'Expected exactly one matching trainer, found {}'.format(len(trainer))

# --- Combine --------------------------------------------------------------
# Pair every dataset with the model that has the same neuron and image counts.
combinations = pd.merge(dataset, model, on=["multi_match_n", "image_n"]).sort_values(['multi_match_n', 'image_n'])

experiment = [{'dataset_hash': row['dataset_hash'],
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': trainer['trainer_hash'].values[0],
               'trainer_fn': trainer['trainer_fn'].values[0],
               'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

# Database inserts are left commented out; uncomment to register the experiment.
# Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
# Experiments.Restrictions.insert(experiment, skip_duplicates=True)

detach_core = False


---

In [None]:
# Data files to load -- change the paths here.
# (Single-file alternative: ['data/static22564-2-12-preproc0.zip'])
paths = [
    'data/static22564-2-12-preproc0.zip',
    'data/static22564-2-13-preproc0.zip',
    'data/static22564-3-8-preproc0.zip',
    'data/static22564-3-12-preproc0.zip',
]

dataset_fn = 'nnsysident.datasets.mouse_loaders.static_shared_loaders'

# Further keys that are left disabled in this cell:
#   image_n=50, image_base_seed=1, multi_match_n=972, multi_match_base_seed=1,
#   exclude_multi_match_n=3625
dataset_config = {
    'paths': paths,
    'batch_size': 64,
    'seed': 1,
}

dataloaders = builder.get_data(dataset_fn, dataset_config)

# Get Model

### The new Gaussian readout: change `gauss_type` to switch between the different modes

In [None]:
model_fn = 'nnsysident.models.models.se2d_spatialxfeaturelinear'

model_config = dict(
    gamma_input=1.0,
    input_kern=15,
    hidden_kern=13,
    gamma_readout=0.011027209808690062,
)

# Build three models from the same function, config, dataloaders and seed,
# in the order model, model_real, model_start.
model, model_real, model_start = (
    builder.get_model(model_fn=model_fn, model_config=model_config, dataloaders=dataloaders, seed=1)
    for _ in range(3)
)

In [None]:
# Load the saved state dict that is copied into the models further down in this cell.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
core_dict = torch.load('f8acb861ca08a5baa6a2824ca33f51aa.pth.tar') # score: 0.354544

def get_grids(my_model):
    """Collect the readout grid of every entry in ``my_model.readout``.

    Returns a dict that maps each readout key to that readout's ``grid``,
    squeezed, moved to the CPU and converted to a NumPy array.
    """
    return {
        data_key: single_readout.grid.squeeze().cpu().data.numpy()
        for data_key, single_readout in my_model.readout.items()
    }

# Readout parameter names (the part of the key after "readout.<key>.") that are
# taken over from the checkpoint,
# e.g. 'scales', '_features', 'mu_transform.0.weight', 'mu_transform.0.bias'.
list_of_load = []
# Readout parameter names that are frozen (requires_grad = False) in `model`.
list_of_detach = []


# model_real receives the complete checkpoint, readout entries included.
model_real.load_state_dict(core_dict, strict=False)
real_grids = get_grids(model_real)

# Sort the checkpoint's readout entries into those to keep and those to drop.
# Keys that do not start with "readout" are left untouched.
# A dedicated variable is used for the split key so that the notebook-level
# `name` (which the config cell uses to build the schema name) is not overwritten.
remove = []
keep = []
for key in core_dict.keys():
    key_parts = key.split('.')
    if key_parts[0] != 'readout':
        continue
    if '.'.join(key_parts[2:]) in list_of_load:
        keep.append(key)
    else:
        print('Not loading:    {}'.format(key))
        remove.append(key)
for key in keep:
    print('Loading:  {}'.format(key))

# Drop the unwanted readout entries; from here on core_dict is the reduced checkpoint.
for key in remove:
    del core_dict[key]
model_start.load_state_dict(core_dict, strict=False)
start_grids = get_grids(model_start)


# `model` gets the same reduced checkpoint as model_start.
model.load_state_dict(core_dict, strict=False)

# Freeze the selected readout parameters.
for param_name, param in model.named_parameters():
    key_parts = param_name.split('.')
    if key_parts[0] == 'readout' and '.'.join(key_parts[2:]) in list_of_detach:
        print('detaching:    {}'.format(param_name))
        param.requires_grad = False

# Get Trainer

In [None]:
trainer_fn = 'nnsysident.training.trainers.standard_trainer'
# Only `track_training` is set; `detach_core=True` is a further option left disabled.
trainer_config = {'track_training': True}
trainer = builder.get_trainer(trainer_fn, trainer_config)

# Run Training

In [None]:
# Call the trainer on `model` and `dataloaders` with seed 1; the returned
# 3-tuple is unpacked into score, output and model_state.
score, output, model_state = trainer(model=model, dataloaders=dataloaders, seed=1)

In [None]:
import hiplot as hip
from nnsysident.tables.bayesian import *

# Fetch the Bayesian-search results for se2d_fullSXF together with their model configs.
bayesian_models = (TrainedModelBayesian() * ModelBayesian()
                   & 'model_fn = "nnsysident.models.models.se2d_fullSXF"')
data = pd.DataFrame(bayesian_models.fetch())
# Expand the model config dict into one column per key.
data = pd.concat([data, data['model_config'].apply(pd.Series)], axis=1)

# Columns shown in the parallel-coordinates plot.
cols = ['score', 'gamma_readout', 'share_features', 'normalize']
one_exp_h = data.set_index('score', drop=False).copy()
hip.Experiment.from_dataframe(one_exp_h[cols]).display(force_full_width=False)