In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2 

In [2]:
import os
import datajoint as dj
# Database credentials are read from environment variables so that no secret is stored in the notebook.
dj.config['database.host'] = os.environ['DJ_HOST']
dj.config['database.user'] = os.environ['DJ_USER']
dj.config['database.password'] = os.environ['DJ_PASS']
dj.config['enable_python_native_blobs'] = True

# Schema name used for this project's tables.
# NOTE(review): presumably this must be set before the nnfabrik / nnsysident table imports in the next cell — confirm.
name = 'iclr'
dj.config['schema_name'] = f"konstantin_nnsysident_{name}"

In [17]:
import torch
import numpy as np
import pickle 
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)
import matplotlib.pyplot as plt
import re

import nnfabrik
from nnfabrik.main import *
from nnfabrik import builder
from nnfabrik.utility.hypersearch import Bayesian

from nnsysident.tables.experiments import *
from nnsysident.tables.bayesian import *
from nnsysident.datasets.mouse_loaders import static_shared_loaders
from nnsysident.datasets.mouse_loaders import static_loaders
from nnsysident.datasets.mouse_loaders import static_loader

def find_number(text, c):
    """Return the integer that directly follows the literal prefix ``c`` in ``text``.

    Args:
        text: String to search, e.g. a model comment such as
            ``'se2d_fullgaussian2d, neuron_n=50, image_n=200'``.
        c: Literal prefix that precedes the number, e.g. ``'neuron_n='``.

    Returns:
        The number as ``int``, or ``None`` if the prefix followed by digits
        does not occur in ``text``.

    Raises:
        ValueError: If the prefix followed by digits occurs more than once.
    """
    # Escape the prefix so that characters such as '.', '(' or '+' are matched
    # literally instead of being interpreted as regex syntax.
    pattern = re.escape(c) + r'(\d+)'
    matches = re.findall(pattern, text)
    if not matches:
        return None
    if len(matches) > 1:
        raise ValueError('More than one number found..')
    return int(matches[0])

def get_transfer(old_experiment_name):
    """Build a DataFrame describing every Transfer entry whose source model belongs to an experiment.

    The Transfer table is fetched and its ``transfer_config`` dict is expanded into columns
    (``t_model_hash``, ``t_trainer_hash``, ``t_dataset_hash``). It is then inner-joined with the
    best-scoring trained model per hash triple of ``old_experiment_name``. The result can be
    merged (on ``transfer_fn`` / ``transfer_hash``) with the models the transfer is used for.

    Args:
        old_experiment_name: Name of the experiment the transferred models were trained in.

    Returns:
        DataFrame in which every column that does not already start with ``t_`` or ``transfer``
        is prefixed with ``t_``, sorted by neuron count, image count and the two base seeds.
    """
    hash_keys = ['t_model_hash', 't_trainer_hash', 't_dataset_hash']

    # Transfer table with the config dict flattened into columns.
    transfer_raw = pd.DataFrame(Transfer.fetch())
    transfer_cfg = transfer_raw['transfer_config'].apply(pd.Series)
    transfer_flat = pd.concat([transfer_raw, transfer_cfg], axis=1).drop('transfer_config', axis=1)

    # Trained models of the source experiment, keeping only the best score per hash triple.
    restriction = 'experiment_name = "{}"'.format(old_experiment_name)
    trained = pd.DataFrame((TrainedModel * Dataset * Seed * Experiments.Restrictions & restriction).fetch())
    trained = trained.rename(columns={'model_hash': 't_model_hash',
                                      'trainer_hash': 't_trainer_hash',
                                      'dataset_hash': 't_dataset_hash'})
    best_trained = trained.sort_values('score', ascending=False).drop_duplicates(hash_keys)

    merged = pd.merge(transfer_flat, best_trained, how='inner', on=hash_keys)
    dataset_cfg = merged['dataset_config'].apply(pd.Series)
    merged = pd.concat([merged, dataset_cfg], axis=1).drop('dataset_config', axis=1)

    # Mark every column that describes the transferred (source) model with a 't_' prefix.
    merged.columns = [col if col.startswith(('t_', 'transfer')) else 't_' + col
                      for col in merged.columns]
    return merged.sort_values(['t_multi_match_n', 't_image_n', 't_multi_match_base_seed', 't_image_base_seed'])

# def get_transfer(transfer_hashes):
#     # prepare the Transfer table in a way that all the info about the transferred model is in the DataFrame. Just pd.merge (on transfer_fn and transfer_hash)
#     # it then with the model that the transferred model was used for. 
    
#     transfer = pd.DataFrame((Transfer & 'transfer_hash in {}'.format(tuple(transfer_hashes))).fetch())
#     transfer = pd.concat([transfer, transfer['transfer_config'].apply(pd.Series)], axis = 1).drop('transfer_config', axis = 1)

#     restriction = transfer.rename(columns = {'t_model_hash': 'model_hash', 't_dataset_hash': 'dataset_hash', 't_trainer_hash': 'trainer_hash'})            
#     restriction = restriction[['model_hash', 'dataset_hash', 'trainer_hash']].to_dict('records')

#     tm = pd.DataFrame((TrainedModel * Dataset * Seed & restriction).fetch()).rename(
#         columns = {'model_hash': 't_model_hash', 'trainer_hash': 't_trainer_hash', 'dataset_hash': 't_dataset_hash'})               
#     tm = tm.sort_values('score', ascending=False).drop_duplicates(['t_model_hash', 't_trainer_hash', 't_dataset_hash'])

#     transfer = pd.merge(transfer, tm, how='inner', on=['t_model_hash', 't_trainer_hash', 't_dataset_hash'])
#     transfer = pd.concat([transfer, transfer['dataset_config'].apply(pd.Series)], axis = 1).drop('dataset_config', axis = 1)
#     transfer.columns = ['t_' + col if col[:2] != 't_' and col[:8] != 'transfer'  else col for col in transfer.columns]
#     transfer = transfer.sort_values(['t_multi_match_n', 't_image_n', 't_multi_match_base_seed', 't_image_base_seed'])
#     return transfer


def get_transfer_entries(old_experiment_name, overall_best):
    """Create Transfer-table entries from the best trained models of an experiment.

    Args:
        old_experiment_name: Experiment whose trained models serve as transfer sources.
        overall_best: If ``True``, only models trained with the maximal number of neurons AND
            the maximal number of images are used; otherwise all models with the maximal number
            of neurons are used (for every image count).

    Returns:
        List of dicts (``transfer_fn``, ``transfer_config``, ``transfer_comment``,
        ``transfer_fabrikant``), one per (multi_match_n, image_n, multi_match_base_seed,
        image_base_seed) combination, each pointing to the best-scoring model.
    """
    restriction = 'experiment_name="{}"'.format(old_experiment_name)
    trained = pd.DataFrame((TrainedModel * Dataset * Seed * Experiments.Restrictions & restriction).fetch())
    dataset_cfg = trained['dataset_config'].apply(pd.Series)
    trained = pd.concat([trained, dataset_cfg], axis=1).drop('dataset_config', axis=1)

    model_fns = np.unique(trained['model_fn'])
    assert len(model_fns) == 1 ,"Must have exactly 1 model function in experiment"
    model_fn = model_fns[0]
    model_label = model_fn.split('.')[-1]

    # Filter out best model(s)
    keep = trained['multi_match_n'] == trained['multi_match_n'].max()
    if overall_best is True:
        keep = keep & (trained['image_n'] == trained['image_n'].max())
    trained = trained.loc[keep]
    trained = (trained.sort_values('score', ascending=False)
                      .drop_duplicates(['multi_match_n', 'image_n', 'multi_match_base_seed', 'image_base_seed'])
                      .sort_values(['multi_match_n', 'image_n']))

    # Make entries for the Transfer table
    comment_template = ', multi_match_n={}, multi_match_base_seed={}, image_n={}, image_base_seed={}'
    entries = []
    for _, row in trained.iterrows():
        entries.append(dict(
            transfer_fn='nnsysident.models.transfer_functions.core_transfer',
            transfer_config=dict(t_model_hash=row.model_hash,
                                 t_dataset_hash=row.dataset_hash,
                                 t_trainer_hash=row.trainer_hash),
            transfer_comment=model_label + comment_template.format(row.multi_match_n,
                                                                   row.multi_match_base_seed,
                                                                   row.image_n,
                                                                   row.image_base_seed),
            transfer_fabrikant='kklurz'))
    return entries

# Add Experiment (direct)

### Add dataset entries for different neuron and image seed

In [None]:
# Find best hyperparameters: fetch all Bayesian-search results for the se2d_fullSXF model
# and flatten the dataset / model / trainer config dicts into columns of one DataFrame.

data = pd.DataFrame((TrainedModelBayesian() * 
                     ModelBayesian * 
                     DatasetBayesian * 
                     Trainer & 
                     'model_fn = "nnsysident.models.models.se2d_fullSXF"'  
                     ).fetch()) # optional extra restriction: 'trainer_hash = "3c6008284286683e7ce19e9e1269f507"'
# The original *_config columns are kept next to the expanded ones (the .drop calls are disabled).
data = pd.concat([data, data['dataset_config'].apply(pd.Series)], axis = 1)#.drop('dataset_config', axis = 1)
data = pd.concat([data, data['model_config'].apply(pd.Series)], axis = 1)#.drop('model_config', axis = 1)
data = pd.concat([data, data['trainer_config'].apply(pd.Series)], axis = 1)#.drop('trainer_config', axis = 1)

# NOTE(review): imports in the middle of the notebook; the bayesian tables are already
# star-imported in the import cell, so the second line looks redundant — confirm and move to the top.
import hiplot as hip
from nnsysident.tables.bayesian import *

# Interactive HiPlot view of the score against the searched / varied parameters.
one_exp_h = data.set_index('score', drop=False).copy()
cols = ['score', 'gamma_readout', 'share_features', 'multi_match_n', 'image_n']
hip.Experiment.from_dataframe(one_exp_h[cols]).display(force_full_width=False)

In [None]:
# Entries from transfer hypersearch: register a single se2d_fullgaussian2d Model entry
# with a hand-picked gamma_readout for one (neuron_n, image_n) combination.

# Readout regularisation strength, entered manually.
# NOTE(review): presumably taken from the hyperparameter search above — confirm.
gamma_readout = 31.207

# Only used to label the entry via model_comment; they are not part of model_config.
neuron_n = 5335
image_n = 50


share_features = False
share_transform = False


model_comment = 'se2d_fullgaussian2d, neuron_n={}, image_n={}'.format(neuron_n, image_n)
model_config = { 'init_mu_range': 0.55,
                 'init_sigma': 0.4,
                 'input_kern': 15,
                 'hidden_kern': 13,
                 'gamma_input': 1.0,
                 'grid_mean_predictor': {'type': 'cortex',
                  'input_dimensions': 2,
                  'hidden_layers': 0,
                  'hidden_features': 0,
                  'final_tanh': False},
                
                 'gamma_readout': gamma_readout,
                 'share_features': share_features,
                 'share_transform': share_transform}
entry = dict(model_fn = 'nnsysident.models.models.se2d_fullgaussian2d', model_config = model_config, model_fabrikant = 'kklurz', model_comment = model_comment)
# Unlike the loop in the next cell, this call does not pass skip_duplicates.
Model().add_entry(**entry)

In [None]:
# Entries from direct hypersearch: register one Model entry per (neuron_n, image_n) combination,
# each with the best-scoring gamma_readout found for that combination in `data`.

share_features = True
share_transform = False
for neuron_n in [50, 500, 3625]:
    # Search rows with 3625 neurons are labelled with 3597 neurons in the model comment.
    comment_neuron_n = 3597 if neuron_n == 3625 else neuron_n
    for image_n in [50, 200, 500, 1000, 2500, 4399]:
        in_cell = ((data['multi_match_n'] == neuron_n)
                   & (data['image_n'] == image_n)
                   & (data['share_features'] == True)
                   & (data['share_transform'] == False))
        dat = data.loc[in_cell]
        best_rows = dat.loc[dat['score'] == dat['score'].max()]
        gamma_readout = best_rows.gamma_readout.values[0]

        model_comment = 'se2d_fullgaussian2d, neuron_n={}, image_n={}'.format(comment_neuron_n, image_n)
        model_config = {'init_mu_range': 0.55,
                        'init_sigma': 0.4,
                        'input_kern': 15,
                        'hidden_kern': 13,
                        'gamma_input': 1.0,
                        'grid_mean_predictor': {'type': 'cortex',
                                                'input_dimensions': 2,
                                                'hidden_layers': 0,
                                                'hidden_features': 0,
                                                'final_tanh': False},
                        'gamma_readout': gamma_readout,
                        'share_features': share_features,
                        'share_transform': share_transform}
        entry = dict(model_fn='nnsysident.models.models.se2d_fullgaussian2d',
                     model_config=model_config,
                     model_fabrikant='kklurz',
                     model_comment=model_comment)
        Model().add_entry(**entry, skip_duplicates=True)

In [None]:
# Entries for dataset: register one static_shared_loaders Dataset entry for every
# (neuron_n, image_n) combination of the 4-set, with both base seeds fixed to 1.

for neuron_n in [50, 500, 1000, 3597]:
    for image_n in [50, 200, 500, 1000, 2500, 4399]:
        dataset_config = dict(
            paths=['data/static22564-2-12-preproc0.zip',
                   'data/static22564-2-13-preproc0.zip',
                   'data/static22564-3-8-preproc0.zip',
                   'data/static22564-3-12-preproc0.zip'],
            batch_size=64,
            multi_match_n=neuron_n,
            multi_match_base_seed=1,
            image_n=image_n,
            image_base_seed=1,
        )
        dataset_comment = 'neuron_n={}, image_n={}'.format(neuron_n, image_n)
        entry = {
            'dataset_fn': 'nnsysident.datasets.mouse_loaders.static_shared_loaders',
            'dataset_config': dataset_config,
            'dataset_fabrikant': 'kklurz',
            'dataset_comment': dataset_comment,
        }
        Dataset().add_entry(skip_duplicates=True, **entry)

In [None]:
# Entries for dataset - different seeds: take every existing seed-1 Dataset entry of the 4-set
# and register copies of it that differ only in the neuron / image base seeds.

multi_match_base_seeds = [1,2,3,4,5]
image_base_seeds = [1]

dataset_fn = 'nnsysident.datasets.mouse_loaders.static_shared_loaders'
paths = ['data/static22564-2-12-preproc0.zip',
                     'data/static22564-2-13-preproc0.zip',
                     'data/static22564-3-8-preproc0.zip',
                     'data/static22564-3-12-preproc0.zip']

dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
dataset = dataset.loc[(dataset['multi_match_base_seed'] == 1) & (dataset['image_base_seed'] == 1)]
# NOTE(review): this only tests whether the FIRST path of each entry is one of `paths`;
# later cells additionally compare the number of paths — confirm whether that is wanted here too.
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] for loc, row in dataset.iterrows()]]
#dataset = dataset.loc[dataset['exclude_neuron_n'].isnull()]

for loc, row in dataset.iterrows():
    for multi_match_base_seed in multi_match_base_seeds:
        for image_base_seed in image_base_seeds:
            # Build a fresh config for every seed combination. Updating row['dataset_config']
            # in place would also modify the dict stored in `dataset`, because the row only
            # holds a reference to it.
            dataset_config = dict(row['dataset_config'],
                                  multi_match_base_seed=multi_match_base_seed,
                                  image_base_seed=image_base_seed)
            Dataset().add_entry(dataset_fn=row['dataset_fn'], 
                                dataset_config=dataset_config, 
                                dataset_fabrikant=row['dataset_fabrikant'], 
                                dataset_comment=row['dataset_comment'], skip_duplicates=True)

### Add respective experiment (restriction) entries

In [None]:
# Build the restriction list for the "direct" experiment on the 4-set: every selected dataset
# is paired with the model whose comment carries the same neuron / image counts.
model_name = 'se2d_fullgaussian2d'
paths = ['data/static22564-2-12-preproc0.zip',
         'data/static22564-2-13-preproc0.zip',
         'data/static22564-3-8-preproc0.zip',
         'data/static22564-3-12-preproc0.zip']

experiment_name = 'Real, Direct, {}, 4-set'.format(model_name)
experiment_comment = 'Directly trained on real data with {} and static_shared_loaders of the 4-set. Varying number of neurons and images.'.format(model_name)
fabrikant_name = 'kklurz'
detach_core = False
print('detach_core = {}'.format(detach_core))

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_shared_loaders'
trainer_fn = 'nnsysident.training.trainers.standard_trainer'

# Datasets: flatten the config, then keep entries whose first path is in `paths`
# and which have the same number of paths.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] and len(row['paths']) == len(paths) for loc, row in dataset.iterrows()]]

dataset = dataset.loc[(dataset['multi_match_n'].isin([50, 500, 3597]))]
dataset = dataset.loc[(dataset['multi_match_base_seed'].isin([1,2,3,4,5])) & (dataset['image_base_seed'] == 1)] # optional restriction on seeds
# NOTE(review): other cells use the column 'exclude_multi_match_n'; confirm that
# 'exclude_neuron_n' exists in the fetched configs, otherwise this line raises a KeyError.
dataset = dataset.loc[dataset['exclude_neuron_n'].isnull()] # optional restriction


# Models: neuron_n / image_n are parsed out of the free-text model_comment.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)

model = model.loc[model['share_features'] == True] # optional restriction
model = model.loc[~model['grid_mean_predictor'].isnull()]
# Rename so that the model can be merged with the dataset on the same column names.
model.rename(columns={'neuron_n': 'multi_match_n'}, inplace=True)
  
    
# Trainer: exactly one trainer with the requested detach_core setting must remain.
trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1)
trainer = trainer.loc[(trainer['detach_core'] == detach_core)]    
assert len(trainer) == 1, 'Too many trainers!'

combinations = pd.merge(dataset, model, on=["multi_match_n", "image_n"]).sort_values(['multi_match_n', 'image_n'])

# One restriction row per dataset/model combination, all sharing the single trainer.
experiment = [{'dataset_hash': row['dataset_hash'], 
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': trainer['trainer_hash'].values[0],
               'trainer_fn': trainer['trainer_fn'].values[0],
                 'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

# Inserts are disabled; uncomment to write the experiment and its restrictions to the database.
# Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
# Experiments.Restrictions.insert(experiment, skip_duplicates=True)

# Add Experiment (SameNI)

### Add transfer entries

In [18]:
# Add one Transfer entry per best model of the direct experiment.
old_experiment_name = 'Real, Direct, se2d_fullgaussian2d, 4-set'
overall_best = False

# `yes` collects the entries that were inserted, `no` the ones that add_entry rejected.
yes, no = [], []
for entry in get_transfer_entries(old_experiment_name = old_experiment_name, overall_best=overall_best):
    try:
        Transfer().add_entry(**entry)
    except Exception as error:
        # Best effort: keep going, but do not swallow KeyboardInterrupt/SystemExit
        # and show why the entry was rejected instead of hiding it.
        no.append(entry)
        print('Skipped "{}": {}: {}'.format(entry['transfer_comment'], type(error).__name__, error))
    else:
        yes.append(entry)

### Add respective experiment (restriction) entries

In [33]:
# Set parameters for experiment: transfer experiment in which the core comes from the
# direct 4-set experiment and is paired with the matching seeds of the target dataset.
model_name = 'se2d_fullgaussian2d'
paths = ['data/static22564-2-12-preproc0.zip',
             'data/static22564-2-13-preproc0.zip',
             'data/static22564-3-8-preproc0.zip',
             'data/static22564-3-12-preproc0.zip']
detach_core = True
print('detach_core = {}'.format(detach_core))

experiment_name = 'Real, core_transfer (sameNI), {}, 4-set -> 4-set'.format(model_name)
experiment_comment = 'Transfer training on real data with {} and static_shared_loaders of the 4-set. Varying number of images in the transfer core.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_shared_loaders'
trainer_fn = "nnsysident.training.trainers.standard_trainer"


# Fetch datasets, trainers and models and flatten their config dicts into columns.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)


trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1).drop('trainer_config', axis = 1)

# neuron_n / image_n of a model are parsed out of its free-text model_comment.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1).drop('model_config', axis = 1)
model.rename(columns={'neuron_n': 'multi_match_n'}, inplace=True)

# Restrict here
model = model.loc[model['multi_match_n'] == 1000]

# Target datasets: 1000 neurons, 4399 images, seeds 1-5, with 3597 neurons excluded.
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] and len(row['paths']) == len(paths) for loc, row in dataset.iterrows()]]
dataset = dataset.loc[(dataset['multi_match_n'] == 1000) & (dataset['image_n'] == 4399)]
dataset = dataset.loc[(dataset['multi_match_base_seed'].isin([1,2,3,4,5])) & (dataset['image_base_seed'] == 1)]
dataset = dataset.loc[dataset['exclude_multi_match_n'] == 3597]
trainer = trainer.loc[trainer['detach_core'] == detach_core]
assert len(trainer) == 1, 'Too many trainers!'


# Transfer entries of the direct experiment; seed and count columns are renamed
# so they line up with the dataset columns.
old_experiment_name = 'Real, Direct, {}, 4-set'.format(model_name)
transfer = get_transfer(old_experiment_name)
transfer = transfer.rename(columns = {'t_multi_match_base_seed': 'multi_match_base_seed', 
                                      't_image_base_seed': 'image_base_seed', 
                                      't_multi_match_n': 'multi_match_n', 
                                      't_image_n':'image_n'})

combinations = pd.merge(dataset, model, on=["multi_match_n", "image_n"]).sort_values(['multi_match_n', 'image_n'])
# The transfer is matched on the seeds only, so every transfer core with the same seeds
# (regardless of its own neuron / image counts) is combined with the target dataset.
combinations = pd.merge(combinations, transfer, on=['multi_match_base_seed',
                                                    'image_base_seed'])

# One restriction row per dataset/model/transfer combination, all sharing the single trainer.
experiment = [{'dataset_hash': row['dataset_hash'], 
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': trainer['trainer_hash'].values[0],
               'trainer_fn': trainer['trainer_fn'].values[0],
               'transfer_hash': row['transfer_hash'], 
               "transfer_fn": row['transfer_fn'],
                 'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

In [42]:
# Write the experiment and its restriction rows (built in the previous cell) to the database.
# Neither insert passes skip_duplicates here, unlike the commented-out Experiments inserts elsewhere.
ExperimentsTransfer.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
ExperimentsTransfer.Restrictions.insert(experiment)

# Add Experiment (best)

### Add dataset entries for different neuron and image seed

In [None]:
# Derive new Dataset entries (1000 neurons, with exclude_multi_match_n = 3597) from the existing
# 3597-neuron entries, re-using their paths, image counts and seeds.
dataset = pd.DataFrame(Dataset.fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1).drop('dataset_config', axis = 1)
dataset = dataset.loc[(dataset['multi_match_base_seed'].isin([1,2,3,4,5])) & (dataset['image_base_seed'] == 1)] # optional restriction on seeds
dataset = dataset.loc[(dataset['multi_match_n'] == 3597)] # take any one number of neurons
# Only use entries that do not exclude neurons themselves as templates.
dataset = dataset.loc[dataset['exclude_multi_match_n'].isnull()]

entries = []
for loc, row in dataset.iterrows():
    exclude_multi_match_n = 3597
    multi_match_n = 1000
    entry = dict(dataset_fn='nnsysident.datasets.mouse_loaders.static_shared_loaders', 
                 dataset_config = dict(paths=row.paths,
                                       batch_size=64,
                                       multi_match_n = multi_match_n,
                                      multi_match_base_seed = row.multi_match_base_seed,
                                      image_n = row.image_n,
                                      image_base_seed = row.image_base_seed,
                                      exclude_multi_match_n = exclude_multi_match_n),
                 dataset_comment='multi_match_n={}, image_n={}, exclude_multi_match_n={}'.format(multi_match_n, row.image_n, exclude_multi_match_n),
                 dataset_fabrikant='kklurz')
    entries.append(entry)
    
# All entries are built first and inserted afterwards.
for entry in entries:
    Dataset().add_entry(**entry, skip_duplicates=True)

### Add transfer entries

In [None]:
# Add a Transfer entry for the overall best models (max neurons and max images) of the direct experiment.
# NOTE(review): this uses the se2d_spatialxfeaturelinear experiment, while the following cell looks up
# transfers of the se2d_fullgaussian2d experiment — confirm which one is intended.
old_experiment_name = 'Real, Direct, se2d_spatialxfeaturelinear, 4-set'
overall_best = True

# No skip_duplicates / error handling here: the first failing add_entry call stops the loop.
for entry in get_transfer_entries(old_experiment_name = old_experiment_name, overall_best=overall_best):
    Transfer().add_entry(**entry)

### Add respective experiment (restriction) entries

In [87]:
# Transfer experiment using the best core (3597 neurons, 4399 images) of the direct experiment
# on all target datasets that exclude those 3597 neurons.
model_name = 'se2d_fullgaussian2d'
paths = ['data/static22564-2-12-preproc0.zip',
             'data/static22564-2-13-preproc0.zip',
             'data/static22564-3-8-preproc0.zip',
             'data/static22564-3-12-preproc0.zip']
detach_core = True
print('detach_core = {}'.format(detach_core))


experiment_name = 'Real, core_transfer (best), {}, 4-set -> 4-set'.format(model_name)
# NOTE(review): the comment text says static_loaders, but dataset_fn below is static_shared_loaders.
experiment_comment = 'Transfer training on real data with {} and static_loaders of the 4-set. Varying number of multi_matchs and images.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_shared_loaders'
trainer_fn = "nnsysident.training.trainers.standard_trainer"


# Fetch datasets, trainers and models and flatten their config dicts into columns.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)


trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1).drop('trainer_config', axis = 1)

# neuron_n / image_n of a model are parsed out of its free-text model_comment.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1).drop('model_config', axis = 1)
model.rename(columns={'neuron_n': 'multi_match_n'}, inplace=True)


# Restrict here
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] and len(row['paths']) == len(paths) for loc, row in dataset.iterrows()]]
dataset = dataset.loc[(dataset['multi_match_base_seed'].isin([1,2,3,4,5])) & (dataset['image_base_seed'] == 1)]
# The not-null filter is implied by the equality filter on the next line.
dataset = dataset.loc[~ dataset['exclude_multi_match_n'].isnull()]
dataset = dataset.loc[dataset['exclude_multi_match_n'] == 3597]
trainer = trainer.loc[trainer['detach_core'] == detach_core]
assert len(trainer) == 1, 'Too many trainers!'


# Keep only the transfer cores trained on 3597 neurons and 4399 images.
old_experiment_name = 'Real, Direct, {}, 4-set'.format(model_name)
transfer = get_transfer(old_experiment_name)
transfer = transfer.loc[(transfer['t_multi_match_n'] == 3597) & (transfer['t_image_n'] == 4399)]
transfer = transfer.rename(columns = {'t_multi_match_base_seed': 'multi_match_base_seed', 't_image_base_seed': 'image_base_seed'})

combinations = pd.merge(dataset, model, on=["multi_match_n", "image_n"]).sort_values(['multi_match_n', 'image_n'])
# Transfer cores are matched to the target datasets on the seeds only.
combinations = pd.merge(combinations, transfer, on=['multi_match_base_seed', 'image_base_seed'])

# One restriction row per dataset/model/transfer combination, all sharing the single trainer.
experiment = [{'dataset_hash': row['dataset_hash'], 
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': trainer['trainer_hash'].values[0],
               'trainer_fn': trainer['trainer_fn'].values[0],
               'transfer_hash': row['transfer_hash'], 
               "transfer_fn": row['transfer_fn'],
                 'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

In [90]:
# Write the experiment and its restriction rows (built in the previous cell) to the database.
# Neither insert passes skip_duplicates here, unlike the commented-out Experiments inserts elsewhere.
ExperimentsTransfer.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
ExperimentsTransfer.Restrictions.insert(experiment)

# Add Experiment (animal transfer)

### Train direct on test animal 

In [None]:
# Build the restriction list for direct training on the single scan 20457-5-9,
# pairing datasets and models that carry the same neuron / image counts.
model_name = 'se2d_fullgaussian2d'
paths = ['data/static20457-5-9-preproc0.zip']
detach_core=False
print('detach_core = {}'.format(detach_core))

experiment_name = 'Real, Direct, {}, 20457-5-9'.format(model_name)
experiment_comment = 'Directly trained on real data with {} and static_loaders of 20457-5-9. Varying number of images.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
trainer_fn = 'nnsysident.training.trainers.standard_trainer'

# Datasets: flatten the config, drop entries that set 'exclude', and keep those whose
# first path is in `paths` and that have the same number of paths.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
dataset = dataset.loc[dataset['exclude'].isnull()]
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] and len(row['paths']) == len(paths) for loc, row in dataset.iterrows()]]

dataset = dataset.loc[(dataset['neuron_n'] == 5335)].sort_values('image_n') # optional restriction

# Models: neuron_n / multi_match_n / image_n are parsed out of the free-text model_comment.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)

model = model.loc[model['multi_match_n'].isnull()] # optional restriction: comment has no multi_match_n
model = model.loc[model['neuron_n'] == 5335] # optional restriction
model = model.loc[~model['grid_mean_predictor'].isnull()]
model = model.loc[model['hidden_channels'].isnull()]

# Trainer: exactly one trainer with the requested detach_core setting must remain.
trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1)
trainer = trainer.loc[(trainer['detach_core'] == detach_core)]    
assert len(trainer) == 1, 'Too many trainers!'

combinations = pd.merge(dataset, model, on=["neuron_n", "image_n"]).sort_values(['neuron_n', 'image_n'])
   
# One restriction row per dataset/model combination, all sharing the single trainer.
experiment = [{'dataset_hash': row['dataset_hash'], 
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': trainer['trainer_hash'].values[0],
               'trainer_fn': trainer['trainer_fn'].values[0],
                 'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

# Inserts are disabled; uncomment to write the experiment and its restrictions to the database.
#Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
#Experiments.Restrictions.insert(experiment, skip_duplicates=True)


### Train direct on animal to get transfer core

In [None]:
# Build the restriction list for direct training on the 11-S data (the source of the transfer core).
model_name = 'se2d_fullgaussian2d'
detach_core = False
print('detach_core = {}'.format(detach_core))

experiment_name = 'Real, Direct, {}, 11-S'.format(model_name)
experiment_comment = 'Directly trained on real data with {} and static_loaders of the 11-S. Full neurons and images.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
# Defined here explicitly: this cell previously relied on `trainer_fn` leaking in from an
# earlier cell, so it failed with a NameError on a fresh kernel.
trainer_fn = 'nnsysident.training.trainers.standard_trainer'

# Dataset and model are both selected through their free-text comment '11 datasets'.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
dataset = dataset.loc[dataset['dataset_comment'] == '11 datasets']

model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)

model = model.loc[model['model_comment'] == '11 datasets']


# Trainer: exactly one trainer with the requested detach_core setting must remain.
trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1)
trainer = trainer.loc[(trainer['detach_core'] == detach_core)]    
assert len(trainer) == 1, 'Too many trainers!'

# NOTE(review): the comment '11 datasets' contains no 'neuron_n=' / 'image_n=', so find_number
# yields None for the model; confirm that the merge on these two columns pairs the rows as intended.
combinations = pd.merge(dataset, model, on=["neuron_n", "image_n"]).sort_values(['neuron_n', 'image_n'])
   
# One restriction row per dataset/model combination, all sharing the single trainer.
experiment = [{'dataset_hash': row['dataset_hash'], 
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': trainer['trainer_hash'].values[0],
               'trainer_fn': trainer['trainer_fn'].values[0],
                 'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

# Inserts are disabled; uncomment to write the experiment and its restrictions to the database.
# Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
# Experiments.Restrictions.insert(experiment, skip_duplicates=True)

### Add transfer entries

In [None]:
# Add manually: build a Transfer entry that points to the single best model
# (highest fraction_oracle) of the direct 11-S experiment.

experiment_name = 'Real, Direct, se2d_fullgaussian2d, 11-S'
data = pd.DataFrame((TrainedModel * Dataset * Model * Trainer * Seed * OracleScore *Experiments.Restrictions & 'experiment_name="{}"'.format(experiment_name)).fetch())

    
# Filter out relevant data
# The original *_config columns are kept next to the expanded ones (the .drop calls are disabled).
data = pd.concat([data, data['dataset_config'].apply(pd.Series)], axis = 1)#.drop('dataset_config', axis = 1)
data = pd.concat([data, data['model_config'].apply(pd.Series)], axis = 1)#.drop('model_config', axis = 1)
data = data.loc[(data['fraction_oracle'] == data['fraction_oracle'].max())]
# Fails if the maximum is tied between several models or if nothing was fetched.
assert len(data) == 1, 'should be only one best model left'


# The experiment name is stored as transfer_comment.
entry = dict(transfer_fn='nnsysident.models.transfer_functions.core_transfer', 
             transfer_config = dict(t_model_hash=data.model_hash.values[0], t_dataset_hash=data.dataset_hash.values[0], t_trainer_hash=data.trainer_hash.values[0]),
             transfer_comment=experiment_name,
             transfer_fabrikant='kklurz')
# Insert is disabled; uncomment to write the entry to the database.
# Transfer().add_entry(**entry)

### Add respective experiment (restriction) entries

In [None]:
# Build the restriction list for the animal-transfer experiment: the core trained on the 11-S
# data is transferred to models trained on scan 20457-5-9 with a varying number of images.
model_name = 'se2d_fullgaussian2d'
paths = ['data/static20457-5-9-preproc0.zip']
detach_core = True
print('detach_core = {}'.format(detach_core))

experiment_name = 'Real, core_transfer (animal), {}, 11-S -> 20457-5-9'.format(model_name)
experiment_comment = 'Transfer training from the 11-S to 20457-5-9 with {} and static_loaders. Varying number of images.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
trainer_fn = "nnsysident.training.trainers.standard_trainer"


# Datasets: flatten the config, drop entries that set 'exclude', and keep those whose
# first path is in `paths` and that have the same number of paths.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
dataset = dataset.loc[dataset['exclude'].isnull()]
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] and len(row['paths']) == len(paths) for loc, row in dataset.iterrows()]]
dataset = dataset.loc[(dataset['neuron_n'] == 5335)].sort_values('image_n') # optional restriction

# Models: neuron_n / multi_match_n / image_n are parsed out of the free-text model_comment.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)
model = model.loc[model['multi_match_n'].isnull()] # optional restriction: comment has no multi_match_n
model = model.loc[model['neuron_n'] == 5335] # optional restriction
model = model.loc[~model['grid_mean_predictor'].isnull()]
model = model.loc[model['hidden_channels'].isnull()]

# Trainer: exactly one trainer with the requested detach_core setting must remain
# (same check as in the other experiment cells; previously the first match was used silently).
trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1).drop('trainer_config', axis = 1)
trainer = trainer.loc[trainer['detach_core'] == detach_core]
assert len(trainer) == 1, 'Too many trainers!'

# The transfer entry is looked up through its comment, which holds the source experiment name.
transfer_comment = 'Real, Direct, se2d_fullgaussian2d, 11-S'
transfer = pd.DataFrame((Transfer & 'transfer_comment = "{}"'.format(transfer_comment)).fetch())
# Fail with a clear message instead of an IndexError on `.values[0]` below.
assert len(transfer) > 0, 'No transfer entry found for "{}"'.format(transfer_comment)

combinations = pd.merge(dataset, model, on=["neuron_n", "image_n"]).sort_values(['neuron_n', 'image_n'])

# One restriction row per dataset/model combination; trainer and transfer are the same for all rows.
experiment = [{'dataset_hash': row['dataset_hash'], 
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': trainer['trainer_hash'].values[0],
               'trainer_fn': trainer['trainer_fn'].values[0],
               'transfer_hash': transfer['transfer_hash'].values[0], 
               "transfer_fn": transfer['transfer_fn'].values[0],
                 'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

In [None]:
# NOTE: both inserts are commented out, so running this cell writes nothing to the database.
# Uncommented, the first line registers `experiment_name` in ExperimentsTransfer and the
# second adds the `experiment` rows to ExperimentsTransfer.Restrictions.
#ExperimentsTransfer.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
#ExperimentsTransfer.Restrictions.insert(experiment)

# Other stuff

### Add single entries in main tables

In [None]:
# Template entries for the four main tables. `entry` is rebuilt for each table and every
# add_entry call is commented out, so running this cell only leaves the Transfer template
# in `entry` and writes nothing.

# Dataset template
entry = {
    'dataset_fn': 'nnsysident.datasets.mouse_loaders.static_loaders',
    'dataset_config': {
        'paths': paths,
        'batch_size': 64,
        'seed': 1,
    },
    'dataset_comment': 'full dataset',
    'dataset_fabrikant': 'kklurz',
}
#Dataset().add_entry(**entry)

# Model template (empty config)
entry = {
    'model_fn': 'nnsysident.models.models.se2d_fullgaussian2d',
    'model_config': {},
    'model_comment': 'default model',
    'model_fabrikant': 'kklurz',
}
#Model().add_entry(**entry)

# Trainer template
entry = {
    'trainer_fn': 'nnsysident.training.trainers.standard_trainer',
    'trainer_config': {'detach_core': True},
    'trainer_comment': 'default trainer',
    'trainer_fabrikant': 'kklurz',
}
#Trainer().add_entry(**entry)

# Transfer template; the config names the hashes of the model, dataset and trainer to transfer from.
entry = {
    'transfer_fn': 'nnsysident.models.transfer_functions.core_transfer',
    'transfer_config': {
        't_model_hash': "d41d8cd98f00b204e9800998ecf8427e",
        't_dataset_hash': '6fa162a20053a013ab4bd31a21950d35',
        't_trainer_hash': 'd41d8cd98f00b204e9800998ecf8427e',
    },
    'transfer_comment': 'test transfer',
    'transfer_fabrikant': 'kklurz',
}
#Transfer().add_entry(**entry)

### Compare model state dicts

In [None]:
def compare_models(state_dict_1, state_dict_2):
    """Compare two model state dicts entry by entry and print the outcome.

    Entries are paired by position (iteration order of ``.items()``) and their
    tensors are compared with ``torch.equal``. The name of every differing
    entry is printed; if nothing differs, a success message is printed.

    Args:
        state_dict_1: Mapping from parameter name to tensor.
        state_dict_2: Mapping from parameter name to tensor.

    Returns:
        None. The result is reported via ``print``.

    Raises:
        ValueError: If the two state dicts hold a different number of entries,
            or if two differing entries at the same position have different
            names (the dicts do not describe the same architecture).
    """
    # zip() would silently stop at the shorter dict and could report a perfect
    # match for models with missing/extra parameters, so check the sizes first.
    if len(state_dict_1) != len(state_dict_2):
        raise ValueError(
            'State dicts have different numbers of entries: {} vs {}'.format(
                len(state_dict_1), len(state_dict_2)
            )
        )

    models_differ = 0
    for (name_1, tensor_1), (name_2, tensor_2) in zip(state_dict_1.items(), state_dict_2.items()):
        if torch.equal(tensor_1, tensor_2):
            continue
        models_differ += 1
        if name_1 != name_2:
            raise ValueError(
                'State dict entries are not aligned: {!r} vs {!r}'.format(name_1, name_2)
            )
        print('Mismatch found at', name_1)

    if models_differ == 0:
        print('Models match perfectly! :)')

### Add failed Bayesian to Model()

In [None]:
model_fn = "nnsysident.models.models.se2d_pointpooled"

# Join the trained Bayesian-search models with their model and dataset entries for `model_fn`,
# then unpack dataset_config (dropped afterwards) and model_config (kept) into columns.
all_info = pd.DataFrame((TrainedModelBayesian * ModelBayesian * DatasetBayesian & 'model_fn = "{}"'.format(model_fn)).fetch())
all_info = pd.concat([all_info, all_info['dataset_config'].apply(pd.Series)], axis = 1).drop('dataset_config', axis = 1)
all_info = pd.concat([all_info, all_info['model_config'].apply(pd.Series)], axis = 1)

for neuron_n in [100, 1000]:
    for image_n in [50, 100, 200, 500, 1000, 4000]:
        # Runs for this neuron_n / image_n combination that have a hidden_kern value.
        one_exp = all_info.loc[(all_info['neuron_n'] == neuron_n) & (all_info['image_n'] == image_n) & (~ all_info['hidden_kern'].isnull())].sort_values('score')
        # Row(s) holding the highest score of this combination.
        best = one_exp.loc[one_exp['score'] == one_exp['score'].max()]
        print(len(one_exp))
        if one_exp.empty:
            # Without this guard `.values[0]` raises IndexError and aborts the whole sweep
            # as soon as one combination has no runs.
            print('No runs found for neuron_n={}, image_n={} - skipping'.format(neuron_n, image_n))
            continue
        print(one_exp['neuron_n'].values[0], one_exp['image_n'].values[0])
#         Model().add_entry(model_fn=best['model_fn'].values[0],
#                           model_config=best['model_config'].values[0],
#                           model_fabrikant='kklurz',
#                           model_comment='{}, neuron_n={}, image_n={}'.format(best['model_fn'].values[0].split('.')[-1], 
#                                                                              best['neuron_n'].values[0], best['image_n'].values[0]), skip_duplicates=True)

In [None]:
model_fn = "nnsysident.models.models.se2d_spatialxfeaturelinear"

# All Model entries registered for `model_fn`.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
# Parse the "<name>=<number>" tags out of every model_comment.
# find_number already returns an int (or None when the tag is absent), so its result must
# not be indexed or cast. 'multi_match_n' is parsed as well because the filter below needs it.
for arg in ['neuron_n', 'multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
# Unpack model_config into one column per key, drop the packed column and order the rows.
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1).drop('model_config', axis = 1).sort_values(['neuron_n', 'image_n'])
# Keep only entries that carry a multi_match_n tag and a grid_mean_predictor setting.
model = model.loc[~model['multi_match_n'].isnull()]
model = model.loc[~model['grid_mean_predictor'].isnull()]

model

### Plot hyperparameters

In [None]:
# Columns handed to hiplot: the score plus the hyperparameters to inspect.
cols = ['score', 'gamma_readout', 'gamma_input', 'init_mu_range', 'init_sigma']

# Index a copy of `one_exp` by score while keeping 'score' as a regular column.
# `one_exp` must already exist in the kernel when this cell runs.
one_exp_h = one_exp.set_index('score', drop=False).copy()

hip.Experiment.from_dataframe(
    one_exp_h[cols]
).display(force_full_width=False)

### Add model configs according to similar # I and # N

In [None]:
model_fn = "nnsysident.models.models.se2d_spatialxfeaturelinear"

# All Model entries registered for `model_fn`, with the multi_match_n / image_n tags parsed
# from model_comment and model_config unpacked into columns (the packed column is kept).
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)
model = model.loc[~model['multi_match_n'].isnull()]
model = model.loc[~model['grid_mean_predictor'].isnull()]


# Re-register the configs found for `multi_match_n` under the comment tag `new_multi_match_n`.
# WARNING: this cell writes to the Model table.
for multi_match_n in [1000]:
    new_multi_match_n = 972
    for image_n in [50, 200, 500, 1000, 2500, 4399]:
        row = model.loc[(model['multi_match_n'] == multi_match_n) & (model['image_n'] == image_n)]
        if row.empty:
            # Same exception type that `.values[0]` would raise, but with a usable message.
            raise IndexError('No model entry found for multi_match_n={}, image_n={}'.format(multi_match_n, image_n))
        # Work on a copy: the fetched config object is shared with the `model` frame, and
        # changing it in place would silently alter the frame as well.
        model_config = row['model_config'].values[0].copy()
        # NOTE(review): the tiny offset presumably makes the config differ from the source
        # entry so that a separate Model row is created -- confirm.
        model_config['gamma_readout'] = model_config['gamma_readout'] + 0.00000001
        entry = dict(model_fn=row['model_fn'].values[0],
                     model_config=model_config, 
                     model_fabrikant='kklurz', 
                     model_comment= '{}, multi_match_n={}, image_n={}'.format(row['model_fn'].values[0].split('.')[-1], 
                                                                              new_multi_match_n, row['image_n'].values[0]))
        Model().add_entry(**entry)