In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2 

In [None]:
import os
import datajoint as dj
# Database credentials are read from the environment; a missing variable raises KeyError here.
dj.config['database.host'] = os.environ['DJ_HOST']
dj.config['database.user'] = os.environ['DJ_USER']
dj.config['database.password'] = os.environ['DJ_PASS']
dj.config['enable_python_native_blobs'] = True

# The schema name is set before the nnfabrik / nnsysident table imports in the next cell.
# NOTE(review): presumably those imports read dj.config['schema_name'] at import time -- confirm.
name = "simdata"
dj.config['schema_name'] = f"konstantin_nnsysident_{name}"

In [None]:
import torch
import numpy as np
import pickle 
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)
import matplotlib.pyplot as plt
import re

import nnfabrik
from nnfabrik.main import *
from nnfabrik import builder
from nnfabrik.utility.hypersearch import Bayesian

from nnsysident.tables.experiments import *
from nnsysident.tables.bayesian import *
from nnsysident.datasets.mouse_loaders import static_shared_loaders
from nnsysident.datasets.mouse_loaders import static_loaders
from nnsysident.datasets.mouse_loaders import static_loader

def find_number(text, c):
    """Return every run of digits in ``text`` that directly follows the prefix ``c``.

    Args:
        text: String to search, e.g. a comment such as ``'neuron_n=100, image_n=50'``.
        c: Literal prefix that must immediately precede the digits, e.g. ``'neuron_n='``.

    Returns:
        List of digit strings (not ints), in order of appearance; empty if nothing matches.
    """
    # Escape the prefix so characters like '.', '(' or '+' are matched literally
    # instead of being interpreted as regex syntax.
    return re.findall(r'%s(\d+)' % re.escape(c), text)

def get_transfer(old_experiment_name):
    """Return the Transfer table joined with the trained models it transfers from.

    Each Transfer row is matched (inner join on the three ``t_*_hash`` columns) with the
    best-scoring trained model of ``old_experiment_name`` that has the same hashes. Every
    column that does not already start with ``t_`` or ``transfer`` is prefixed with ``t_``,
    so the result can later be merged (on transfer_fn / transfer_hash) with the model that
    the transferred model was used for.

    Args:
        old_experiment_name: Name of the experiment whose trained models were transferred.

    Returns:
        DataFrame sorted by t_neuron_n, t_image_n, t_neuron_base_seed, t_image_base_seed.
    """
    hash_columns = ['t_model_hash', 't_trainer_hash', 't_dataset_hash']

    # Transfer rows with their transfer_config dict unpacked into columns.
    transfer_rows = pd.DataFrame(Transfer.fetch())
    transfer_config_columns = transfer_rows['transfer_config'].apply(pd.Series)
    transfer_rows = pd.concat([transfer_rows, transfer_config_columns], axis=1).drop('transfer_config', axis=1)

    # Trained models of the source experiment; hash columns renamed to the t_* names used for the join.
    restriction = 'experiment_name = "{}"'.format(old_experiment_name)
    trained = pd.DataFrame((TrainedModel * Dataset * Seed * Experiments.Restrictions & restriction).fetch())
    trained = trained.rename(columns={'model_hash': 't_model_hash',
                                      'trainer_hash': 't_trainer_hash',
                                      'dataset_hash': 't_dataset_hash'})
    # Highest score first, so drop_duplicates keeps the best row per hash triple.
    trained = trained.sort_values('score', ascending=False).drop_duplicates(hash_columns)

    merged = pd.merge(transfer_rows, trained, how='inner', on=hash_columns)
    dataset_config_columns = merged['dataset_config'].apply(pd.Series)
    merged = pd.concat([merged, dataset_config_columns], axis=1).drop('dataset_config', axis=1)

    # Mark everything that describes the transferred (source) model with a 't_' prefix.
    merged.columns = [name if name.startswith(('t_', 'transfer')) else 't_' + name
                      for name in merged.columns]
    return merged.sort_values(['t_neuron_n', 't_image_n', 't_neuron_base_seed', 't_image_base_seed'])

def get_transfer_entries(old_experiment_name, overall_best):
    """Build Transfer-table entries from the trained models of an experiment.

    Args:
        old_experiment_name: Experiment whose trained models serve as transfer sources.
        overall_best: If exactly ``True``, only models trained with the largest neuron_n
            and the largest image_n are considered.

    Returns:
        List of dicts (transfer_fn, transfer_config, transfer_comment, transfer_fabrikant),
        one per remaining (neuron_n, image_n, neuron_base_seed, image_base_seed)
        combination, each pointing at the best-scoring model of that combination.

    Raises:
        AssertionError: if the experiment does not contain exactly one model function.
    """
    restriction = 'experiment_name="{}"'.format(old_experiment_name)
    trained = pd.DataFrame((TrainedModel * Dataset * Seed * Experiments.Restrictions & restriction).fetch())
    config_columns = trained['dataset_config'].apply(pd.Series)
    trained = pd.concat([trained, config_columns], axis=1).drop('dataset_config', axis=1)

    distinct_model_fns = np.unique(trained['model_fn'])
    assert len(distinct_model_fns) == 1 ,"Must have exactly 1 model function in experiment"
    model_label = distinct_model_fns[0].split('.')[-1]

    # Optionally keep only the models trained on the largest data regime.
    if overall_best is True:
        has_max_neurons = trained['neuron_n'] == trained['neuron_n'].max()
        has_max_images = trained['image_n'] == trained['image_n'].max()
        trained = trained.loc[has_max_neurons & has_max_images]

    # Best score first, so drop_duplicates keeps the best model per combination.
    combination_columns = ['neuron_n', 'image_n', 'neuron_base_seed', 'image_base_seed']
    trained = (trained.sort_values('score', ascending=False)
                      .drop_duplicates(combination_columns)
                      .sort_values(['neuron_n', 'image_n']))

    # Assemble the entries for the Transfer table.
    comment_suffix = ', neuron_n={}, neuron_base_seed={}, image_n={}, image_base_seed={}'
    entries = []
    for _, best_row in trained.iterrows():
        entries.append(dict(
            transfer_fn='nnsysident.models.transfer_functions.core_transfer',
            transfer_config=dict(t_model_hash=best_row.model_hash,
                                 t_dataset_hash=best_row.dataset_hash,
                                 t_trainer_hash=best_row.trainer_hash),
            transfer_comment=model_label + comment_suffix.format(best_row.neuron_n,
                                                                 best_row.neuron_base_seed,
                                                                 best_row.image_n,
                                                                 best_row.image_base_seed),
            transfer_fabrikant='kklurz'))
    return entries

# Add Experiment (direct)

### Add dataset entries for different neuron and image seed

In [None]:
# Seeds for which a copy of every seed-(1, 1) dataset entry is registered.
neuron_base_seeds = [1,2,3]
image_base_seeds = [1,2,3]

dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
paths = ['data/static0-0-3-preproc0.zip']

dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# The 'paths' column holds lists, which Series.isin cannot hash; use the same row-wise
# np.isin test as the other cells of this notebook (true when the row's first path is in `paths`).
dataset = dataset.loc[(dataset['neuron_base_seed'] == 1) &
                      (dataset['image_base_seed'] == 1) &
                      [np.isin(row['paths'], [paths])[0] for loc, row in dataset.iterrows()]]
# Only entries without an exclude_neuron_n setting.
dataset = dataset.loc[dataset['exclude_neuron_n'].isnull()]

for loc, row in dataset.iterrows():
    for neuron_base_seed in neuron_base_seeds:
        for image_base_seed in image_base_seeds:
            # Work on a copy: updating row['dataset_config'] in place would also change the
            # dict stored in `dataset`, leaving the frame out of sync with its seed columns.
            dataset_config = dict(row['dataset_config'],
                                  neuron_base_seed=neuron_base_seed,
                                  image_base_seed=image_base_seed)
            Dataset().add_entry(dataset_fn=row['dataset_fn'],
                                dataset_config=dataset_config,
                                dataset_fabrikant=row['dataset_fabrikant'],
                                dataset_comment=row['dataset_comment'], skip_duplicates=True)

### Add respective experiment (restriction) entries

In [None]:
# Parameters of the directly trained experiment.
model_name = 'se2d_fullgaussian2d'
paths = ['data/static0-0-3-preproc0.zip']

experiment_name = 'SIM, Direct, {}, 0-0-3'.format(model_name)
experiment_comment = 'Directly trained on simulated data with {} and static_loaders of static0-0-3-preproc0.zip. Varying number of neurons and images.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'

dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# The 'paths' column holds lists, which Series.isin cannot hash; use the same row-wise
# np.isin test as the other cells of this notebook (true when the row's first path is in `paths`).
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] for loc, row in dataset.iterrows()]]
# Only entries without an exclude_neuron_n setting.
dataset = dataset.loc[dataset['exclude_neuron_n'].isnull()]

# neuron_n / image_n of a model are parsed from its comment ('... neuron_n=<int>, image_n=<int>').
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'image_n']:
    model[arg] = [int(find_number(row.model_comment, arg + '=')[0]) for loc, row in model.iterrows()]

# Pair every dataset with the model registered for the same neuron_n / image_n.
combinations = pd.merge(dataset, model, on=["neuron_n", "image_n"]).sort_values(['neuron_n', 'image_n'])

# NOTE(review): the hard-coded trainer_hash is presumably the hash of an empty trainer_config -- confirm.
experiment = [{'dataset_hash': row['dataset_hash'],
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': 'd41d8cd98f00b204e9800998ecf8427e',
               'trainer_fn': 'nnsysident.training.trainers.standard_trainer',
               'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

#Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
#Experiments.Restrictions.insert(experiment)

# Add Experiment (SameNI)

### Add transfer entries

In [None]:
# Source experiment for the transfer; overall_best=False keeps every
# neuron_n / image_n / seed combination instead of only the largest one.
old_experiment_name = 'SIM, Direct, se2d_fullgaussian2d, 0-0-3'
overall_best = False

for entry in get_transfer_entries(old_experiment_name, overall_best):
    Transfer().add_entry(**entry)

### Add respective experiment (restriction) entries

In [None]:
# Experiment parameters
model_name = 'se2d_fullgaussian2d'
paths = ['data/static0-0-3-preproc0.zip']

experiment_name = 'SIM, core_transfer (sameNI), {}, 0-0-3 -> 0-0-3'.format(model_name)
experiment_comment = 'Transer training on simulated data with {} and static_loaders of static0-0-3-preproc0.zip. Varying number of neurons and images in the transfer core.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
trainer_fn = "nnsysident.training.trainers.standard_trainer"

# Datasets: config dict unpacked into columns (the dict column itself is kept).
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis=1)

# Trainers: config dict unpacked into columns, dict column dropped.
trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis=1).drop('trainer_config', axis=1)

# Models: neuron_n / image_n are parsed from the model comment, then the config is unpacked.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'image_n']:
    model[arg] = [int(find_number(model_row.model_comment, arg + '=')[0]) for _, model_row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis=1).drop('model_config', axis=1)

# Restrictions: matching path, exclude_neuron_n set, trainer with detach_core.
dataset = dataset.loc[[np.isin(dataset_row['paths'], [paths])[0] for _, dataset_row in dataset.iterrows()]]
dataset = dataset.loc[dataset['exclude_neuron_n'].notnull()]
trainer = trainer.loc[trainer['detach_core'].eq(True)]

# Transfer sources come from the directly trained experiment of the same model;
# the t_ prefix is dropped from the columns used as merge keys below.
old_experiment_name = 'SIM, Direct, {}, 0-0-3'.format(model_name)
transfer = get_transfer(old_experiment_name)
transfer = transfer.rename(columns={'t_neuron_base_seed': 'neuron_base_seed',
                                    't_image_base_seed': 'image_base_seed',
                                    't_neuron_n': 'neuron_n',
                                    't_image_n': 'image_n'})

# Same N / same I: the transferred core must share seeds and sizes with the target dataset.
combinations = dataset.merge(model, on=["neuron_n", "image_n"]).sort_values(['neuron_n', 'image_n'])
combinations = combinations.merge(transfer, on=['neuron_base_seed', 'image_base_seed', 'neuron_n', 'image_n'])

# One restriction per combination; the first remaining trainer is used for all of them.
experiment = [dict(dataset_hash=combo['dataset_hash'],
                   dataset_fn=combo['dataset_fn'],
                   model_hash=combo['model_hash'],
                   model_fn=combo['model_fn'],
                   trainer_hash=trainer['trainer_hash'].values[0],
                   trainer_fn=trainer['trainer_fn'].values[0],
                   transfer_hash=combo['transfer_hash'],
                   transfer_fn=combo['transfer_fn'],
                   experiment_name=experiment_name)
              for _, combo in combinations.iterrows()]

In [None]:
# Insert the experiment row first, then the restriction rows built in the previous cell.
ExperimentsTransfer.insert1({'experiment_name': experiment_name,
                             'experiment_fabrikant': fabrikant_name,
                             'experiment_comment': experiment_comment})
ExperimentsTransfer.Restrictions.insert(experiment)

# Add Experiment (best)

### Add dataset entries for different neuron and image seed

In [None]:
# Seeds for which a copy of every seed-(1, 1) dataset entry is registered.
neuron_base_seeds = [1,2,3]
image_base_seeds = [1,2,3]

dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
paths = ['data/static0-0-3-preproc0.zip']

dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# Keep seed-(1, 1) entries whose first path is in `paths` ...
dataset = dataset.loc[(dataset['neuron_base_seed'] == 1) &
                      (dataset['image_base_seed'] == 1) &
                      [np.isin(row['paths'], [paths])[0] for loc, row in dataset.iterrows()]]
# ... and that have an exclude_neuron_n setting.
dataset = dataset.loc[~ dataset['exclude_neuron_n'].isnull()]

for loc, row in dataset.iterrows():
    for neuron_base_seed in neuron_base_seeds:
        for image_base_seed in image_base_seeds:
            # Work on a copy: updating row['dataset_config'] in place would also change the
            # dict stored in `dataset`, leaving the frame out of sync with its seed columns.
            dataset_config = dict(row['dataset_config'],
                                  neuron_base_seed=neuron_base_seed,
                                  image_base_seed=image_base_seed)
            Dataset().add_entry(dataset_fn=row['dataset_fn'],
                                dataset_config=dataset_config,
                                dataset_fabrikant=row['dataset_fabrikant'],
                                dataset_comment=row['dataset_comment'], skip_duplicates=True)

### Add transfer entries

In [None]:
# Source experiment for the transfer; overall_best=True restricts the sources to
# models trained with the largest neuron_n and image_n.
old_experiment_name = 'SIM, Direct, se2d_fullgaussian2d, 0-0-3'
overall_best = True

for entry in get_transfer_entries(old_experiment_name, overall_best):
    Transfer().add_entry(**entry)

### Add respective experiment (restriction) entries

In [None]:
# Experiment parameters
model_name = 'se2d_spatialxfeaturelinear'
paths = ['data/static0-0-3-preproc0.zip']

# Size of the directly trained model whose core is transferred ("best" = largest data regime used here).
best_neuron_n, best_image_n = 1000, 4000

experiment_name = 'SIM, core_transfer (best), {}, 0-0-3 -> 0-0-3'.format(model_name)
# The comment previously described a *direct* experiment (copy-paste from that cell);
# it now describes the transfer experiment that is actually defined here.
experiment_comment = ('Transfer training on simulated data with {} and static_loaders of static0-0-3-preproc0.zip. '
                      'Core transferred from the directly trained model with neuron_n={} and image_n={}. '
                      'Varying number of neurons and images.').format(model_name, best_neuron_n, best_image_n)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
trainer_fn = "nnsysident.training.trainers.standard_trainer"


# Datasets: config dict unpacked into columns (the dict column itself is kept).
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)


# Trainers: config dict unpacked into columns, dict column dropped.
trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1).drop('trainer_config', axis = 1)

# Models: neuron_n / image_n are parsed from the model comment, then the config is unpacked.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'image_n']:
    model[arg] = [int(find_number(row.model_comment, arg + '=')[0]) for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1).drop('model_config', axis = 1)


# Restrict here: matching path, exclude_neuron_n set, trainer with detach_core.
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] for loc, row in dataset.iterrows()]]
dataset = dataset.loc[~ dataset['exclude_neuron_n'].isnull()]
trainer = trainer.loc[trainer['detach_core'] == True]


# Transfer sources: only the largest directly trained model, matched to targets by seed.
old_experiment_name = 'SIM, Direct, {}, 0-0-3'.format(model_name)
transfer = get_transfer(old_experiment_name)
transfer = transfer.loc[(transfer['t_neuron_n'] == best_neuron_n) & (transfer['t_image_n'] == best_image_n)]
transfer = transfer.rename(columns = {'t_neuron_base_seed': 'neuron_base_seed', 't_image_base_seed': 'image_base_seed'})

combinations = pd.merge(dataset, model, on=["neuron_n", "image_n"]).sort_values(['neuron_n', 'image_n'])
combinations = pd.merge(combinations, transfer, on=['neuron_base_seed', 'image_base_seed'])

# One restriction per combination; the first remaining trainer is used for all of them.
experiment = [{'dataset_hash': row['dataset_hash'],
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': trainer['trainer_hash'].values[0],
               'trainer_fn': trainer['trainer_fn'].values[0],
               'transfer_hash': row['transfer_hash'],
               "transfer_fn": row['transfer_fn'],
               'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

In [None]:
# NOTE(review): the insert of the experiment row itself is commented out, so this cell only
# adds restriction rows -- presumably the experiment row must already exist; confirm before running.
#ExperimentsTransfer.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
ExperimentsTransfer.Restrictions.insert(experiment)

# Other stuff

### Add single entries in main tables

In [None]:
# Templates for adding a single row to each main table. The add_entry calls are left
# commented out; `paths` is whatever an earlier cell defined.

# Dataset
entry = {
    'dataset_fn': 'nnsysident.datasets.mouse_loaders.static_loaders',
    'dataset_config': {'paths': paths, 'batch_size': 64, 'seed': 1},
    'dataset_comment': 'full dataset',
    'dataset_fabrikant': 'kklurz',
}
#Dataset().add_entry(**entry)

# Model
entry = {
    'model_fn': 'nnsysident.models.models.se2d_fullgaussian2d',
    'model_config': {},
    'model_comment': 'default model',
    'model_fabrikant': 'kklurz',
}
#Model().add_entry(**entry)

# Trainer
entry = {
    'trainer_fn': 'nnsysident.training.trainers.standard_trainer',
    'trainer_config': {'detach_core': True},
    'trainer_comment': 'default trainer',
    'trainer_fabrikant': 'kklurz',
}
#Trainer().add_entry(**entry)

# Transfer
entry = {
    'transfer_fn': 'nnsysident.models.transfer_functions.core_transfer',
    'transfer_config': {'t_model_hash': 'd41d8cd98f00b204e9800998ecf8427e',
                        't_dataset_hash': '6fa162a20053a013ab4bd31a21950d35',
                        't_trainer_hash': 'd41d8cd98f00b204e9800998ecf8427e'},
    'transfer_comment': 'test transfer',
    'transfer_fabrikant': 'kklurz',
}
#Transfer().add_entry(**entry)

### Add dataset entries for #images and #neurons

In [None]:
# All dataset entries that do not have an exclude_neuron_n setting yet.
dataset = pd.DataFrame(Dataset.fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis=1).drop(columns='dataset_config')
dataset = dataset.loc[dataset['exclude_neuron_n'].isnull()]

# For each of them, prepare a sibling entry that additionally sets exclude_neuron_n.
for loc, row in dataset.iterrows():
    exclude_neuron_n = 3000
    entry = {
        'dataset_fn': 'nnsysident.datasets.mouse_loaders.static_loaders',
        'dataset_config': {'paths': row.paths,
                           'batch_size': 64,
                           'neuron_n': row.neuron_n,
                           'neuron_base_seed': row.neuron_base_seed,
                           'image_n': row.image_n,
                           'image_base_seed': row.image_base_seed,
                           'exclude_neuron_n': exclude_neuron_n},
        'dataset_comment': 'neuron_n={}, image_n={}, exclude_neuron_n={}'.format(row.neuron_n, row.image_n, exclude_neuron_n),
        'dataset_fabrikant': 'kklurz',
    }
    #Dataset().add_entry(**entry, skip_duplicates=True)

### Compare model state dicts

In [None]:
def compare_models(state_dict_1, state_dict_2):
    """Print which entries of two state dicts differ.

    Entries are compared key by key with ``torch.equal``. For every key whose tensors
    differ a line ``Mismatch found at <key>`` is printed; if none differ, a success
    message is printed instead. Nothing is returned.

    Args:
        state_dict_1: Mapping from parameter name to tensor.
        state_dict_2: Mapping from parameter name to tensor, compared against the first.

    Raises:
        ValueError: if the two dicts do not contain exactly the same keys. Comparing by
            position (as a plain ``zip`` would) silently ignores surplus entries of the
            longer dict and can pair up unrelated parameters.
    """
    keys_1, keys_2 = set(state_dict_1), set(state_dict_2)
    if keys_1 != keys_2:
        raise ValueError(
            'State dicts have different keys: {}'.format(sorted(keys_1 ^ keys_2, key=str)))

    models_differ = 0
    for key, tensor_1 in state_dict_1.items():
        if not torch.equal(tensor_1, state_dict_2[key]):
            models_differ += 1
            print('Mismatch found at', key)
    if models_differ == 0:
        print('Models match perfectly! :)')

### Add failed Bayesian to Model()

In [None]:
model_fn = "nnsysident.models.models.se2d_pointpooled"

# Bayesian search results for this model function; dataset_config is unpacked and dropped,
# model_config is unpacked but kept (the commented-out add_entry below reads it).
all_info = pd.DataFrame((TrainedModelBayesian * ModelBayesian * DatasetBayesian & 'model_fn = "{}"'.format(model_fn)).fetch())
all_info = pd.concat([all_info, all_info['dataset_config'].apply(pd.Series)], axis=1).drop(columns='dataset_config')
all_info = pd.concat([all_info, all_info['model_config'].apply(pd.Series)], axis=1)

for neuron_n in [100, 1000]:
    for image_n in [50, 100, 200, 500, 1000, 4000]:
        # Trials of this (neuron_n, image_n) setting that have a hidden_kern value, lowest score first.
        same_setting = (all_info['neuron_n'] == neuron_n) & (all_info['image_n'] == image_n)
        has_hidden_kern = all_info['hidden_kern'].notnull()
        one_exp = all_info.loc[same_setting & has_hidden_kern].sort_values('score')
        best = one_exp.loc[one_exp['score'] == one_exp['score'].max()]
        print(len(one_exp))
        print(one_exp['neuron_n'].values[0], one_exp['image_n'].values[0])
#         Model().add_entry(model_fn=best['model_fn'].values[0],
#                           model_config=best['model_config'].values[0],
#                           model_fabrikant='kklurz',
#                           model_comment='{}, neuron_n={}, image_n={}'.format(best['model_fn'].values[0].split('.')[-1], 
#                                                                              best['neuron_n'].values[0], best['image_n'].values[0]), skip_duplicates=True)

In [None]:
model_fn = "nnsysident.models.models.se2d_spatialxfeaturelinear"

# Registered models of this function, with neuron_n / image_n parsed from the model comment
# and the config dict unpacked into columns.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'image_n']:
    model[arg] = [int(find_number(model_row.model_comment, arg + '=')[0]) for _, model_row in model.iterrows()]
model = (
    pd.concat([model, model['model_config'].apply(pd.Series)], axis=1)
    .drop(columns='model_config')
    .sort_values(['neuron_n', 'image_n'])
)
model

### Plot hyperparameters

In [None]:
# `one_exp` is whatever the last iteration of the Bayesian inspection loop above left behind.
# NOTE(review): `hip` is not imported in any visible cell -- presumably `import hiplot as hip`
# is missing; on a fresh kernel this cell raises NameError. Confirm and add the import.
one_exp_h = one_exp.set_index('score', drop=False).copy()
cols = ['score', 'gamma_readout', 'gamma_input', 'init_mu_range', 'init_sigma']
hip.Experiment.from_dataframe(one_exp_h[cols]).display(force_full_width=False)