In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2 

In [2]:
import os
import datajoint as dj
# Connection settings come from environment variables so that no credentials
# are stored in the notebook itself.
for config_key, env_var in (
    ('database.host', 'DJ_HOST'),
    ('database.user', 'DJ_USER'),
    ('database.password', 'DJ_PASS'),
):
    dj.config[config_key] = os.environ[env_var]

dj.config['enable_python_native_blobs'] = True
dj.config['display.limit'] = 100

# Schema suffix to work on; the alternative used previously is "simdata".
name = 'realdata'
dj.config['schema_name'] = f"konstantin_nnsysident_{name}"

In [3]:
import torch
import shutil
import numpy as np
import pickle 
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
import matplotlib.pyplot as plt
import re
import seaborn as sns
import hiplot as hip

import nnfabrik
from nnfabrik.main import *
from nnfabrik import builder
from nnfabrik.utility.hypersearch import Bayesian

from nnsysident.tables.experiments import *
from nnsysident.tables.bayesian import *
from nnsysident.datasets.mouse_loaders import static_shared_loaders
from nnsysident.datasets.mouse_loaders import static_loaders

from nnsysident.datasets.mouse_loaders import static_loader

def find_number(text, c):
    """Return every run of digits in ``text`` that directly follows the literal prefix ``c``.

    Args:
        text: String to search.
        c: Prefix that must immediately precede the digits, e.g. ``'image_n='``.
            It is matched literally, not as a regular expression.

    Returns:
        List of the matched digit strings (not converted to ``int``) in order of
        appearance; an empty list when nothing matches.
    """
    # Escape the prefix so that characters such as '.', '(' or '+' are matched
    # literally instead of being interpreted as regular-expression syntax.
    return re.findall(r'%s(\d+)' % re.escape(str(c)), text)

def get_transfer(old_experiment_name):
    """Build a flat DataFrame describing the models that were transferred.

    Every ``Transfer`` entry is joined with the highest-scoring trained model of
    ``old_experiment_name`` that shares its ``t_model_hash``, ``t_trainer_hash`` and
    ``t_dataset_hash``. The ``transfer_config`` and ``dataset_config`` dicts are
    expanded into columns, and every column whose name does not already start with
    ``t_`` or ``transfer`` receives a ``t_`` prefix. The result is meant to be merged
    (on ``transfer_fn`` and ``transfer_hash``) with the model that the transferred
    model was used for.

    Args:
        old_experiment_name: ``experiment_name`` used to restrict the trained models.

    Returns:
        DataFrame sorted by ``t_neuron_n``, ``t_image_n``, ``t_neuron_base_seed`` and
        ``t_image_base_seed``.
    """
    hash_keys = ['t_model_hash', 't_trainer_hash', 't_dataset_hash']

    # Transfer table with the entries of its config dict spread over columns.
    transfer_raw = pd.DataFrame(Transfer.fetch())
    transfer_cfg = transfer_raw['transfer_config'].apply(pd.Series)
    transfer_flat = pd.concat([transfer_raw, transfer_cfg], axis=1).drop('transfer_config', axis=1)

    # Trained models of the old experiment, with hash columns renamed to match the
    # names used on the transfer side.
    restriction = 'experiment_name = "{}"'.format(old_experiment_name)
    trained_query = TrainedModel * Dataset * Seed * Experiments.Restrictions & restriction
    trained = pd.DataFrame(trained_query.fetch()).rename(
        columns={
            'model_hash': 't_model_hash',
            'trainer_hash': 't_trainer_hash',
            'dataset_hash': 't_dataset_hash',
        }
    )
    # Sorting by descending score first makes drop_duplicates keep the best model
    # for each (model, trainer, dataset) hash triple.
    best_trained = trained.sort_values('score', ascending=False).drop_duplicates(hash_keys)

    merged = pd.merge(transfer_flat, best_trained, how='inner', on=hash_keys)
    dataset_cfg = merged['dataset_config'].apply(pd.Series)
    merged = pd.concat([merged, dataset_cfg], axis=1).drop('dataset_config', axis=1)

    # Mark every remaining column as belonging to the transferred model.
    merged.columns = [
        col if col.startswith(('t_', 'transfer')) else 't_' + col
        for col in merged.columns
    ]
    return merged.sort_values(['t_neuron_n', 't_image_n', 't_neuron_base_seed', 't_image_base_seed'])

Connecting konstantin@sinzlab.chlkmukhxp6i.eu-central-1.rds.amazonaws.com:3306
Schema name: konstantin_nnsysident_realdata


In [None]:
# Register copies of the existing (seed 1 / seed 1) dataset entries of the 4-set
# for every combination of the base seeds listed here.
multi_match_base_seeds = [1,2,3,4,5]
image_base_seeds = [1]

dataset_fn = 'nnsysident.datasets.mouse_loaders.static_shared_loaders'
paths = ['data/static22564-2-12-preproc0.zip',
                     'data/static22564-2-13-preproc0.zip',
                     'data/static22564-3-8-preproc0.zip',
                     'data/static22564-3-12-preproc0.zip']

dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# The entries with both base seeds equal to 1 serve as templates.
dataset = dataset.loc[(dataset['multi_match_base_seed'] == 1) & (dataset['image_base_seed'] == 1)]
# Keep only configs that use exactly the files in `paths`. The previous check,
# np.isin(row['paths'], [paths])[0], only looked at the FIRST path of each row and
# therefore also accepted configs that merely start with one of these files.
uses_all_paths = [
    isinstance(row_paths, (list, tuple, np.ndarray)) and set(row_paths) == set(paths)
    for row_paths in dataset['paths']
]
dataset = dataset.loc[uses_all_paths]
#dataset = dataset.loc[dataset['exclude_neuron_n'].isnull()]

for row_label, row in dataset.iterrows():
    for multi_match_base_seed in multi_match_base_seeds:
        for image_base_seed in image_base_seeds:
            # Build a fresh dict per entry instead of calling .update() on the dict
            # stored in the DataFrame, which would be mutated and shared between
            # iterations.
            dataset_config = dict(row['dataset_config'],
                                  multi_match_base_seed=multi_match_base_seed,
                                  image_base_seed=image_base_seed)
            Dataset().add_entry(dataset_fn=row['dataset_fn'],
                                dataset_config=dataset_config,
                                dataset_fabrikant=row['dataset_fabrikant'],
                                dataset_comment=row['dataset_comment'], skip_duplicates=True)

In [None]:
# Display the Dataset table as the cell output.
Dataset()

In [None]:
# Scratch arithmetic. NOTE(review): the meaning of the individual numbers is not
# documented here — presumably an expected count of table entries; confirm or remove.
(5 * 36) + 59 - 5

In [6]:
# Scratch arithmetic. NOTE(review): the meaning of the factors is not documented
# here — presumably an expected number of entries; confirm or remove.
5 * 36 * 5

900

In [8]:
# Define an experiment that pairs every 4-set dataset entry with the model entry
# registered for the same (multi_match_n, image_n) combination.
model_name = 'se2d_fullgaussian2d'
paths = ['data/static22564-2-12-preproc0.zip',
                     'data/static22564-2-13-preproc0.zip',
                     'data/static22564-3-8-preproc0.zip',
                     'data/static22564-3-12-preproc0.zip']

experiment_name = 'Real, Direct, {}, 4-set'.format(model_name)
experiment_comment = 'Directly trained on real data with {} and static_shared_loaders of the 4-set. Varying number of neurons and images.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_shared_loaders'

dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# Keep only configs that use exactly the files in `paths`. The previous check,
# np.isin(row['paths'], [paths])[0], only looked at the FIRST path of each row and
# therefore also accepted configs that merely start with one of these files.
uses_all_paths = [
    isinstance(row_paths, (list, tuple, np.ndarray)) and set(row_paths) == set(paths)
    for row_paths in dataset['paths']
]
dataset = dataset.loc[uses_all_paths]
dataset = dataset.loc[(dataset['multi_match_base_seed'].isin([1,2,3,4,5])) & (dataset['image_base_seed'] == 1)]
#dataset = dataset.loc[dataset['exclude_neuron_n'].isnull()]

# The neuron/image counts a model belongs to are only recorded in its comment
# ("..., multi_match_n=<int>, image_n=<int>"), so parse them from there.
# A comment without the expected "<arg>=<int>" part raises IndexError.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['multi_match_n', 'image_n']:
    model[arg] = [int(find_number(comment, arg + '=')[0]) for comment in model['model_comment']]

combinations = pd.merge(dataset, model, on=["multi_match_n", "image_n"]).sort_values(['multi_match_n', 'image_n'])

# NOTE(review): the trainer hash is hard-coded — presumably the hash of the
# default (empty) trainer config; confirm against the Trainer table.
experiment = [{'dataset_hash': row['dataset_hash'],
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': 'd41d8cd98f00b204e9800998ecf8427e',
               'trainer_fn': 'nnsysident.training.trainers.standard_trainer',
               'experiment_name': experiment_name} for _, row in combinations.iterrows()]

# skip_duplicates on both inserts so the cell can be re-run (e.g. after adding
# dataset entries) without failing on the already existing experiment row.
Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment),
                    skip_duplicates=True)
Experiments.Restrictions.insert(experiment, skip_duplicates=True)

In [None]:
experiment_name = 'Real, Direct, se2d_fullgaussian2d, 4-set, seed comparison'

# Scores of all trained models of the experiment together with the three seeds.
restriction = 'experiment_name="{}"'.format(experiment_name)
data = pd.DataFrame((TrainedModel * Dataset * Experiments.Restrictions & restriction).fetch())
data = pd.concat([data, data['dataset_config'].apply(pd.Series)], axis = 1)
data = data[['seed', 'multi_match_base_seed', 'image_base_seed', 'score']]

# Each subset holds two of the seeds fixed so that only the remaining one varies.
model_seed_is_1 = data['seed'] == 1
neuron_seed_is_1 = data['multi_match_base_seed'] == 1

# NOTE(review): the model-seed subset fixes image_base_seed at 5 whereas the
# neuron-seed subset fixes it at 1 — confirm that this is intended.
seed_seed = data.loc[neuron_seed_is_1 & (data['image_base_seed'] == 5)].assign(**{'Seed type': 'Model seed'})
seed_neuron = data.loc[model_seed_is_1 & (data['image_base_seed'] == 1)].assign(**{'Seed type': 'Neuron seed'})
seed_image = data.loc[neuron_seed_is_1 & model_seed_is_1].assign(**{'Seed type': 'Image seed'})

seed_data = pd.concat([seed_seed, seed_neuron, seed_image])

In [None]:
# Violin plot of the score distribution for each seed type, with the individual
# observations drawn inside the violins.
g = sns.catplot(
    data=seed_data,
    x="Seed type",
    y="score",
    kind="violin",
    inner='point',
)
#sns.swarmplot(x="Seed type", y="score", color="k", size=3, data=seed_data, ax=g.ax)
sns.despine(trim=True)

In [None]:
# Same comparison as a boxen (letter-value) plot.
sns.boxenplot(
    data=seed_data,
    x="Seed type",
    y="score",
)
sns.despine(trim=True)

In [None]:
# Switch seaborn to its 'paper' plotting context.
sns.set_context('paper')

# One marker per row of seed_data, grouped by the seed type that was varied.
sns.swarmplot(
    data=seed_data,
    x="Seed type",
    y="score",
    size=10,
)
sns.despine(trim=True)

In [None]:
fig, ax = plt.subplots()

# Main panel: swarm plot of the scores per seed type.
sns.swarmplot(data=seed_data, x="Seed type", y="score", size=10, ax=ax)
#ax.set(ylim=(0, 1))
sns.despine(trim=True)

# Side panel placed to the right of the main axes: horizontal histogram of the
# scores obtained when only the model seed varies.
model_seed_scores = seed_data.loc[seed_data['Seed type'] == 'Model seed', 'score'].values
ax1 = fig.add_axes([.99, .14, .2, .75])
ax1.hist(model_seed_scores, orientation='horizontal')
#ax1.set(ylim=(0, 1))
sns.despine(top=True)

In [None]:
fig, ax  = plt.subplots(figsize=(6, 3), dpi=150)

# Normalised, vertically oriented histogram of the scores obtained when only the
# model seed varies.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; migrate to
# sns.histplot once the seaborn version used for this project supports it.
model_seed_scores = seed_data.loc[seed_data['Seed type'] == 'Model seed', 'score'].values
sns.distplot(
    model_seed_scores,
    hist=True,
    kde=False,
    norm_hist=True,
    bins=20,
    vertical=True,
    color="g",
    ax=ax,
)
# sns.distplot(seed_data.loc[seed_data['Seed type'] == 'Neuron seed', 'score'].values, hist=False, vertical=True, color="r", norm_hist=True, kde_kws={"shade": True}, ax=ax)
# sns.distplot(seed_data.loc[seed_data['Seed type'] == 'Image seed', 'score'].values, hist=False, vertical=True, color="b", norm_hist=True, kde_kws={"shade": True}, ax=ax)

---

# Deterministic Gaussian

In [None]:
model_fn = "nnsysident.models.models.se2d_deterministicgaussian2d"

# All Bayesian-search runs for this model function, joined with their model and
# dataset entries.
bayesian_query = TrainedModelBayesian * ModelBayesian * DatasetBayesian & 'model_fn = "{}"'.format(model_fn)
all_info = pd.DataFrame(bayesian_query.fetch())

# Spread the dataset config dict over columns and drop the dict column.
dataset_columns = all_info['dataset_config'].apply(pd.Series)
all_info = pd.concat([all_info, dataset_columns], axis=1).drop('dataset_config', axis=1)

# Spread the model config dict over columns (the dict column itself is kept) and
# encode share_features numerically.
model_columns = all_info['model_config'].apply(pd.Series)
all_info = pd.concat([all_info, model_columns], axis=1)
all_info = all_info.replace({'share_features': {True: 1, False: 0}})

# Kernel sizes missing from a config are filled in with fixed values —
# presumably the model defaults; TODO confirm.
all_info = all_info.fillna({'input_kern': 9, 'hidden_kern': 7})


In [None]:
info = all_info

# Columns handed to HiPlot: the score plus the settings to compare it against.
cols = ['score', 'gamma_readout','input_kern', 'hidden_kern', "share_features", 'multi_match_n', 'image_n']

# Index the frame by score while keeping 'score' as a regular column.
one_exp_h = info.set_index('score', drop=False).copy()
hiplot_experiment = hip.Experiment.from_dataframe(one_exp_h[cols])
hiplot_experiment.display(force_full_width=False)

# Spatial x feature (`se2d_spatialxfeaturelinear`)

In [None]:
model_fn = "nnsysident.models.models.se2d_spatialxfeaturelinear"

# All Bayesian-search runs for this model function, joined with their model and
# dataset entries.
bayesian_query = TrainedModelBayesian * ModelBayesian * DatasetBayesian & 'model_fn = "{}"'.format(model_fn)
all_info = pd.DataFrame(bayesian_query.fetch())

# Spread both config dicts over columns; the dict columns themselves are kept.
dataset_columns = all_info['dataset_config'].apply(pd.Series)
all_info = pd.concat([all_info, dataset_columns], axis=1)
model_columns = all_info['model_config'].apply(pd.Series)
all_info = pd.concat([all_info, model_columns], axis=1)

# Kernel sizes missing from a config are filled in with fixed values —
# presumably the model defaults; TODO confirm.
all_info = all_info.fillna({'input_kern': 9, 'hidden_kern': 7})


In [None]:
info = all_info

# Columns handed to HiPlot: the score plus the settings to compare it against.
cols = ['score',  'gamma_input', 'gamma_readout','input_kern', 'hidden_kern', 'multi_match_n', 'image_n']

# Index the frame by score while keeping 'score' as a regular column.
one_exp_h = info.set_index('score', drop=False).copy()
hiplot_experiment = hip.Experiment.from_dataframe(one_exp_h[cols])
hiplot_experiment.display(force_full_width=False)

In [None]:
model_fn = 'nnsysident.models.models.se2d_spatialxfeaturelinear'

model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())

# The neuron/image counts are read from the model comment, which is expected to
# contain "neuron_n=<int>" and "image_n=<int>".
for arg in ['neuron_n', 'image_n']:
    model[arg] = [int(find_number(comment, arg + '=')[0]) for comment in model['model_comment']]

# Spread the model config dict over columns and drop the dict column.
config_columns = model['model_config'].apply(pd.Series)
model = pd.concat([model, config_columns], axis=1).drop('model_config', axis=1)
model = model.sort_values(['neuron_n', 'image_n'])

# Show the readout regularisation value stored for each (neuron_n, image_n) entry.
cols = ['gamma_readout', 'neuron_n', 'image_n']
one_exp_h = model.set_index('gamma_readout', drop=False).copy()
hiplot_experiment = hip.Experiment.from_dataframe(one_exp_h[cols])
hiplot_experiment.display(force_full_width=False)

# Gaussian

In [None]:
model_fn = "nnsysident.models.models.se2d_fullgaussian2d"

# All Bayesian-search runs for this model function, joined with their model and
# dataset entries.
bayesian_query = TrainedModelBayesian * ModelBayesian * DatasetBayesian & 'model_fn = "{}"'.format(model_fn)
all_info = pd.DataFrame(bayesian_query.fetch())

# Spread the dataset config dict over columns and drop the dict column.
dataset_columns = all_info['dataset_config'].apply(pd.Series)
all_info = pd.concat([all_info, dataset_columns], axis=1).drop('dataset_config', axis=1)

# Spread the model config dict over columns (the dict column itself is kept) and
# encode share_features numerically.
model_columns = all_info['model_config'].apply(pd.Series)
all_info = pd.concat([all_info, model_columns], axis=1)
all_info = all_info.replace({'share_features': {True: 1, False: 0}})

# Settings missing from a config are filled in with fixed values — presumably
# the model defaults; TODO confirm.
all_info = all_info.fillna({'input_kern': 9, 'hidden_kern': 7, 'init_mu_range': 0.3, 'init_sigma': 0.1})

# Keep only the runs whose config specifies a grid_mean_predictor.
all_info = all_info.loc[all_info['grid_mean_predictor'].notnull()]


# for neuron_n in [100, 1000]:
#     for image_n in [50, 100, 200, 500, 1000, 4000]:
#         one_exp = all_info.loc[(all_info['neuron_n'] == neuron_n) & (all_info['image_n'] == image_n) & (~ all_info['hidden_kern'].isnull())].sort_values('score')
#         best = one_exp.loc[one_exp['score'] == one_exp['score'].max()]
#         print(len(one_exp))
#         print(one_exp['neuron_n'].values[0], one_exp['image_n'].values[0])
#         Model().add_entry(model_fn=best['model_fn'].values[0],
#                           model_config=best['model_config'].values[0],
#                           model_fabrikant='kklurz',
#                           model_comment='{}, neuron_n={}, image_n={}'.format(best['model_fn'].values[0].split('.')[-1], 
#                                                                              best['neuron_n'].values[0], best['image_n'].values[0]), skip_duplicates=True)

In [None]:
info = all_info

# Columns handed to HiPlot: the score plus the settings to compare it against.
cols = ['score',  'gamma_input', 'gamma_readout','init_mu_range', 'init_sigma', 'input_kern', 'hidden_kern', 'share_features', 'multi_match_n', 'image_n']

# Index the frame by score while keeping 'score' as a regular column.
one_exp_h = info.set_index('score', drop=False).copy()
hiplot_experiment = hip.Experiment.from_dataframe(one_exp_h[cols])
hiplot_experiment.display(force_full_width=False)

# share_features = True

In [None]:
neuron_n = 500
image_n = 200
share_features = True
hidden_kern = 15

# Runs for the chosen number of neurons and images and the chosen sharing mode.
matches_setting = (
    (all_info['multi_match_n'] == neuron_n)
    & (all_info['image_n'] == image_n)
    & (all_info['share_features'] == share_features)
)
info = all_info.loc[matches_setting].sort_values('score')

# Of those, only the runs with the chosen hidden kernel size.
one_exp = info.loc[info['hidden_kern'] == hidden_kern].sort_values('score')

cols = ['score', 'gamma_readout', 'gamma_input']
one_exp_h = one_exp.set_index('score', drop=False).copy()
hiplot_experiment = hip.Experiment.from_dataframe(one_exp_h[cols])
hiplot_experiment.display(force_full_width=False)

In [None]:
neuron_n = 3625
image_n = 4399
share_features = True
hidden_kern = 15

# Runs for the chosen number of neurons and images and the chosen sharing mode.
matches_setting = (
    (all_info['multi_match_n'] == neuron_n)
    & (all_info['image_n'] == image_n)
    & (all_info['share_features'] == share_features)
)
info = all_info.loc[matches_setting].sort_values('score')

# Of those, only the runs with the chosen hidden kernel size.
one_exp = info.loc[info['hidden_kern'] == hidden_kern].sort_values('score')

cols = ['score', 'gamma_readout', 'gamma_input']
one_exp_h = one_exp.set_index('score', drop=False).copy()
hiplot_experiment = hip.Experiment.from_dataframe(one_exp_h[cols])
hiplot_experiment.display(force_full_width=False)

## hidden_kern = 7

In [None]:
neuron_n = 500
image_n = 200
share_features = True
hidden_kern = 7

# Runs for the chosen number of neurons and images and the chosen sharing mode.
info = all_info.loc[(all_info['multi_match_n'] == neuron_n) & (all_info['image_n'] == image_n) & (all_info['share_features'] == share_features)].sort_values('score')

# Select the runs with hidden kernel size 7. The loading cell fills a missing
# hidden_kern with 7, so the former `.isnull()` test matched no rows at all.
# Filling here as well keeps the selection correct whether or not that fillna
# has been applied to all_info.
one_exp = info.loc[info['hidden_kern'].fillna(hidden_kern) == hidden_kern].sort_values('score')

one_exp_h = one_exp.set_index('score', drop=False).copy()
cols = ['score', 'gamma_readout', 'gamma_input']
hip.Experiment.from_dataframe(one_exp_h[cols]).display(force_full_width=False)

In [None]:
neuron_n = 3625
image_n = 4399
share_features = True
hidden_kern = 7

# Runs for the chosen number of neurons and images and the chosen sharing mode.
info = all_info.loc[(all_info['multi_match_n'] == neuron_n) & (all_info['image_n'] == image_n) & (all_info['share_features'] == share_features)].sort_values('score')

# Select the runs with hidden kernel size 7. The loading cell fills a missing
# hidden_kern with 7, so the former `.isnull()` test matched no rows at all.
# Filling here as well keeps the selection correct whether or not that fillna
# has been applied to all_info.
one_exp = info.loc[info['hidden_kern'].fillna(hidden_kern) == hidden_kern].sort_values('score')

one_exp_h = one_exp.set_index('score', drop=False).copy()
cols = ['score', 'gamma_readout', 'gamma_input']
hip.Experiment.from_dataframe(one_exp_h[cols]).display(force_full_width=False)

# share_features = False

In [None]:
neuron_n = 500
image_n = 200
share_features = False

# Runs for the chosen number of neurons and images and the chosen sharing mode.
matches_setting = (
    (all_info['multi_match_n'] == neuron_n)
    & (all_info['image_n'] == image_n)
    & (all_info['share_features'] == share_features)
)
info = all_info.loc[matches_setting].sort_values('score')

# No restriction on the hidden kernel size in this cell.
one_exp = info.sort_values('score')

cols = ['score', 'gamma_readout', 'gamma_input']
one_exp_h = one_exp.set_index('score', drop=False).copy()
hiplot_experiment = hip.Experiment.from_dataframe(one_exp_h[cols])
hiplot_experiment.display(force_full_width=False)

In [None]:
neuron_n = 3625
image_n = 4399
share_features = False
hidden_kern = 7

# Runs for the chosen number of neurons and images and the chosen sharing mode.
info = all_info.loc[(all_info['multi_match_n'] == neuron_n) & (all_info['image_n'] == image_n) & (all_info['share_features'] == share_features)].sort_values('score')

# Select the runs with hidden kernel size 7. The loading cell fills a missing
# hidden_kern with 7, so the former `.isnull()` test matched no rows at all.
# Filling here as well keeps the selection correct whether or not that fillna
# has been applied to all_info.
one_exp = info.loc[info['hidden_kern'].fillna(hidden_kern) == hidden_kern].sort_values('score')

one_exp_h = one_exp.set_index('score', drop=False).copy()
cols = ['score', 'gamma_readout', 'gamma_input']
hip.Experiment.from_dataframe(one_exp_h[cols]).display(force_full_width=False)

In [None]:
# Same selection as in the previous cell (relies on `info` defined there), now
# also showing the init_mu_range / init_sigma settings.
# hidden_kern is compared with 7 after filling missing values: the loading cell
# fills a missing hidden_kern with 7, so the former `.isnull()` test matched no
# rows at all.
one_exp = info.loc[info['hidden_kern'].fillna(7) == 7].sort_values('score')

one_exp_h = one_exp.set_index('score', drop=False).copy()
cols = ['score', 'gamma_readout', 'gamma_input', 'init_mu_range', 'init_sigma']
hip.Experiment.from_dataframe(one_exp_h[cols]).display(force_full_width=False)

In [None]:
# Display the most recently assigned `info` selection as the cell output.
info