In [None]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Load the autoreload extension and run it in mode 2.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import datajoint as dj
# Connection settings come from environment variables so that no credentials
# are stored in the notebook; a missing variable raises KeyError right here.
dj.config['database.host'] = os.environ['DJ_HOST']
dj.config['database.user'] = os.environ['DJ_USER']
dj.config['database.password'] = os.environ['DJ_PASS']
dj.config['enable_python_native_blobs'] = True
dj.config['display.limit'] = 200

# The schema name is published through the environment.
# NOTE(review): presumably read by the nnfabrik/nnsysident table modules that
# are imported in the next cell, so this cell has to run first — confirm.
name = 'interspecies_development'
os.environ["DJ_SCHEMA_NAME"] = f"konstantin_nnsysident_{name}"

In [None]:
import re
import torch
import numpy as np
import pickle
import pandas as pd
# Show up to 500 columns but truncate long frames to 10 rows.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 10)
from collections import OrderedDict
# The ABC aliases were removed from `collections` in Python 3.10;
# `Iterable` has to be imported from `collections.abc` (available since 3.3).
from collections.abc import Iterable
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

import nnfabrik
from nnfabrik.main import *
from nnfabrik import builder
from nnfabrik.utility.hypersearch import Bayesian

from nnsysident.tables.experiments import *
from nnsysident.tables.bayesian import *
from nnsysident.datasets.mouse_loaders import static_shared_loaders, static_loaders, static_loader
from nnsysident.tables.scoring import OracleScore, OracleScoreTransfer

from neuralpredictors.data.datasets import StaticImageSet, FileTreeDataset
from dataport.bcm.static import PreprocessedMouseData

### Examine datasets to find neurons per area

In [None]:
# Dotted path of the loader function that is passed to builder.get_data below.
dataset_fn = "nnsysident.datasets.mouse_loaders.static_loaders"

# Archives of the "preproc0" export. Each key is <animal_id>-<session>-<scan_idx>.
dataset_paths = [
    f"/notebooks/data/static{scan_key}-preproc0.zip"
    for scan_key in (
        "20892-9-10",
        "20892-9-11",
        "20892-10-10",
        "22083-6-18",
        "23549-4-8",
        "23555-26-19",
        "23555-26-20",
        "23555-67-10",
        "23555-67-11",
        "23603-5-19",
        "23656-10-20",
        "23656-10-21",
        "23946-3-10",
        "23961-3-19",
        "23961-3-20",
        "23964-3-14",
        "23964-13-10",
        "23964-13-11",
        "24391-6-17",
        "24391-6-18",
        "24457-3-9",
        "24457-3-12",
    )
]

# Archives of the "GrayImageNet-<hash>" export of the same scans.
dataset_paths_crypto = [
    f"/notebooks/data/static{scan_key}-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip"
    for scan_key in (
        "20892-9-10",
        "20892-9-11",
        "20892-10-10",
        "22083-6-18",
        "23549-4-8",
        "23555-26-19",
        "23555-26-20",
        "23555-67-10",
        "23555-67-11",
        "23603-5-19",
        "23656-10-20",
        "23656-10-21",
        "23946-3-10",
        # Disabled scans (kept for reference):
        # "23961-3-19",
        # "23961-3-20",
        # "23964-3-14",
        # "23964-13-10",
        # "23964-13-11",
        # "24391-6-17",  #### -> transfer dataset
        "24391-6-18",
        "24457-3-9",
        "24457-3-12",
    )
]


# dataset_paths_crypto = ['/notebooks/data/static24391-6-17-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip'] # this is for the transfer training

In [None]:
# For every preproc0 archive, restrict PreprocessedMouseData to the same
# animal/session/scan but a *different* filename, and collect that file's path
# inside the same data directory.
filenames = []
for dataset_path in dataset_paths:
    # Split instead of slicing with a hard-coded prefix length, so the lookup
    # keeps working if the data directory changes.
    data_dir, archive_name = os.path.split(dataset_path)
    # Parse the ids from the file name only; digits in the directory part
    # would otherwise break the 4-way unpacking.
    animal_id, session, scan_idx, preproc_id = re.findall(r'\d+', archive_name)
    entry = (PreprocessedMouseData & f'animal_id={animal_id}' & f'session={session}' & f'scan_idx={scan_idx}' & f'filename!="{archive_name}"')
    # NOTE(review): fetch1 presumably requires the restriction to match exactly one row — confirm.
    filenames.append(os.path.join(data_dir, entry.fetch1('filename')))

In [None]:
# Loader settings for the exploratory pass over the GrayImageNet archives.
dataset_config = {
    "paths": dataset_paths_crypto,
    "batch_size": 100000,
    "seed": 1,
    "file_tree": True,
}

# Build the dataloaders through nnfabrik's builder.
dataloaders = builder.get_data(dataset_fn, dataset_config)

In [None]:
# Upper bound on the number of neurons a single scan contributes per area.
cutoff = 350

neurons = {}    # area -> capped neuron count, summed over all scans
neuron_n = []   # capped V1 count of every scan that has V1 neurons
seen_images = set()
for data_key, dataloader in dataloaders['train'].items():
    print(data_key)
    scan = dataloader.dataset

    # images: report how many condition hashes of this scan were already seen
    image_set = set(scan.trial_info.condition_hash)
    print("identical images: " + str(len(seen_images & image_set)) + " / " + str(len(image_set)))
    seen_images |= image_set

    # neurons: accumulate the capped count for every area that is not skipped
    for area in np.unique(scan.neurons.area):
        if area in ["AM", "P", "unknown", "LI", "POR"]:
            continue
        n_capped = min(cutoff, len(np.where(scan.neurons.area == area)[0]))

        if area == "V1":
            print(area + ': {}'.format(n_capped))
            neuron_n.append(n_capped)
        neurons[area] = neurons.get(area, 0) + n_capped
    print('------------------------------------------------')

print(neurons)
print(neuron_n)

# Add entries to nnfabrik tables

### Dataset

In [None]:
# Archives that make up the training dataset. Each key is <animal_id>-<session>-<scan_idx>.
paths = [
    f"/notebooks/data/static{scan_key}-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip"
    for scan_key in (
        "20892-9-10",
        "20892-9-11",
        "20892-10-10",
        "22083-6-18",
        "23549-4-8",
        "23555-26-19",
        "23555-26-20",
        "23555-67-10",
        "23555-67-11",
        "23603-5-19",
        "23656-10-20",
        "23656-10-21",
        "23946-3-10",
        # Disabled scans (kept for reference):
        # "23961-3-19",
        # "23961-3-20",
        # "23964-3-14",
        # "23964-13-10",
        # "23964-13-11",
        # "24391-6-17",
        "24391-6-18",
        "24457-3-9",
        "24457-3-12",
    )
]


# Loader configuration stored with the Dataset entry.
dataset_config = {
    "paths": paths,
    "batch_size": 64,
    "seed": 1,
    "file_tree": True,
    "layers": ["L2/3"],
    "areas": ["V1"],
    "neuron_n": 350,
}

dataset_comment = "V1 neurons (~ 5700 from 16 datasets)"

# Insertion is disabled; uncomment to add the entry to the Dataset table.
# Dataset().add_entry(dataset_fn = "nnsysident.datasets.mouse_loaders.static_loaders",
#                     dataset_config = dataset_config,
#                     dataset_fabrikant = 'kklurz',
#                     dataset_comment = dataset_comment)

In [None]:
# paths = ['/notebooks/data/static24391-6-17-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip']


# dataset_config =  dict(
#                     paths=paths,
#                     batch_size=64,
#                     seed=1,
#                     file_tree=True,
#                     layers=["L2/3"],

#                     areas=["RL"],
#                     neuron_n=218)

# dataset_comment = "Transfer dataset: RL neurons (218)"

# Dataset().add_entry(dataset_fn = "nnsysident.datasets.mouse_loaders.static_loaders",
#                     dataset_config = dataset_config,
#                     dataset_fabrikant = 'kklurz',
#                     dataset_comment = dataset_comment)

### Model

In [None]:
# Settings of the grid-mean predictor, nested into model_config below.
grid_mean_pred_config = dict(
    type="cortex",
    input_dimensions=2,
    hidden_layers=0,
    hidden_features=0,
    final_tanh=False,
)

# Configuration stored with the Model entry.
model_config = dict(
    pad_input=False,
    stack=-1,
    layers=4,
    input_kern=15,
    gamma_input=1,
    gamma_readout=2.439,
    hidden_dilation=1,
    hidden_kern=13,
    hidden_channels=64,
    n_se_blocks=0,
    depth_separable=True,
    share_features=False,
    share_grid=False,
    init_sigma=0.4,
    init_mu_range=0.55,
    gauss_type="full",
    grid_mean_predictor=grid_mean_pred_config,
)


model_comment = "standard model"

# Insertion is disabled; uncomment to add the entry to the Model table.
# Model().add_entry(model_fn = "nnsysident.models.models.se2d_fullgaussian2d",
#                     model_config = model_config,
#                     model_fabrikant = 'kklurz',
#                     model_comment = model_comment)

### Trainer

In [None]:
# Trainer().add_entry(trainer_fn = "nnsysident.training.trainers.standard_trainer",
#                     trainer_config = {'detach_core': False},
#                     trainer_fabrikant = 'kklurz',
#                     trainer_comment = "direct_trainer")

# Add entries for direct training

In [None]:
# Builds the restriction rows of the "direct" experiment: every Dataset entry
# with a "(~ 5700" comment is paired with the Model entries whose comment names
# the same area list, and with every standard trainer matching `detach_core`.
model_name = 'se2d_fullgaussian2d'
# NOTE(review): `paths` is not read again in this cell; the datasets are
# selected through their comment below.
paths = [
    f"/notebooks/data/static{scan_key}-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip"
    for scan_key in (
        "20892-9-10",
        "20892-9-11",
        "20892-10-10",
        "22083-6-18",
        "23549-4-8",
        "23555-26-19",
        "23555-26-20",
        "23555-67-10",
        "23555-67-11",
        "23603-5-19",
        "23656-10-20",
        "23656-10-21",
        "23946-3-10",
        # Disabled scans (kept for reference):
        # "23961-3-19",
        # "23961-3-20",
        # "23964-3-14",
        # "23964-13-10",
        # "23964-13-11",
        # "24391-6-17",
        "24391-6-18",
        "24457-3-9",
        "24457-3-12",
    )
]
detach_core = False
print('detach_core = {}'.format(detach_core))

# The former `.format(model_name)` calls were no-ops (no placeholders) and are dropped.
experiment_name = 'Direct cores on 1 area each (indiv. hyperparams)'
experiment_comment = "Direct training of cores for one area each with multiple datasets. Hypersearch was done for each area. Areas are ['AL', 'LM', 'PM', 'RL', 'V1']. Cores were trained on 5700 neurons from 16 datasets for each area."
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
trainer_fn = "nnsysident.training.trainers.standard_trainer"


# Datasets: expand the config dict into columns and keep the "(~ 5700" entries.
# Raw strings keep the regex escapes valid; .copy() makes the later column
# assignments operate on an independent frame (no SettingWithCopyWarning).
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
dataset = dataset.loc[dataset['dataset_comment'].str.contains(r'\(~ 5700')].copy()
dataset['areas_match'] = dataset['areas'].astype(str)

# Models: keep entries whose comment contains a bracketed area list and rebuild
# that list as a string so it can be matched against str(dataset['areas']).
# expand=False returns a Series, which is what a single column assignment needs.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
model = model.loc[model['model_comment'].str.contains(r'\[')].copy()
model['areas_match'] = "[" + model['model_comment'].str.extract(r".*\[(.*)\].*", expand=False) + "]"

# Trainers: expand the config and keep those with the requested detach_core flag.
trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1).drop('trainer_config', axis = 1)
trainer = trainer.loc[trainer['detach_core'] == detach_core].copy()

# A constant helper key emulates a cross join; models and datasets are
# additionally matched on their area string.
dataset['key'] = 1
model['key'] = 1
trainer['key'] = 1
combinations = pd.merge(pd.merge(model, dataset, on=["areas_match", "key"]), trainer, on="key").drop('key',axis=1)


# One restriction row per (dataset, model, trainer) combination.
experiment = [{'dataset_hash': row['dataset_hash'],
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': row['trainer_hash'],
               'trainer_fn': row['trainer_fn'],
               'experiment_name': experiment_name} for _, row in combinations.iterrows()]

In [None]:
# model_name = 'se2d_fullgaussian2d'
# paths = ['/notebooks/data/static20892-9-10-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static20892-9-11-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static20892-10-10-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static22083-6-18-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static23549-4-8-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static23555-26-19-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static23555-26-20-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static23555-67-10-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static23555-67-11-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static23603-5-19-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static23656-10-20-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static23656-10-21-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static23946-3-10-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
# #                          '/notebooks/data/static23961-3-19-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
# #                          '/notebooks/data/static23961-3-20-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
# #                          '/notebooks/data/static23964-3-14-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
# #                          '/notebooks/data/static23964-13-10-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
# #                          '/notebooks/data/static23964-13-11-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
# #          '/notebooks/data/static24391-6-17-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static24391-6-18-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static24457-3-9-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip',
#          '/notebooks/data/static24457-3-12-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip']
# detach_core = False
# print('detach_core = {}'.format(detach_core))

# experiment_name = 'Direct cores on 1 area each'
# experiment_comment = f"Direct training of cores for one area each with multiple datasets. \
# Areas are ['AL', 'LM', 'PM', 'RL', 'V1']. \
# Cores were trained on 5700 neurons from 14, 15 or 16 datasets for each area."

# fabrikant_name = 'kklurz'

# model_fn = "nnsysident.models.models.{}".format(model_name)
# dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
# trainer_fn = "nnsysident.training.trainers.standard_trainer"


# dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
# dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# dataset = dataset.loc[dataset['dataset_comment'].str.contains('Direct')]

# model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
# model = model.loc[model['model_comment'].str.contains('standard')]

# trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
# trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1).drop('trainer_config', axis = 1)
# trainer = trainer.loc[trainer['detach_core'] == detach_core]

# dataset['key'] = 1
# model['key'] = 1
# trainer['key'] = 1
# combinations = pd.merge(pd.merge(model, dataset, on=["key"]), trainer, on="key").drop('key',axis=1)


# experiment = [{'dataset_hash': row['dataset_hash'],
#                'dataset_fn': row['dataset_fn'],
#                'model_hash': row['model_hash'],
#                'model_fn': row['model_fn'],
#                'trainer_hash': row['trainer_hash'],
#                'trainer_fn': row['trainer_fn'],
#                  'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

In [None]:
# Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
# Experiments.Restrictions.insert(experiment, skip_duplicates=True)

# Add entries for transfer training

In [None]:
# add manually

# For each area, pick the best-scoring trained model (area-specific dataset and
# area-specific model comment) and build the Transfer entry that points to it.
# The joined table does not depend on the area, so it is fetched once instead
# of once per loop iteration.
trained_models = pd.DataFrame((TrainedModel * Dataset * Model * Trainer * Seed).fetch())

for area, n_datasets in zip(['AL', 'LM', 'PM', 'RL', 'V1'], [15, 15, 14, 15, 16]):
    transfer_comment = f"{area} core (indiv. hyperparams) from 5700 neurons in {n_datasets} datasets"

    # Filter out relevant data
    #     data = pd.concat([data, data['dataset_config'].apply(pd.Series)], axis = 1)#.drop('dataset_config', axis = 1)
    #     data = pd.concat([data, data['model_config'].apply(pd.Series)], axis = 1)#.drop('model_config', axis = 1)
    # Raw f-strings keep the regex escapes (\( and \[) valid string syntax.
    data = trained_models.loc[trained_models['dataset_comment'].str.contains(rf"{area} neurons \(~ 5700")]
    data = data.loc[data['model_comment'].str.contains(rf"\['{area}']")]
    data = data.loc[(data['score'] == data['score'].max())]
    assert len(data) == 1, 'should be only one best model left'


    entry = dict(transfer_fn='nnsysident.models.transfer_functions.core_transfer',
                 transfer_config = dict(t_model_hash=data.model_hash.values[0], t_dataset_hash=data.dataset_hash.values[0], t_trainer_hash=data.trainer_hash.values[0]),
                 transfer_comment=transfer_comment,
                 transfer_fabrikant='kklurz')
    # Insertion is disabled; uncomment to add the entry to the Transfer table.
#     Transfer().add_entry(**entry)

In [None]:
# Builds the restriction rows of the "transfer" experiment: every Transfer entry
# (one core per source area) is paired with the Model entries for the same
# area, with the detach_core trainers, and with the dataset made of exactly `paths`.
model_name = 'se2d_fullgaussian2d'
paths = ['/notebooks/data/static24391-6-17-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip']
detach_core = True
print('detach_core = {}'.format(detach_core))

# The former `.format(model_name)` calls were no-ops (no placeholders) and are dropped.
experiment_name = 'Transfer between areas (indiv. hyperparams)'
experiment_comment = "Transfer training from cores trained on different areas to other (and the same) areas. Hypersearch was done for each area. The best core is chosen depending on the seed. Areas are ['AL', 'LM', 'PM', 'RL', 'V1']. Cores were trained on 5700 neurons from 14, 15 or 16 datasets for each area."
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
trainer_fn = "nnsysident.training.trainers.standard_trainer"


# Datasets: keep entries whose path list is exactly `paths` (order-insensitive).
# The previous np.isin(...)[0] test only looked at the first path, so with more
# than one path a dataset sharing just the first file and the length would match.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
dataset = dataset.loc[[sorted(row['paths']) == sorted(paths) for _, row in dataset.iterrows()]].copy()

# Models: keep entries whose comment contains a bracketed area list and rebuild
# that list as a string. Raw strings keep the regex escapes valid; expand=False
# returns a Series; .copy() avoids assigning into a slice of the fetched frame.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
model = model.loc[model['model_comment'].str.contains(r"\[")].copy()
model['areas_match'] = "[" + model['model_comment'].str.extract(r".*\[(.*)\].*", expand=False) + "]"

# Trainers: expand the config and keep those with the requested detach_core flag.
trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1).drop('trainer_config', axis = 1)
trainer = trainer.loc[trainer['detach_core'] == detach_core].copy()

# Transfer entries: the area is the text before " core" in the comment; wrap it
# as "['<area>']" so it matches the model's area string.
transfer_comment = "core (indiv. hyperparams) from 5700 neurons"
transfer = pd.DataFrame((Transfer & 'transfer_comment like "%{}%"'.format(transfer_comment)).fetch())
transfer['areas_match'] = "['" + transfer['transfer_comment'].str.extract("(.*) core.*", expand=False) + "']"

# A constant helper key emulates a cross join; transfer and model are
# additionally matched on their area string.
transfer['key'] = 1
dataset['key'] = 1
model['key'] = 1
trainer['key'] = 1
combinations = pd.merge(pd.merge(pd.merge(transfer, model, on=["areas_match", "key"]), trainer, on="key"), dataset, on="key").drop('key',axis=1)


# One restriction row per (dataset, model, trainer, transfer) combination.
experiment = [{'dataset_hash': row['dataset_hash'],
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': row['trainer_hash'],
               'trainer_fn': row['trainer_fn'],
               'transfer_hash': row['transfer_hash'],
               "transfer_fn": row['transfer_fn'],
               'experiment_name': experiment_name} for _, row in combinations.iterrows()]

In [27]:
# # add manually
# neuron_base_seed = 5
# for area, n_datasets in zip(['AL', 'LM', 'PM', 'RL', 'V1'], [15, 15, 14, 15, 16]):
#     transfer_comment = f"{area} core from 5700 neurons in {n_datasets} datasets, neuron_base_seed={neuron_base_seed}"
#     data = pd.DataFrame((TrainedModel * Dataset * Model * Trainer * Seed).fetch())

#     # Filter out relevant data
#     data = pd.concat([data, data['dataset_config'].apply(pd.Series)], axis = 1)#.drop('dataset_config', axis = 1)
# #     data = pd.concat([data, data['model_config'].apply(pd.Series)], axis = 1)#.drop('model_config', axis = 1)
#     data = data.loc[data['dataset_comment'].str.contains(f"{area} neurons \(~ 5700")]
#     data = data.loc[data['model_comment'].str.contains("standard")]
#     data = data.loc[data['neuron_base_seed'] == neuron_base_seed]
    
#     data = data.loc[(data['score'] == data['score'].max())]
#     assert len(data) == 1, 'should be only one best model left'


#     entry = dict(transfer_fn='nnsysident.models.transfer_functions.core_transfer',
#                  transfer_config = dict(t_model_hash=data.model_hash.values[0], t_dataset_hash=data.dataset_hash.values[0], t_trainer_hash=data.trainer_hash.values[0]),
#                  transfer_comment=transfer_comment,
#                  transfer_fabrikant='kklurz')
# #     Transfer().add_entry(**entry)

In [39]:
# model_name = 'se2d_fullgaussian2d'
# paths = ['/notebooks/data/static24391-6-17-GrayImageNet-7bed7f7379d99271be5d144e5e59a8e7.zip']
# detach_core = True
# print('detach_core = {}'.format(detach_core))

# experiment_name = 'Transfer between areas'.format(model_name)
# experiment_comment = "Transfer training from cores trained on different areas to other (and the same) areas. The best core is chosen depending on the seed. Areas are ['AL', 'LM', 'PM', 'RL', 'V1']. Cores were trained on 5700 neurons from 14, 15 or 16 datasets for each area.".format(model_name)
# fabrikant_name = 'kklurz'

# model_fn = "nnsysident.models.models.{}".format(model_name)
# dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
# trainer_fn = "nnsysident.training.trainers.standard_trainer"


# dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
# dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] and len(row['paths']) == len(paths) for loc, row in dataset.iterrows()]]

# model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
# model = model.loc[model['model_comment'].str.contains("standard")]

# trainer = pd.DataFrame((Trainer & 'trainer_fn="{}"'.format(trainer_fn)).fetch())
# trainer = pd.concat([trainer, trainer['trainer_config'].apply(pd.Series)], axis = 1).drop('trainer_config', axis = 1)
# trainer = trainer.loc[trainer['detach_core'] == detach_core]

# transfer_comment = "core from 5700 neurons"
# transfer = pd.DataFrame((Transfer & 'transfer_comment like "%{}%"'.format(transfer_comment)).fetch())

# transfer['key'] = 1
# dataset['key'] = 1
# model['key'] = 1
# trainer['key'] = 1
# combinations = pd.merge(pd.merge(pd.merge(transfer, model, on=["key"]), trainer, on="key"), dataset, on="key").drop('key',axis=1)


# experiment = [{'dataset_hash': row['dataset_hash'],
#                'dataset_fn': row['dataset_fn'],
#                'model_hash': row['model_hash'],
#                'model_fn': row['model_fn'],
#                'trainer_hash': row['trainer_hash'],
#                'trainer_fn': row['trainer_fn'],
#                'transfer_hash': row['transfer_hash'],
#                "transfer_fn": row['transfer_fn'],
#                  'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

# Only live statement of this otherwise commented-out cell: running the cell
# just (re)sets the detach_core flag to True.
detach_core = True


In [42]:
# ExperimentsTransfer.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
# ExperimentsTransfer.Restrictions.insert(experiment, skip_duplicates=True)