In [1]:
# Show matplotlib figures inline in the cell output.
%matplotlib inline
# Load the autoreload extension and use mode 2 so edited modules are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2 

In [2]:
import os
import datajoint as dj
# Connection settings are taken from the environment so no credentials live in the notebook;
# a missing DJ_HOST / DJ_USER / DJ_PASS raises KeyError right here.
dj.config['database.host'] = os.environ['DJ_HOST']
dj.config['database.user'] = os.environ['DJ_USER']
dj.config['database.password'] = os.environ['DJ_PASS']
dj.config['enable_python_native_blobs'] = True
dj.config['display.limit'] = 200

# `name` is the schema suffix; "simdata" is the alternative value noted by the author.
name = 'realdata' #"simdata"
# NOTE(review): presumably the nnfabrik/nnsysident table modules read 'schema_name' when they are
# imported, which would be why it is set before the import cell - confirm.
dj.config['schema_name'] = f"konstantin_nnsysident_{name}"

In [175]:
import torch
import shutil
import numpy as np
import pickle 
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10)
import matplotlib.pyplot as plt
import re
import seaborn as sns
import hiplot as hip
import statsmodels

import nnfabrik
from nnfabrik.main import *
from nnfabrik import builder
from nnfabrik.utility.hypersearch import Bayesian
from nnsysident.utility.measures import get_correlations

from nnsysident.tables.experiments import *
from nnsysident.tables.bayesian import *
from nnsysident.datasets.mouse_loaders import static_shared_loaders
from nnsysident.datasets.mouse_loaders import static_loaders
from nnsysident.tables.scoring import OracleScore, OracleScoreTransfer

from nnsysident.datasets.mouse_loaders import static_loader

def find_number(text, c):
    """Extract the single integer that directly follows the marker `c` in `text`.

    `c` is interpolated into the regular expression as-is (it is not escaped),
    so regex metacharacters inside it are interpreted as pattern syntax.

    Returns:
        The digits after `c` as an int, or None when `c` followed by digits
        does not occur in `text`.

    Raises:
        ValueError: if the marker followed by digits occurs more than once.
    """
    matches = re.findall(r'%s(\d+)' % c, text)
    if len(matches) > 1:
        raise ValueError('More than one number found..')
    if not matches:
        return None
    return int(matches[0])

def get_transfer(old_experiment_name):
    """Describe every Transfer entry whose source model was trained in `old_experiment_name`.

    The Transfer table is fetched and its `transfer_config` dict is expanded into
    columns. It is then inner-joined (on the three `t_*_hash` columns) with the
    trained models of the given experiment, keeping only the highest-scoring
    trained model per (model, trainer, dataset) hash triple. The `dataset_config`
    dict of the source model is expanded as well, and every column that does not
    already start with 't_' or 'transfer' gets a 't_' prefix.

    The result is meant to be merged (on transfer_fn and transfer_hash) with the
    models that used the transferred model.

    Returns:
        pandas.DataFrame sorted by t_multi_match_n, t_image_n,
        t_multi_match_base_seed and t_image_base_seed.
    """
    hash_columns = ['t_model_hash', 't_trainer_hash', 't_dataset_hash']

    # Transfer table with the config dict flattened into its own columns.
    transfer_rows = pd.DataFrame(Transfer.fetch())
    transfer_config = transfer_rows['transfer_config'].apply(pd.Series)
    transfer_rows = pd.concat([transfer_rows, transfer_config], axis=1).drop('transfer_config', axis=1)

    # Trained models of the source experiment, hash columns renamed to match the transfer config keys.
    restriction = 'experiment_name = "{}"'.format(old_experiment_name)
    trained = pd.DataFrame((TrainedModel * Dataset * Seed * Experiments.Restrictions & restriction).fetch())
    trained = trained.rename(columns={'model_hash': 't_model_hash',
                                      'trainer_hash': 't_trainer_hash',
                                      'dataset_hash': 't_dataset_hash'})
    # Sorting by score (descending) first makes drop_duplicates keep the best row per hash triple.
    trained = trained.sort_values('score', ascending=False).drop_duplicates(hash_columns)

    merged = pd.merge(transfer_rows, trained, how='inner', on=hash_columns)
    dataset_config = merged['dataset_config'].apply(pd.Series)
    merged = pd.concat([merged, dataset_config], axis=1).drop('dataset_config', axis=1)
    merged.columns = [
        col if col.startswith('t_') or col.startswith('transfer') else 't_' + col
        for col in merged.columns
    ]
    return merged.sort_values(['t_multi_match_n', 't_image_n', 't_multi_match_base_seed', 't_image_base_seed'])


def get_transfer_entries(old_experiment_name, overall_best):
    """Build Transfer-table entries from the best trained models of an experiment.

    Args:
        old_experiment_name: name of the experiment whose trained models are used.
        overall_best: when exactly `True`, only models trained with the largest
            multi_match_n and the largest image_n are considered.

    Returns:
        A list of dicts (transfer_fn, transfer_config, transfer_comment,
        transfer_fabrikant), one per remaining combination of multi_match_n,
        image_n, multi_match_base_seed and image_base_seed, each built from the
        highest-scoring model of that combination.

    Raises:
        AssertionError: if the experiment does not contain exactly one model_fn.
    """
    restriction = 'experiment_name="{}"'.format(old_experiment_name)
    trained = pd.DataFrame((TrainedModel * Dataset * Seed * Experiments.Restrictions & restriction).fetch())
    # Flatten the dataset_config dict into its own columns.
    dataset_config = trained['dataset_config'].apply(pd.Series)
    trained = pd.concat([trained, dataset_config], axis=1).drop('dataset_config', axis=1)

    model_fns = np.unique(trained['model_fn'])
    assert len(model_fns) == 1 ,"Must have exactly 1 model function in experiment"
    model_label = model_fns[0].split('.')[-1]

    # Filter out best model(s)
    if overall_best is True:
        at_max_neurons = trained['multi_match_n'] == trained['multi_match_n'].max()
        at_max_images = trained['image_n'] == trained['image_n'].max()
        trained = trained.loc[at_max_neurons & at_max_images]
    # Sorting by score (descending) first makes drop_duplicates keep the best row per combination.
    combination_columns = ['multi_match_n', 'image_n', 'multi_match_base_seed', 'image_base_seed']
    trained = trained.sort_values('score', ascending=False)
    trained = trained.drop_duplicates(combination_columns)
    trained = trained.sort_values(['multi_match_n', 'image_n'])

    # make entries for Transfer table
    comment_template = ', multi_match_n={}, multi_match_base_seed={}, image_n={}, image_base_seed={}'
    entries = []
    for _, row in trained.iterrows():
        comment = model_label + comment_template.format(row.multi_match_n,
                                                        row.multi_match_base_seed,
                                                        row.image_n,
                                                        row.image_base_seed)
        entries.append(dict(transfer_fn='nnsysident.models.transfer_functions.core_transfer',
                            transfer_config=dict(t_model_hash=row.model_hash,
                                                 t_dataset_hash=row.dataset_hash,
                                                 t_trainer_hash=row.trainer_hash),
                            transfer_comment=comment,
                            transfer_fabrikant='kklurz'))
    return entries

In [None]:
#### For the 4 different animals experiment!!!!!!!!!!!!

# paths = ['data/static22564-3-12-preproc0.zip', 
#          'data/static22846-2-19-preproc0.zip', 
#          'data/static23555-4-20-preproc0.zip', 
#          'data/static23343-5-17-preproc0.zip']

# entry = dict(dataset_fn='nnsysident.datasets.mouse_loaders.static_loaders', 
#              dataset_config = dict(paths=paths,
#                                    batch_size=64,
#                                    neuron_n=3600,
#                                    image_n=4400,
#                                    neuron_base_seed=1,
#                                    image_base_seed=1),
#              dataset_comment='4 different collection IDs in different animals, neuron_n=3600, image_n=4400',
#              dataset_fabrikant='kklurz')
#Dataset().add_entry(**entry)

####################################################################

model_name = 'se2d_fullgaussian2d'
# Earlier selection (four different image collection IDs), kept for reference:
# paths = ['data/static22564-3-12-preproc0.zip',
#          'data/static22846-2-19-preproc0.zip',
#          'data/static23555-4-20-preproc0.zip',
#          'data/static23343-5-17-preproc0.zip']

# Four scans from different animals that share one image collection.
# The items must be comma-separated: Python silently concatenates adjacent string
# literals, which would turn this list into a single bogus path that matches no
# Dataset entry in the filter below.
# NOTE(review): the second file name was spelled '...-perproc0.zip'; corrected to
# 'preproc0' to match every other file name in this notebook - verify against the data store.
paths = ['data/static23603-3-14-preproc0.zip',
         'data/static23605-1-16-preproc0.zip',
         'data/static22846-2-21-preproc0.zip',
         'data/static22564-3-12-preproc0.zip']

# Name and free-text description under which this run is registered in the Experiments table.
experiment_name = 'Real, Direct, {}, 4-DifferentAnimals-SameImages'.format(model_name)
experiment_comment = 'Directly trained on real data with {} and static_loaders of 4 datasets (different animals) which have the same Image Collection ID (6).'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'

# Fetch every Dataset entry built with `dataset_fn` and expand its dataset_config dict into columns.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# Keep the rows whose FIRST configured path is one of `paths`: np.isin tests each entry of
# row['paths'] and the trailing [0] picks the result for the first one.
# NOTE(review): assumes row['paths'] is a sequence; the remaining paths of a row are never
# compared, so entries sharing only their first path with `paths` also pass - confirm intended.
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] for loc, row in dataset.iterrows()]]

# Fetch every Model entry for `model_fn`. The subset sizes are parsed out of the free-text
# model_comment with find_number, which yields None when a key does not occur.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
# Expand the model_config dict into columns as well.
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)


# Pair each dataset with the model(s) whose comment names the same neuron_n and image_n.
combinations = pd.merge(dataset, model, on=["neuron_n", "image_n"]).sort_values(['neuron_n', 'image_n'])
   
# One restriction row per dataset/model pair, all with the same trainer.
# NOTE(review): 'd41d8cd9...' is the MD5 digest of empty input - presumably the hash of the
# trainer entry without any config; confirm against the Trainer table.
experiment = [{'dataset_hash': row['dataset_hash'], 
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': 'd41d8cd98f00b204e9800998ecf8427e',
               'trainer_fn': 'nnsysident.training.trainers.standard_trainer',
                 'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

# The database inserts are deliberately left commented out; uncomment to register the experiment.
# Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
# Experiments.Restrictions.insert(experiment, skip_duplicates=True)

In [None]:
# Display the TrainedModel entries joined with the experiment restrictions, filtered to the
# "4-DifferentAnimals-SameImages" experiment (the join binds tighter than the & restriction).
TrainedModel * Experiments.Restrictions & 'experiment_name = "Real, Direct, se2d_fullgaussian2d, 4-DifferentAnimals-SameImages"'

In [None]:
# Transfer-table entry that reuses the core of one specific trained model,
# identified by its model / dataset / trainer hashes.
entry = {
    'transfer_fn': 'nnsysident.models.transfer_functions.core_transfer',
    'transfer_config': {
        't_model_hash': "0077c1f2e4af36219a4419cba48f48d6",
        't_dataset_hash': '1a50f3d0b833d361a6f629bd951d09aa',
        't_trainer_hash': 'd41d8cd98f00b204e9800998ecf8427e',
    },
    'transfer_comment': '4-set, DiffAnimalsSameImages',
    'transfer_fabrikant': 'kklurz',
}
# The insert stays disabled; uncomment to add the entry to the Transfer table.
#Transfer().add_entry(**entry)

---

In [270]:
# Experiment definition: task-driven model with a non-pretrained core on the single scan 20457-5-9.
model_name = 'taskdriven_fullgaussian2d'
paths = ['data/static20457-5-9-preproc0.zip']

# Name and free-text description under which this run is registered in the Experiments table.
experiment_name = 'Real, Direct, {} (random VGG), 20457-5-9'.format(model_name)
experiment_comment = 'random VGG transfer on real data with {} and static_loaders of 20457-5-9. Varying number of images.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'

# Fetch every Dataset entry built with `dataset_fn` and expand its dataset_config dict into columns.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# Keep the rows whose FIRST configured path is one of `paths` (the trailing [0] picks the
# np.isin result for the first entry of row['paths']).
# NOTE(review): assumes row['paths'] is a sequence; other paths of a row are not compared.
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] for loc, row in dataset.iterrows()]]
# Restrict to the configurations with neuron_n == 5335 and without an 'exclude' setting.
dataset = dataset.loc[(dataset['neuron_n'] == 5335)].sort_values('image_n') # maybe comment here
dataset = dataset.loc[(dataset['exclude'].isnull())]

# Fetch every Model entry for `model_fn`. The subset sizes are parsed out of the free-text
# model_comment with find_number, which yields None when a key does not occur.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
# Expand the model_config dict into columns as well.
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)
# Keep models whose comment has no multi_match_n, that name neuron_n=5335,
# and whose config has pretrained set to False.
model = model.loc[model['multi_match_n'].isnull()] # maybe comment this
model = model.loc[model['neuron_n'] == 5335] # maybe comment this
model = model.loc[model['pretrained'] == False]


# Pair each dataset with the model(s) whose comment names the same neuron_n and image_n.
combinations = pd.merge(dataset, model, on=["neuron_n", "image_n"]).sort_values(['neuron_n', 'image_n'])
   
# One restriction row per dataset/model pair, all with the same trainer.
# NOTE(review): 'd41d8cd9...' is the MD5 digest of empty input - presumably the hash of the
# trainer entry without any config; confirm against the Trainer table.
experiment = [{'dataset_hash': row['dataset_hash'], 
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': 'd41d8cd98f00b204e9800998ecf8427e',
               'trainer_fn': 'nnsysident.training.trainers.standard_trainer',
                 'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

In [273]:
# Register the experiment, then attach one restriction row per dataset/model combination.
# NOTE(review): insert1 is called without skip_duplicates, unlike the Restrictions insert below;
# presumably re-running this cell with an already registered experiment_name fails - confirm
# that this is the intended guard.
Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
Experiments.Restrictions.insert(experiment, skip_duplicates=True)

In [296]:
# NOTE: this overwrites the notebook-level experiment_name with a different experiment
# ("random VGG16 fixed core") than the one assembled in the cells above.
experiment_name = "Real, Direct, taskdriven_fullgaussian2d (random VGG16 fixed core), 20457-5-9"

# Report the TrainedModel population progress for seeds 1-5 of this experiment.
TrainedModel.progress(Experiments.Restrictions & 'seed in (1,2,3,4,5)' & 'experiment_name="{}"'.format(experiment_name))

TrainedModel         Completed 150 of 150 (100.0%)   2020-06-04 17:45:30


(0, 150)

In [297]:
# NOTE: this overwrites the notebook-level experiment_name again.
experiment_name = "Real, direct (RANDOM fixed core 128), se2d_fullgaussian2d, 20457-5-9"

# Report the TrainedModel population progress for seeds 1-5 of this experiment.
TrainedModel.progress(Experiments.Restrictions & 'seed in (1,2,3,4,5)' & 'experiment_name="{}"'.format(experiment_name))

TrainedModel         Completed 150 of 150 (100.0%)   2020-06-04 17:45:31


(0, 150)

In [293]:
# Display the schema's jobs table (reserved or failed populate jobs and their status).
# NOTE(review): `schema` is not defined in this notebook; presumably it comes from one of the star imports.
schema.jobs

table_name  className of the table,key_hash  key hash,"status  if tuple is missing, the job is available",key  structure containing the key,error_message  error message returned if failed,error_stack  error stack if failed,user  database user,host  system hostname,pid  system process id,connection_id  connection_id(),timestamp  automatic timestamp
__trained_model,567dd46557101fe865746d008e32143a,reserved,=BLOB=,,=BLOB=,konstantin@cantor.mvl6.uni-tuebingen.de,df3dcdbcccea,1,83961,2020-06-04 17:16:13
__trained_model,735124166b10c7001888043e0e071505,reserved,=BLOB=,,=BLOB=,konstantin@cantor.mvl6.uni-tuebingen.de,181ca1137550,1,83962,2020-06-04 17:16:23
__trained_model,b8561dea5c56f00019389e829eb8ef9c,reserved,=BLOB=,,=BLOB=,konstantin@banach.mvl6.uni-tuebingen.de,8ff01f2e35ce,1,83964,2020-06-04 16:53:53
__trained_model,cc4acba3cb745503fade782a82808dc5,reserved,=BLOB=,,=BLOB=,konstantin@banach.mvl6.uni-tuebingen.de,f9694aba99f6,1,83963,2020-06-04 17:16:15


In [368]:
# Inspect the registered se2d_fullgaussian2d models for the multi_match_n=3625 setting.
model_name = 'se2d_fullgaussian2d'
model_fn = "nnsysident.models.models.{}".format(model_name)

# Fetch every Model entry for `model_fn`. The subset sizes are parsed out of the free-text
# model_comment with find_number, which yields None when a key does not occur.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
# Expand the model_config dict into columns as well.
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)
# Keep models with multi_match_n=3625 in their comment, share_features set to True
# and no grid_mean_predictor in their config.
model = model.loc[model['multi_match_n'] == 3625] # maybe comment this
model = model.loc[model['share_features'] == True] # maybe comment this
model = model.loc[model['grid_mean_predictor'].isnull()]

In [369]:
# Display the filtered models ordered by image_n.
model.sort_values('image_n')

Unnamed: 0,model_fn,model_hash,model_config,model_fabrikant,model_comment,model_ts,neuron_n,multi_match_n,image_n,share_features,init_mu_range,init_sigma,input_kern,hidden_kern,gamma_input,gamma_readout,grid_mean_predictor,hidden_channels
30,nnsysident.models.models.se2d_fullgaussian2d,5705656e96e2783c1b433331d44839b7,"{'share_features': True, 'init_mu_range': 0.55...",kklurz,"se2d_fullgaussian2d, multi_match_n=3625, image...",2020-06-04 18:26:31,,3625.0,50,True,0.55,0.4,15,13,1.0,100.0,,
21,nnsysident.models.models.se2d_fullgaussian2d,386ee55db1784341148645b118bfb8af,"{'share_features': True, 'init_mu_range': 0.55...",kklurz,"se2d_fullgaussian2d, multi_match_n=3625, image...",2020-06-04 16:01:19,,3625.0,200,True,0.55,0.4,15,13,1.0,92.313719,,
35,nnsysident.models.models.se2d_fullgaussian2d,69394838d12b1fc33d0b19731b38fef1,"{'share_features': True, 'init_mu_range': 0.55...",kklurz,"se2d_fullgaussian2d, multi_match_n=3625, image...",2020-06-04 18:46:03,,3625.0,500,True,0.55,0.4,15,13,1.0,95.075051,,
62,nnsysident.models.models.se2d_fullgaussian2d,963b6941654ff17178ed0c93e5e95e23,"{'share_features': True, 'init_mu_range': 0.55...",kklurz,"se2d_fullgaussian2d, multi_match_n=3625, image...",2020-06-04 19:13:29,,3625.0,1000,True,0.55,0.4,15,13,1.0,86.147605,,
91,nnsysident.models.models.se2d_fullgaussian2d,e51d139645db6f8632f2a755f8abea08,"{'share_features': True, 'init_mu_range': 0.55...",kklurz,"se2d_fullgaussian2d, multi_match_n=3625, image...",2020-06-04 18:54:39,,3625.0,2500,True,0.55,0.4,15,13,1.0,68.567051,,
46,nnsysident.models.models.se2d_fullgaussian2d,7e0485ee327ecbdd45180f032d66bd59,"{'share_features': True, 'init_mu_range': 0.55...",kklurz,"se2d_fullgaussian2d, multi_match_n=3625, image...",2020-06-05 02:39:20,,3625.0,4399,True,0.55,0.4,15,13,1.0,1.282481,,


In [397]:
# Experiment definition: se2d_fullgaussian2d on the four scans of the "4-set",
# loaded with static_shared_loaders.
model_name = 'se2d_fullgaussian2d'
paths = ['data/static22564-2-12-preproc0.zip',
                     'data/static22564-2-13-preproc0.zip',
                     'data/static22564-3-8-preproc0.zip',
                     'data/static22564-3-12-preproc0.zip']

# Name and free-text description under which this run is registered in the Experiments table.
experiment_name = 'Real, Direct, {}, [cortex, ~features], 4-set'.format(model_name)
experiment_comment = 'Directly trained on real data with {} and static_shared_loaders of the 4-set. Varying number images. cortex, no shared features'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_shared_loaders'

# Fetch every Dataset entry built with `dataset_fn` and expand its dataset_config dict into columns.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# Keep the rows whose FIRST configured path is one of `paths` (the trailing [0] picks the
# np.isin result for the first entry of row['paths']).
# NOTE(review): assumes row['paths'] is a sequence; other paths of a row are not compared.
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] for loc, row in dataset.iterrows()]]
# Restrict to neuron-subset seeds 1-5 with image seed 1, and to multi_match_n == 3625.
dataset = dataset.loc[(dataset['multi_match_base_seed'].isin([1,2,3,4,5])) & (dataset['image_base_seed'] == 1)] # maybe comment here
dataset = dataset.loc[dataset['multi_match_n'] == 3625]
#dataset = dataset.loc[dataset['exclude_neuron_n'].isnull()] # maybe comment here

# Fetch every Model entry for `model_fn`. The subset sizes are parsed out of the free-text
# model_comment with find_number, which yields None when a key does not occur.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
# Expand the model_config dict into columns as well.
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)
# Keep models with multi_match_n=3625, share_features set to False and a grid_mean_predictor configured.
# NOTE(review): presumably "cortex" in the experiment name refers to the grid_mean_predictor
# setting and "~features" to share_features == False - confirm.
model = model.loc[model['multi_match_n'] == 3625] # maybe comment this
model = model.loc[model['share_features'] == False] # maybe comment this
model = model.loc[~model['grid_mean_predictor'].isnull()]
    
# Pair each dataset with the model(s) whose comment names the same multi_match_n and image_n.
combinations = pd.merge(dataset, model, on=["multi_match_n", "image_n"]).sort_values(['multi_match_n', 'image_n'])
   
# One restriction row per dataset/model pair, all with the same trainer.
# NOTE(review): 'd41d8cd9...' is the MD5 digest of empty input - presumably the hash of the
# trainer entry without any config; confirm against the Trainer table.
experiment = [{'dataset_hash': row['dataset_hash'], 
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': 'd41d8cd98f00b204e9800998ecf8427e',
               'trainer_fn': 'nnsysident.training.trainers.standard_trainer',
                 'experiment_name': experiment_name} for loc, row in combinations.iterrows()]

# Register the experiment, then attach its restriction rows.
# NOTE(review): insert1 has no skip_duplicates while the Restrictions insert does; presumably
# re-running this cell with an already registered experiment_name fails - confirm intended.
Experiments.insert1(dict(experiment_name=experiment_name, experiment_fabrikant=fabrikant_name, experiment_comment=experiment_comment))
Experiments.Restrictions.insert(experiment, skip_duplicates=True)

In [419]:
# Report the TrainedModel population progress (seeds 1-5) for three of the 4-set variants.
# NOTE: the loop variable overwrites the notebook-level experiment_name; after the loop it
# holds the last name in the list.
for experiment_name in ["Real, Direct, se2d_fullgaussian2d, [cortex, ~features], 4-set",
                        "Real, Direct, se2d_fullgaussian2d, [~cortex, features], 4-set",
                        "Real, Direct, se2d_fullgaussian2d, [~cortex, ~features], 4-set"]:

    TrainedModel.progress(Experiments.Restrictions & 'seed in (1,2,3,4,5)' & 'experiment_name="{}"'.format(experiment_name))

TrainedModel         Completed 150 of 150 (100.0%)   2020-06-05 11:59:16
TrainedModel         Completed 150 of 150 (100.0%)   2020-06-05 11:59:16
TrainedModel         Completed 150 of 150 (100.0%)   2020-06-05 11:59:16


In [420]:
# Display the schema's jobs table again to check for remaining reserved or failed jobs.
# NOTE(review): `schema` is not defined in this notebook; presumably it comes from one of the star imports.
schema.jobs

table_name  className of the table,key_hash  key hash,"status  if tuple is missing, the job is available",key  structure containing the key,error_message  error message returned if failed,error_stack  error stack if failed,user  database user,host  system hostname,pid  system process id,connection_id  connection_id(),timestamp  automatic timestamp
,,,,,,,,,,


In [222]:
# Bare zip-file names of all scans: the leading 'data/' directory (5 characters)
# is cut off every entry.
paths = [
    prefixed[len('data/'):]
    for prefixed in (
        'data/static22564-2-12-preproc0.zip',
        'data/static22564-2-13-preproc0.zip',
        'data/static22564-3-8-preproc0.zip',
        'data/static22564-3-12-preproc0.zip',
        'data/static22846-2-19-preproc0.zip',
        'data/static22846-2-21-preproc0.zip',
        'data/static22846-10-16-preproc0.zip',
        'data/static23343-5-17-preproc0.zip',
        'data/static23555-4-20-preproc0.zip',
        'data/static23555-5-12-preproc0.zip',
        'data/static23656-14-22-preproc0.zip',
        'data/static20457-5-9-preproc0.zip',
        'data/static20505-6-1-preproc0.zip',
    )
]

# Build the dataloaders for all scans in `paths` through the nnfabrik builder and print,
# per scan, how many neurons and images its training set holds.
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'
dataset_config = dict(
    paths=paths,
    batch_size=64,
    seed=1
)
dataloaders = builder.get_data(dataset_fn, dataset_config)

# dataloaders['train'] maps a data key to the dataloader of one scan.
for data_key, dataloader in dataloaders['train'].items():
    n_neurons = dataloader.dataset.n_neurons
    # NOTE(review): counts the entries of trial_info.frame_image_id - presumably one per
    # presented image/trial of the whole scan rather than of the train split only; confirm.
    n_images = len(dataloader.dataset.trial_info.frame_image_id)
    print('data_key = {}, n_neurons = {}, n_images = {}'.format(data_key, n_neurons, n_images))

static22564-2-12-preproc0.zip does not exist locally. Trying to fetch.
Unzipping static22564-2-12-preproc0.zip into /notebooks
static22564-2-13-preproc0.zip does not exist locally. Trying to fetch.
Unzipping static22564-2-13-preproc0.zip into /notebooks
static22564-3-8-preproc0.zip does not exist locally. Trying to fetch.
Unzipping static22564-3-8-preproc0.zip into /notebooks
static22564-3-12-preproc0.zip does not exist locally. Trying to fetch.
Unzipping static22564-3-12-preproc0.zip into /notebooks
static22846-2-19-preproc0.zip does not exist locally. Trying to fetch.
Unzipping static22846-2-19-preproc0.zip into /notebooks
static22846-2-21-preproc0.zip does not exist locally. Trying to fetch.
Unzipping static22846-2-21-preproc0.zip into /notebooks
static22846-10-16-preproc0.zip does not exist locally. Trying to fetch.
Unzipping static22846-10-16-preproc0.zip into /notebooks
static23343-5-17-preproc0.zip does not exist locally. Trying to fetch.
Unzipping static23343-5-17-preproc0.zip 