In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and use mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2 

In [3]:
import os
import datajoint as dj
# Database credentials are read from environment variables so that no secrets
# are stored in the notebook; a missing variable raises KeyError.
for _setting, _env_var in (
    ('database.host', 'DJ_HOST'),
    ('database.user', 'DJ_USER'),
    ('database.password', 'DJ_PASS'),
):
    dj.config[_setting] = os.environ[_env_var]

dj.config['enable_python_native_blobs'] = True
dj.config['display.limit'] = 200

# `name` selects the schema suffix ("simdata" is the alternative noted by the author).
name = 'realdata' #"simdata"
dj.config['schema_name'] = f"konstantin_nnsysident_{name}"

In [128]:
import torch
import shutil
import numpy as np
import pickle 
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10)
import matplotlib.pyplot as plt
import matplotlib
import re
import string
import seaborn as sns
import hiplot as hip
import statsmodels

import nnfabrik
from nnfabrik.main import *
from nnfabrik import builder
from nnfabrik.utility.hypersearch import Bayesian
from nnsysident.utility.measures import get_correlations

from nnsysident.tables.experiments import *
from nnsysident.tables.bayesian import *
from nnsysident.datasets.mouse_loaders import static_shared_loaders
from nnsysident.datasets.mouse_loaders import static_loaders
from nnsysident.tables.scoring import OracleScore, OracleScoreTransfer

from nnsysident.datasets.mouse_loaders import static_loader

def find_number(text, c):
    """Extract the integer that directly follows the prefix ``c`` in ``text``.

    Parameters
    ----------
    text : str
        String to search, e.g. a comment such as ``"..., image_n=2500"``.
    c : str
        Prefix that must immediately precede the digits, e.g. ``"image_n="``.
        It is matched literally: regex metacharacters in it are escaped.

    Returns
    -------
    int or None
        The number following the prefix, or ``None`` if the prefix followed by
        digits does not occur in ``text``.

    Raises
    ------
    ValueError
        If the prefix followed by digits occurs more than once.
    """
    # re.escape keeps characters such as '.', '(' or '+' in the prefix from
    # being interpreted as regex syntax.
    number_list = re.findall(r'%s(\d+)' % re.escape(c), text)
    if not number_list:
        return None
    if len(number_list) > 1:
        raise ValueError('More than one number found..')
    return int(number_list[0])

def get_transfer(transfer_hashes):
    """Collect all information about the transferred (source) models behind the given Transfer entries.

    The returned DataFrame is meant to be merged (``pd.merge`` on ``transfer_fn`` and
    ``transfer_hash``) with the models that the transferred model was used for.

    Parameters
    ----------
    transfer_hashes : iterable of str
        ``transfer_hash`` values of the Transfer entries to look up.

    Returns
    -------
    pandas.DataFrame
        Transfer entries joined (inner merge) with the best-scoring TrainedModel entry of their
        source model. Every column whose name does not already start with ``t_`` or ``transfer``
        is prefixed with ``t_``.

    Notes
    -----
    Assumes ``transfer_config`` holds the keys ``t_model_hash``, ``t_dataset_hash`` and
    ``t_trainer_hash``, and that ``dataset_config`` holds ``multi_match_n``, ``image_n``,
    ``multi_match_base_seed`` and ``image_base_seed`` (needed for the final sort) -- TODO confirm
    against the table contents.
    """
    # A list of dicts is an OR-restriction in DataJoint. Formatting a Python tuple into an
    # SQL "in (...)" clause instead breaks for a single hash, because the tuple repr carries
    # a trailing comma.
    hash_restriction = [dict(transfer_hash=transfer_hash) for transfer_hash in transfer_hashes]
    transfer = pd.DataFrame((Transfer & hash_restriction).fetch())
    # Expand the transfer_config dicts into one column per key.
    transfer = pd.concat([transfer, transfer['transfer_config'].apply(pd.Series)], axis = 1).drop('transfer_config', axis = 1)

    # Translate the t_* hashes into the primary-key names used by TrainedModel to restrict it.
    restriction = transfer.rename(columns = {'t_model_hash': 'model_hash', 't_dataset_hash': 'dataset_hash', 't_trainer_hash': 'trainer_hash'})
    restriction = restriction[['model_hash', 'dataset_hash', 'trainer_hash']].to_dict('records')

    tm = pd.DataFrame((TrainedModel * Dataset * Seed & restriction).fetch()).rename(
        columns = {'model_hash': 't_model_hash', 'trainer_hash': 't_trainer_hash', 'dataset_hash': 't_dataset_hash'})
    # Keep only the highest-scoring row per (model, trainer, dataset) hash combination.
    tm = tm.sort_values('score', ascending=False).drop_duplicates(['t_model_hash', 't_trainer_hash', 't_dataset_hash'])

    transfer = pd.merge(transfer, tm, how='inner', on=['t_model_hash', 't_trainer_hash', 't_dataset_hash'])
    # Expand the dataset_config dicts into one column per key.
    transfer = pd.concat([transfer, transfer['dataset_config'].apply(pd.Series)], axis = 1).drop('dataset_config', axis = 1)
    # Mark every remaining column as belonging to the transferred model.
    transfer.columns = ['t_' + col if col[:2] != 't_' and col[:8] != 'transfer'  else col for col in transfer.columns]
    transfer = transfer.sort_values(['t_multi_match_n', 't_image_n', 't_multi_match_base_seed', 't_image_base_seed'])
    return transfer


def get_transfer_entries(old_experiment_name, overall_best):
    """Create Transfer-table entries from the best trained models of an existing experiment.

    Parameters
    ----------
    old_experiment_name : str
        ``experiment_name`` of the experiment whose trained models are used as transfer sources.
    overall_best : bool
        If truthy, only models trained with the largest ``multi_match_n`` and the largest
        ``image_n`` of the experiment are considered.

    Returns
    -------
    list of dict
        One entry per (multi_match_n, image_n, multi_match_base_seed, image_base_seed)
        combination, built from the highest-scoring model of that combination. Each entry has
        the keys ``transfer_fn``, ``transfer_config``, ``transfer_comment`` and
        ``transfer_fabrikant``.

    Raises
    ------
    AssertionError
        If the experiment does not contain exactly one distinct ``model_fn``.

    Notes
    -----
    Assumes ``dataset_config`` holds the keys ``multi_match_n``, ``image_n``,
    ``multi_match_base_seed`` and ``image_base_seed`` -- TODO confirm against the Dataset table.
    """
    tm = pd.DataFrame((TrainedModel * Dataset * Seed * Experiments.Restrictions & 'experiment_name="{}"'.format(old_experiment_name)).fetch())
    # Expand the dataset_config dicts into one column per key.
    tm = pd.concat([tm, tm['dataset_config'].apply(pd.Series)], axis = 1).drop('dataset_config', axis = 1)

    model_fn = np.unique(tm['model_fn'])
    assert len(model_fn) == 1 ,"Must have exactly 1 model function in experiment"
    model_fn = model_fn[0]

    # Filter out best model(s).
    # Truthiness test instead of `is True`, so flags such as numpy.bool_ are honoured too.
    if overall_best:
        tm = tm.loc[(tm['multi_match_n'] == tm['multi_match_n'].max()) & (tm['image_n'] == tm['image_n'].max())]
    # Keep the highest-scoring row per data configuration.
    tm = tm.sort_values('score', ascending=False).drop_duplicates(['multi_match_n', 'image_n', 'multi_match_base_seed', 'image_base_seed']).sort_values(['multi_match_n', 'image_n'])

    # make entries for Transfer table
    entries = [dict(transfer_fn='nnsysident.models.transfer_functions.core_transfer',
                    transfer_config=dict(t_model_hash=row.model_hash, t_dataset_hash=row.dataset_hash, t_trainer_hash=row.trainer_hash),
                    transfer_comment=model_fn.split('.')[-1] + ', multi_match_n={}, multi_match_base_seed={}, image_n={}, image_base_seed={}'.format(
                        row.multi_match_n,
                        row.multi_match_base_seed,
                        row.image_n,
                        row.image_base_seed),
                    transfer_fabrikant='kklurz') for _, row in tm.iterrows()]
    return entries

---

In [57]:
model_fn = "nnsysident.models.models.taskdriven_fullgaussian2d"

# All Model entries created with this model function, as a DataFrame.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())

# Recover the "<arg>=<number>" values that were written into the free-text model comment.
for arg in ['multi_match_n', 'image_n']:
    model[arg] = [find_number(comment, arg + '=') for comment in model['model_comment']]

# Expand the model_config dicts into one column per config key, next to the original columns.
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis=1)

In [61]:
# Show the models whose config explicitly sets pretrained to False
# (rows without that key compare unequal to False and are not shown).
model.loc[model['pretrained'] == False]

Unnamed: 0,model_fn,model_hash,model_config,model_fabrikant,model_comment,model_ts,multi_match_n,image_n,tl_model_name,layers,init_mu_range,init_sigma,share_features,grid_mean_predictor,gamma_readout,pretrained
4,nnsysident.models.models.taskdriven_fullgaussi...,451a333628395a7b3f7c656430e1d937,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"taskdriven_fullgaussian2d, neuron_n=5335, imag...",2020-06-03 12:13:42,,200,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",16.386389,False
5,nnsysident.models.models.taskdriven_fullgaussi...,4da49cf8feb2b86b415a45b8cc9c5dff,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"taskdriven_fullgaussian2d, neuron_n=5335, imag...",2020-06-03 13:08:07,,500,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",6.757978,False
6,nnsysident.models.models.taskdriven_fullgaussi...,4e71ae99466a995d3993a8facc6f09a7,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"pretrained=False, taskdriven_fullgaussian2d, n...",2020-05-30 10:41:20,,50,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",30.282069,False
8,nnsysident.models.models.taskdriven_fullgaussi...,655c9e04e43ef831c786c7aec95495bb,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"taskdriven_fullgaussian2d, neuron_n=5335, imag...",2020-06-03 13:59:22,,1000,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",6.799911,False
9,nnsysident.models.models.taskdriven_fullgaussi...,667cf7a725bdfbd9e371401f1d16282b,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"pretrained=False, taskdriven_fullgaussian2d, n...",2020-05-30 10:41:20,,2500,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",2.731651,False
10,nnsysident.models.models.taskdriven_fullgaussi...,791ee051d8c0a2d7133163c14b02c257,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"pretrained=False, taskdriven_fullgaussian2d, n...",2020-05-30 10:41:20,,4472,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",1.417425,False
12,nnsysident.models.models.taskdriven_fullgaussi...,96925edb6826bc0499723b66a2377a9e,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"pretrained=False, taskdriven_fullgaussian2d, n...",2020-05-30 10:41:20,,200,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",0.050364,False
13,nnsysident.models.models.taskdriven_fullgaussi...,978540b34bb5814a1b12c288696b4fe4,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"taskdriven_fullgaussian2d, neuron_n=5335, imag...",2020-06-03 13:07:11,,500,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",6.079521,False
16,nnsysident.models.models.taskdriven_fullgaussi...,ac4ab218507081d2d85b5e4eb5262268,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"pretrained=False, taskdriven_fullgaussian2d, n...",2020-05-30 10:41:21,,1000,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",2.796603,False
17,nnsysident.models.models.taskdriven_fullgaussi...,b82f8952dd131f3d4a5c4bcedf26510f,"{'tl_model_name': 'vgg16', 'layers': 8, 'init_...",kklurz,"taskdriven_fullgaussian2d, neuron_n=5335, imag...",2020-06-03 12:07:47,,50,vgg16,8,0.55,0.4,False,"{'type': 'cortex', 'input_dimensions': 2, 'hid...",19.975273,False


In [172]:
# Fetch a single Model entry of this model function as a dict to inspect its fields.
model_fn='nnsysident.models.models.taskdriven_fullgaussian2d'
(Model  & 'model_fn="{}"'.format(model_fn)).fetch(limit=1, as_dict=True)

[{'model_fn': 'nnsysident.models.models.taskdriven_fullgaussian2d',
  'model_hash': '1449437d8ad570b0eed13b98f81ed5ec',
  'model_config': {'tl_model_name': 'vgg16',
   'layers': 8,
   'init_mu_range': 0.55,
   'init_sigma': 0.4,
   'share_features': False,
   'grid_mean_predictor': {'type': 'cortex',
    'input_dimensions': 2,
    'hidden_layers': 0,
    'hidden_features': 0,
    'final_tanh': False},
   'gamma_readout': 4.622488854650272},
  'model_fabrikant': 'kklurz',
  'model_comment': 'taskdriven_fullgaussian2d, neuron_n=5335, image_n=2500',
  'model_ts': datetime.datetime(2020, 6, 3, 16, 36, 53)}]

In [164]:
# Display the Experiments table to review the experiments that already exist.
Experiments()

experiment_name  name of experiment,experiment_fabrikant  Name of the contributor that added this entry,experiment_comment  short description,experiment_ts  UTZ timestamp at time of insertion
"Real, direct (RANDOM fixed core), se2d_fullgaussian2d, 20457-5-9",kklurz,Directly trained with random fixed weihts in core on real data with se2d_fullgaussian2d and static_loaders of 20457-5-9. Varying number of images.,2020-05-29 23:00:19
"Real, Direct, se2d_fullgaussian2d, 11-set",kklurz,"Directly trained on real data with se2d_fullgaussian2d and static_loaders of the 11-set. Full datasets used, the goal is to later use the core for transfer.",2020-05-24 16:30:04
"Real, Direct, se2d_fullgaussian2d, 20457-5-9",kklurz,Directly trained on real data with se2d_fullgaussian2d and static_loaders of 20457-5-9. Varying number of images.,2020-05-25 13:41:46
"Real, Direct, se2d_fullgaussian2d, 20505-6-1",kklurz,Directly trained on real data with se2d_fullgaussian2d and static_loaders of 20505-6-1. Core for later transfer.,2020-05-25 19:31:56
"Real, Direct, se2d_fullgaussian2d, 4-DifferentAnimals-DifferentImages",kklurz,"Directly trained on real data with se2d_fullgaussian2d and static_loaders of 4 datasets (different animals) which have different Image Collection IDs (6,7,9,8).",2020-05-30 23:14:43
"Real, Direct, se2d_fullgaussian2d, 4-DifferentAnimals-SameImages",kklurz,Directly trained on real data with se2d_fullgaussian2d and static_loaders of 4 datasets (different animals) which have the same Image Collection ID (6).,2020-05-31 13:37:20
"Real, Direct, se2d_fullgaussian2d, 4-set",kklurz,Directly trained on real data with se2d_fullgaussian2d and static_shared_loaders of the 4-set. Varying number of neurons and images.,2020-05-22 17:19:58
"Real, Direct, se2d_fullgaussian2d, 4-set, last 972 neurons",kklurz,Directly trained on real data with se2d_fullgaussian2d and static_shared_loaders of the 4-set. Varying number of images. The same 972 neurons that were used for the best core transfer experiment.,2020-05-28 23:47:47
"Real, Direct, se2d_fullgaussian2d, 4-set, seed comparison",kklurz,"Directly trained on real data with se2d_fullgaussian2d and static_shared_loaders of the 4-set. Comparing the influence of model, image and neuron seed for 1000 image and neurons.",2020-05-22 14:08:33
"Real, Direct, se2d_spatialxfeaturelinear, 4-set",kklurz,Directly trained on real data with se2d_spatialxfeaturelinear and static_shared_loaders of the 4-set. Varying number of neurons and images.,2020-05-23 08:17:20


In [153]:
model_name = 'taskdriven_fullgaussian2d'
paths = ['data/static20457-5-9-preproc0.zip']

# The placeholders keep the experiment name and comment in sync with `model_name`.
experiment_name = 'Real, direkt (VGG16 fixed core), {}, VGG16 -> 20457-5-9'.format(model_name)
experiment_comment = 'Direct VGG16 to 20457-5-9 with {} and static_loaders. Varying number of images.'.format(model_name)
fabrikant_name = 'kklurz'

model_fn = "nnsysident.models.models.{}".format(model_name)
dataset_fn = 'nnsysident.datasets.mouse_loaders.static_loaders'

# Dataset entries of the chosen loader, with the dataset_config dicts expanded into columns.
dataset = pd.DataFrame((Dataset & 'dataset_fn = "{}"'.format(dataset_fn)).fetch())
dataset = pd.concat([dataset, dataset['dataset_config'].apply(pd.Series)], axis = 1)
# Keep the datasets whose first configured path is one of `paths` ...
dataset = dataset.loc[[np.isin(row['paths'], [paths])[0] for loc, row in dataset.iterrows()]]
# ... and whose "exclude" entry is empty.
dataset = dataset.loc[dataset["exclude"].str.len() == 0]


# Model entries of the chosen model function; the neuron/image counts are parsed out of the
# free-text model comment and the model_config dicts are expanded into columns.
model = pd.DataFrame((Model & 'model_fn="{}"'.format(model_fn)).fetch())
for arg in ['neuron_n', 'multi_match_n', 'image_n']:
    model[arg] = [find_number(row.model_comment, arg + '=') for loc, row in model.iterrows()]
model = pd.concat([model, model['model_config'].apply(pd.Series)], axis = 1)
# Drop the models that explicitly set pretrained=False; rows without that key are kept.
model = model.loc[~(model['pretrained'] == False)]

#model = model.loc[model['multi_match_n'].isnull()] # maybe comment this
#model = model.loc[model['neuron_n'] == 5335] # maybe comment this
#model = model.loc[~model['grid_mean_predictor'].isnull()]


# Pair every dataset with the model(s) configured for the same number of neurons and images.
# Assumes dataset_config provides the neuron_n and image_n columns -- TODO confirm.
combinations = pd.merge(dataset, model, on=["neuron_n", "image_n"]).sort_values(['neuron_n', 'image_n'])

# One restriction entry per dataset/model combination, all with the same trainer.
# NOTE(review): the trainer hash is the MD5 of the empty string; presumably the hash of an
# empty trainer config -- confirm against the Trainer table.
experiment = [{'dataset_hash': row['dataset_hash'],
               'dataset_fn': row['dataset_fn'],
               'model_hash': row['model_hash'],
               'model_fn': row['model_fn'],
               'trainer_hash': 'd41d8cd98f00b204e9800998ecf8427e',
               'trainer_fn': 'nnsysident.training.trainers.standard_trainer',
               'experiment_name': experiment_name} for loc, row in combinations.iterrows()]



In [169]:
# Distinct image_n values among the selected dataset entries.
np.unique(dataset['image_n'])

array([  50.,  200.,  500., 1000., 2500., 4472.])