We simulate a small amount of data from many individual FA models. We then fit the models both together and alone, and measure the benefit (in terms of the likelihood of held-out test data) of fitting the models together vs. fitting them alone. 


For comparison, when fitting FA models individually we use a standard FA fitting package to compute point estimates of the model parameters.  When evaluating models that have been fit together, we use the modes of the posterior distributions as point estimates. 


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sklearn.decomposition
import matplotlib.pyplot as plt
import numpy as np
import torch

from probabilistic_model_synthesis.fa import FAMdl
from probabilistic_model_synthesis.fa import Fitter
from probabilistic_model_synthesis.fa import generate_basic_posteriors
from probabilistic_model_synthesis.fa import generate_simple_prior_collection
from probabilistic_model_synthesis.fa import VICollection

## Parameters go here

In [3]:
# How many individuals we simulate data for
n_individuals = 10

# [min, max] bounds on the number of observed variables per individual - each individual's count is
# drawn uniformly from this range, with both end points included
n_var_range = [100, 120]

# [min, max] bounds on the number of samples available for fitting per individual - each individual's
# count is drawn uniformly from this range, with both end points included
n_fitting_smps_range = [10, 15]

# Number of latent variables used in every FA model
n_latent_vars = 3

# Number of held-out samples generated from each true model for testing
n_test_smps = 1000

## Create the true prior distributions that relate parameters in the model to variable (e.g., neuron) properties

In [4]:
# Ground-truth priors, conditioned on 2 properties per variable, from which the parameters of every
# true FA model are later sampled (via its lm_prior, mn_prior and psi_prior members)
true_priors = generate_simple_prior_collection(
    n_prop_vars=2,
    n_latent_vars=n_latent_vars,
    lm_mn_w_init_std=1.0,
    lm_std_w_init_std=0.1,
    mn_mn_w_init_std=1.0,
    mn_std_w_init_std=1.0,
    psi_conc_f_w_init_std=2.0,
    psi_rate_f_w_init_std=1.0,
    psi_conc_bias_mn=10.0,
    psi_rate_bias_mn=5.0,
)

## Generate properties

In [5]:
# Per-individual sizes: randint excludes its upper bound, so 1 is added to keep each range inclusive
ind_n_vars = np.random.randint(low=n_var_range[0], high=n_var_range[1] + 1, size=n_individuals)
ind_n_smps = np.random.randint(low=n_fitting_smps_range[0], high=n_fitting_smps_range[1] + 1,
                               size=n_individuals)

# One matrix of properties per individual: a row for each observed variable, 2 properties per row,
# with entries drawn uniformly from [0, 1)
ind_props = [torch.rand(size=[n_obs, 2]) for n_obs in ind_n_vars]

## Generate true FA models

In [6]:
def _sample_true_mdl(props):
    """Sample the parameters of one FA model from the true priors, given its variables' properties."""
    # Sampling order (lm, mn, psi) is kept fixed so the sequence of random draws is unchanged
    true_lm = true_priors.lm_prior.sample(props)
    true_mn = true_priors.mn_prior.sample(props).squeeze()
    true_psi = true_priors.psi_prior.sample(props).squeeze()
    return FAMdl(lm=true_lm, mn=true_mn, psi=true_psi)


# One ground-truth model per individual; no gradients are needed when just sampling
with torch.no_grad():
    ind_true_fa_mdls = [_sample_true_mdl(props_i) for props_i in ind_props]

## Generate data for fitting from each model

In [7]:
# Draw each individual's (small) training set from its own true model
with torch.no_grad():
    ind_train_data = [true_mdl.sample(n_train)
                      for true_mdl, n_train in zip(ind_true_fa_mdls, ind_n_smps)]

## Fit FA models together

In [8]:
# Priors to be learned (created with the generator's default initialization) and one set of
# posteriors per individual, sized by that individual's number of variables and samples
fit_priors = generate_simple_prior_collection(n_prop_vars=2, n_latent_vars=n_latent_vars)
fit_posteriors = generate_basic_posteriors(n_obs_vars=ind_n_vars, n_smps=ind_n_smps, n_latent_vars=n_latent_vars)

# Model objects created without any parameter values
# NOTE(review): presumably parameters are supplied from the posteriors during fitting - confirm
fit_mdls = [FAMdl(lm=None, mn=None, psi=None) for _ in range(n_individuals)]

# Bundle everything needed to fit each individual. Element 1 of each sampled data entry is used as
# the observations (presumably sample() returns (latents, observations) - confirm)
vi_collections = [
    VICollection(data=train_i[1], props=props_i, mdl=mdl_i, posteriors=posts_i)
    for train_i, props_i, mdl_i, posts_i in zip(ind_train_data, ind_props, fit_mdls, fit_posteriors)
]

In [9]:
# A single fitter handles all individuals so that the priors are shared across them
fitter = Fitter(priors=fit_priors, vi_collections=vi_collections)

In [10]:
# Run one round of fitting; the value returned by fit() is kept in a one-element list
logs = [
    fitter.fit(
        1000,
        milestones=[100],
        update_int=100,
        init_lr=.1,
        skip_lm_kl=False,
        skip_mn_kl=False,
        skip_psi_kl=False,
    )
]


Obj: 1.18e+05
----------------------------------------
NELL: 1.00e+04, 1.55e+04, 9.93e+03, 1.08e+04, 1.78e+04, 1.35e+04, 1.00e+04, 8.86e+03, 1.33e+04, 1.32e+04
Latent KL: 3.23e-01, 3.23e-01, 2.98e-01, 3.23e-01, 3.73e-01, 4.78e-01, 2.98e-01, 2.73e-01, 3.53e-01, 3.48e-01
LM KL: 8.79e+02, 8.64e+02, 8.69e+02, 7.46e+02, 6.62e+02, 6.54e+02, 7.78e+02, 7.96e+02, 8.31e+02, 9.31e+02
Mn KL: 1.85e+02, 2.13e+02, 2.06e+02, 1.96e+02, 2.05e+02, 2.12e+02, 1.87e+02, 1.85e+02, 2.12e+02, 2.05e+02
Psi KL: 2.45e+01, 2.75e+01, 2.64e+01, 2.56e+01, 2.65e+01, 2.71e+01, 2.39e+01, 2.39e+01, 2.62e+01, 2.74e+01
----------------------------------------
LR: 0.1

Obj: 3.11e+04
----------------------------------------
NELL: 2.60e+03, 2.90e+03, 2.62e+03, 3.04e+03, 3.20e+03, 2.97e+03, 2.37e+03, 2.15e+03, 2.96e+03, 3.08e+03
Latent KL: 6.72e+01, 7.57e+01, 5.89e+01, 6.35e+01, 8.10e+01, 5.60e+01, 4.84e+01, 5.15e+01, 5.68e+01, 5.62e+01
LM KL: 6.33e+01, 7.67e+01, 5.05e+01, 7.78e+01, 1.42e+02, 8.41e+01, 4.40e+01, 8.31e+01, 7.2

## Fit FA models individually

In [11]:
# Fit a separate scikit-learn factor analysis model to each individual's training observations
alone_models = []
for train_i in ind_train_data:
    fa_i = sklearn.decomposition.FactorAnalysis(n_components=n_latent_vars)
    # Element 1 of the sampled data holds the observations; sklearn expects a numpy array
    fa_i.fit(train_i[1].numpy())
    alone_models.append(fa_i)

## Measure performance of the fit models on new test data

In [12]:
# Generate fresh held-out data from every true model
ind_test_data = []
with torch.no_grad():
    for true_mdl in ind_true_fa_mdls:
        ind_test_data.append(true_mdl.sample(n_test_smps))

In [13]:
def _mean_test_ll(mdl, x, lm, mn, psi):
    """Return the log-likelihood of x under the given FA parameters, divided by n_test_smps."""
    total_ll = torch.sum(mdl.log_prob(x=x, lm=lm, mn=mn, psi=psi))
    return (total_ll/n_test_smps).numpy().item()


# One dict per individual with the mean test log-likelihood of the 'alone' and 'comb' fits
ind_test_ll = []
with torch.no_grad():
    # Model object used just for evaluation; parameters are passed explicitly to log_prob
    eval_mdl = FAMdl()
    for test_i, alone_i, vi_coll_i, props_i in zip(ind_test_data, alone_models, vi_collections, ind_props):

        # Element 1 of the sampled test data holds the observations
        x_test = test_i[1]

        # Model fit alone: sklearn stores components_ as (n_components, n_features), so it is
        # transposed to give one row of loadings per observed variable
        alone_ll = _mean_test_ll(eval_mdl, x_test,
                                 lm=torch.tensor(alone_i.components_.transpose()),
                                 mn=torch.tensor(alone_i.mean_),
                                 psi=torch.tensor(alone_i.noise_variance_))

        # Model fit with the others: point estimates are read from the posteriors
        # NOTE(review): calling lm_post/mn_post directly presumably returns the posterior mean
        # (= mode); only psi uses an explicit mode() call - confirm
        posts_i = vi_coll_i.posteriors
        comb_ll = _mean_test_ll(eval_mdl, x_test,
                                lm=posts_i.lm_post(props_i),
                                mn=posts_i.mn_post(props_i).squeeze(),
                                psi=posts_i.psi_post.mode(props_i).squeeze())

        ind_test_ll.append({'alone': alone_ll, 'comb': comb_ll})
            

In [14]:
# Display the per-individual mean test log-likelihoods for the 'alone' and 'comb' fits
ind_test_ll

[{'alone': -260.86021631709866, 'comb': -205.94659423828125},
 {'alone': -270.22096438944305, 'comb': -227.8758087158203},
 {'alone': -289.7494554399177, 'comb': -219.4298858642578},
 {'alone': -262.46040206997964, 'comb': -214.86265563964844},
 {'alone': -253.61787275397472, 'comb': -215.7176513671875},
 {'alone': -285.1326063212853, 'comb': -231.3709716796875},
 {'alone': -254.52365214430822, 'comb': -195.76116943359375},
 {'alone': -268.9499231095329, 'comb': -200.334228515625},
 {'alone': -283.38469520116894, 'comb': -218.39013671875},
 {'alone': -276.2552528035746, 'comb': -220.1079864501953}]