In [1]:
"""
This is the shrinkage version of the transport map. In this version,
the regression functions $f_i$ and the nugget parameters $d_i$ are
assumed to have a specific structure, which is determined by the
parametric covariance matrix. Unlike the example in the other notebook,
here we estimate the parameters of the parametric covariance matrix
using the integrated log-likelihood function.


Author: Anirban Chakraborty,
Last modified: May 13, 2024
"""
%load_ext autoreload
%autoreload 2

### Load necessary libraries

In [None]:
from pathlib import Path

# Project folders, all resolved relative to the parent of the current working
# directory.
DATAPATH = Path.cwd().parent/"data"
RESULTSPATH = Path.cwd().parent/"results"
PLOTSPATH = Path.cwd().parent/"plots"

# Make sure the output folders exist. `exist_ok=True` replaces the separate
# is_dir()/mkdir() pairs (the results folder was previously checked twice) and
# is a no-op when the folder is already there.
RESULTSPATH.mkdir(exist_ok=True)
PLOTSPATH.mkdir(exist_ok=True)

In [None]:
import torch
import numpy as np
from veccs import orderings
from gpytorch.kernels import MaternKernel
from sklearn.gaussian_process import kernels
from matplotlib import pyplot as plt

from batram.helpers import make_grid, GaussianProcessGenerator
from batram.legmods import Data, SimpleTM
from batram.shrinkmods import ShrinkTM, EstimableShrinkTM


### Comparing log-score with the base transport maps (exponential kernel)

In [None]:
# Seed torch's global RNG so that the random permutations drawn with
# torch.randperm in this notebook are repeatable.
torch.manual_seed(seed=20240522)

In [4]:
## kernel and location parameters

# Location settings.
num_locs, dim_locs = 30, 2
# Passed to the nearest-neighbour search when the data are loaded.
largest_conditioning_set = 30

# Kernel settings; the length scale and sigma^2 are also used to build the
# name of the results file.
nu_original = 0.5
length_scale_original = 0.3
sigmasq_f = 1.0
sd_noise = 1e-6

numSamples = 30

In [5]:
import pickle

# Change the data directory if you generate data yourself by running the
# scripts/make_data.py file.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load data files that you generated yourself or otherwise trust.
with open("../tests/data/NR900ExpLST30SIGSQT10.pkl", "rb") as pkl_file:
    data = pickle.load(pkl_file)

# Rearrange the locations (rows) and the data columns by the stored ordering.
locsorder = data["order"]
locs = data["locs"][locsorder, :]
torchdata = data["data"][:, locsorder]
gp = data["gp"]

# Nearest-neighbour search on the reordered locations, with
# `largest_conditioning_set` as the second argument.
nn = orderings.find_nns_l2(locs, largest_conditioning_set)

In [6]:
## getting the data ready

# Training-set sizes to compare and the number of random repetitions per size.
numSamples = [1, 2, 5, 10, 20, 30, 50, 80, 160, 200]
reps = 10

# Number of steps handed to each model's fit call and to its LR scheduler.
nsteps = 400
yreps = 50 #to be used for estimating log-score

# Fitted models are appended to these lists in the order they are trained.
tm_models, shrink_models = [], []

# Score tables: one row per repetition, one column per training-set size.
logScore_tm = torch.zeros(reps, len(numSamples))
logScore_shrink = torch.zeros(reps, len(numSamples))

In [None]:
## fit models
# For every training-set size n and every repetition: draw a random subset of
# n samples, fit a SimpleTM and an EstimableShrinkTM to it, and store the
# average score of each model over `testsampnum` held-out samples.

# No fixed initial parameter values are passed to the shrinkage model.
theta_init_fixed = None

# Held-out rows are read from the shuffled data starting right after the
# largest training set, so they can never overlap with training rows. For the
# sample sizes used in this notebook the offset is 200.
testsampnum = 50
heldout_start = max(numSamples)
if heldout_start + testsampnum > torchdata.shape[0]:
    # Fail before the expensive fitting instead of with an IndexError after it.
    raise ValueError(
        f"need at least {heldout_start + testsampnum} samples for training and "
        f"held-out scoring, but the data only has {torchdata.shape[0]}"
    )

for i, n in enumerate(numSamples):
    for _reps in range(reps):
        # Shuffle once per repetition and reuse the shuffled copy for both the
        # training subset and the held-out rows.
        randperm = torch.randperm(torchdata.shape[0])
        shuffled = torchdata[randperm, :]
        obs = shuffled[0:n, :] #snip first n samples

        # Create a `Data` object for use with the `SimpleTM`/ `ShrinkTM` model.
        data_tm = Data.new(torch.as_tensor(locs).float(), obs, torch.as_tensor(nn))
        data_shrink = Data.new(torch.as_tensor(locs).float(), obs, torch.as_tensor(nn))

        # Baseline transport map.
        # NOTE(review): the meaning of the second positional argument (0.1) to
        # fit() is not visible here; confirm against the batram API.
        tm = SimpleTM(data_tm, theta_init=None, linear=False, smooth=1.5, nug_mult=4.0)
        opt = torch.optim.Adam(tm.parameters(), lr=0.01)
        sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, nsteps)
        res = tm.fit(
            nsteps, 0.1, test_data=tm.data, optimizer=opt, scheduler=sched, batch_size=300
        )
        tm_models.append(tm)

        # Shrinkage transport map with an exponential parametric kernel.
        shrink_tm = EstimableShrinkTM(
            data=data_shrink,
            linear=False,
            transportmap_smooth=1.5,
            parametric_kernel="exponential",
            param_nu=0.5,
            param_ls=1.0,
            nug_mult_bounded=False,
            theta_init=theta_init_fixed,
        )
        opt2 = torch.optim.Adam(shrink_tm.parameters(), lr=0.01)
        sched2 = torch.optim.lr_scheduler.CosineAnnealingLR(opt2, nsteps)
        res2 = shrink_tm.fit(
            nsteps, 0.1, test_data=shrink_tm.data, optimizer=opt2, scheduler=sched2, batch_size=300,
        )
        shrink_models.append(shrink_tm)

        # Score both models under no_grad. Previously only the SimpleTM score
        # was inside the no_grad block, so the in-place accumulation attached
        # logScore_shrink to the autograd graph of every shrinkage model.
        with torch.no_grad():
            for _j in range(testsampnum):
                heldout_sample = shuffled[heldout_start + _j, :]
                logScore_tm[_reps, i] += tm.score(heldout_sample)/testsampnum
                logScore_shrink[_reps, i] += shrink_tm.score(heldout_sample)/testsampnum
        print(f"n ={n}, rep {_reps} done")
        print(f"tmscore = {logScore_tm[_reps, i]}, shrinkscore = {logScore_shrink[_reps, i]}")
    

In [8]:
# Persist the study: the GP generator, all fitted models, and the score tables.
# The file name encodes the length scale and sigma^2 (each scaled by 100).
# round() is used instead of int() so floating-point error in the product
# (e.g. 100 * 0.29 == 28.999999999999996) cannot truncate to the wrong tag.
# The file goes into RESULTSPATH, the same folder as the former "../results/".
results_file = RESULTSPATH / (
    f"modelsNR_LST{round(100*length_scale_original)}_SQT{round(100*sigmasq_f)}.pt"
)
torch.save({
    "gp_generator": gp,
    "tm_models": tm_models,
    "shrink_models": shrink_models,
    # detach() so the saved tables never carry autograd history with them.
    "tm_logscore" : logScore_tm.detach(),
    "shrink_logscore": logScore_shrink.detach(),
    "numSamples": numSamples
}, results_file)