# train__embedding_spatiotemporal_similar

In [13]:
import datetime
import logging
import os
import numpy as np
import torch
from importlib import reload
import matplotlib.pyplot as plt

from models import dapm
from scripts.data_loader import *
from scripts.train_dapm import train
from utils.metrics import normalize_mat
from params import Param
from utils.logging_utils import *

import warnings
warnings.filterwarnings('ignore')


In [14]:

def dapm_main(param, **kwargs):
    """Load data and a saved DeepAPM model, then fine-tune it with ``train``.

    The function derives the model names from ``param``, adds the run-specific
    file paths to ``kwargs``, loads the data object and the train/val/test
    locations, loads a previously saved model from disk, marks all of its
    parameters as trainable and calls ``train``.

    Args:
        param: Parameter object. ``generate_model_name()`` and
            ``generate_ae_model_name()`` are called on it, and ``area``,
            ``resolution``, ``year`` and ``months`` are read to locate the
            saved model file.
        **kwargs: Must contain ``model_dir``, ``log_dir``, ``run_dir``,
            ``train_val_test`` and ``device``. An optional ``data_dir``
            overrides the default training-data directory. The keys
            ``model_name``, ``model_file``, ``log_file``, ``run_file`` and
            ``ae_model_file`` are added here before everything is forwarded
            to ``train``.
    """
    # define model name
    model_name = param.generate_model_name()
    ae_model_name = param.generate_ae_model_name()
    print(model_name)
    print(ae_model_name)

    # '%d%H%M' = day, hour, minute. The previous '%d%H%m' ended with the month,
    # so every run started within the same hour shared one run directory.
    run_stamp = datetime.datetime.now().strftime('%d%H%M')

    kwargs['model_name'] = model_name
    kwargs['model_file'] = os.path.join(kwargs['model_dir'], model_name + '.pkl')
    kwargs['log_file'] = os.path.join(kwargs['log_dir'], model_name + '.log')
    kwargs['run_file'] = os.path.join(kwargs['run_dir'], model_name + '_run_{}'.format(run_stamp))
    kwargs['ae_model_file'] = os.path.join('./data/ae_models/models/', ae_model_name + '.pkl')

    # load data
    data_dir = kwargs.get('data_dir', '/home/yijun/notebooks/training_data/')
    data_obj = load_data(data_dir, param)
    train_loc, val_loc, test_loc = load_locations(kwargs['train_val_test'], param)

    data_obj.train_loc = train_loc
    data_obj.train_y = data_obj.gen_train_val_test_label(data_obj.label_mat, data_obj.train_loc)
    data_obj.val_loc = val_loc
    data_obj.val_y = data_obj.gen_train_val_test_label(data_obj.label_mat, data_obj.val_loc)
    data_obj.test_loc = test_loc
    data_obj.test_y = data_obj.gen_train_val_test_label(data_obj.label_mat, data_obj.test_loc)

    # logging starts
    start_logging(kwargs['log_file'], model_name)
    data_logging(data_obj)

    # load ae model
    # NOTE: torch.load unpickles arbitrary objects; only load checkpoints that
    # come from a trusted source.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # rejects fully pickled modules - confirm against the installed version.
    # map_location lets a checkpoint saved on another GPU load on the selected
    # device (including the CPU fallback).
    # `ae` is currently only referenced by the disabled construction below.
    ae = torch.load(kwargs['ae_model_file'], map_location=kwargs['device'])

    # define DeepAP model
    # in_dim, ae_en_h_dims, ae_de_h_dims
    # conv_lstm_in_size, conv_lstm_in_dim, conv_lstm_h_dim, conv_lstm_kernel_sizes, conv_lstm_n_layers
    # fc_in_dim, fc_h_dims, fc_out_dim
    #
    # Disabled: building the model from scratch. A saved model is loaded instead.
#     model = dapm.DeepAPM(in_dim=data_obj.n_features,
#                          ae_en_h_dims=param.ae_en_h_dims,
#                          ae_de_h_dims=param.ae_de_h_dims,

#                          conv_lstm_in_size=(data_obj.n_rows, data_obj.n_cols),
#                          conv_lstm_in_dim=param.ae_en_h_dims[-1],
#                          conv_lstm_h_dim=[param.dapm_h_dim],  # dap_h_dim
#                          conv_lstm_kernel_sizes=param.kernel_sizes,  # kernel_sizes
#                          conv_lstm_n_layers=1,

#                          fc_in_dim=param.dapm_h_dim * len(param.kernel_sizes),
#                          fc_h_dims=param.fc_h_dims,  # fc_h_dims
#                          fc_out_dim=1,

#                          ae_pretrain_weight=ae.state_dict(),
#                          mask_thre=param.mask_thre,
#                          fc_p_dropout=0.1,
#                          device=kwargs['device'])

#     model = model.to(kwargs['device'])

    saved_model_file = f'data/dapm_models/models/{param.area}_{param.resolution}m_{param.year}___#{param.months[0]}#.pkl'
    model = torch.load(saved_model_file, map_location=kwargs['device'])
    model = model.to(kwargs['device'])
    # Make every parameter trainable again before fine-tuning.
    for p in model.parameters():
        p.requires_grad = True

    train(model, data_obj, param, **kwargs)

    # logging ends
    end_logging(model_name)
    

In [15]:
from scipy.optimize import curve_fit

def gaussian(h, r, s, n=0):
    """Evaluate ``n + s * (1 - exp(-(h / (r / 2)) ** 2))`` style Gaussian curve.

    Args:
        h: Value(s) at which the curve is evaluated.
        r: Range-like parameter; half of it is used as the length scale.
        s: Scale applied to the ``1 - exp(...)`` term.
        n: Constant offset added to the result (default 0).

    Returns:
        The curve value(s) at ``h``.
    """
    half_r = r / 2.
    exponent = h ** 2 / half_r ** 2
    return n + s * (1. - np.exp(- exponent))


def get_fit_bounds(x, y):
    """Return ``(lower, upper)`` bounds built from the data, ignoring NaNs.

    The lower bound is the scalar 0. The upper bounds are, in order,
    ``[max(x), max(y), min(y)]``, matching the ``(r, s, n)`` parameter order
    of ``gaussian``.
    """
    y_min = np.nanmin(y)
    x_max = np.nanmax(x)
    y_max = np.nanmax(y)
    upper = [x_max, y_max, y_min]
    return 0, upper


def get_fit_func(x, y, model):
    """Fit ``model`` to ``(x, y)`` with bounded least squares (best effort).

    The bounds come from ``get_fit_bounds``; the upper bounds double as the
    initial guess passed to ``curve_fit``.

    Args:
        x: Independent values.
        y: Dependent values.
        model: Callable ``model(x, *params)`` passed to ``curve_fit``.

    Returns:
        The optimal parameters returned by ``curve_fit``, or ``[0, 0, 0]``
        when computing the bounds or the fit raises.
    """
    try:
        bounds = get_fit_bounds(x, y)
        popt, _ = curve_fit(model, x, y, method='trf', p0=bounds[1], bounds=bounds)
    except Exception as exc:
        # Deliberate best-effort fallback: callers still get [0, 0, 0], but the
        # failure is now recorded instead of vanishing silently. A named logger
        # is used so that the root logger is not configured as a side effect.
        logging.getLogger(__name__).warning(
            'curve_fit failed (%s: %s); returning [0, 0, 0]', type(exc).__name__, exc)
        return [0, 0, 0]
    else:
        return popt


def gen_semivariogram(distances, variances, bins, thr):
    """Bin ``variances`` by ``distances`` and fit ``gaussian`` to the bin means.

    Args:
        distances: Array of distances, used to assign entries to bins.
        variances: Array indexed with the same boolean masks as ``distances``.
        bins: Ordered bin edges; each consecutive pair is a half-open
            interval ``[left, right)``.
        thr: A bin is used only if it holds more than ``thr`` entries.

    Returns:
        Whatever ``get_fit_func`` returns for the per-bin mean distances and
        mean variances.
    """
    mean_distances, mean_variances = [], []

    for lower, upper in zip(bins[:-1], bins[1:]):
        in_bin = (distances >= lower) & (distances < upper)
        # Skip bins that do not hold more than `thr` entries.
        if np.count_nonzero(in_bin) <= thr:
            continue
        mean_variances.append(np.nanmean(variances[in_bin]))
        mean_distances.append(np.nanmean(distances[in_bin]))

    return get_fit_func(np.array(mean_distances), np.array(mean_variances), model=gaussian)
    

In [16]:
import logging
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as dat
from tensorboardX import SummaryWriter
from torch import autograd

from utils.early_stopping import EarlyStopping
from utils.metrics import compute_error
from models.spatial_loss_func import SpatialLossFunc


def train(dapm, data_obj, args, **kwargs):
    """Train ``dapm`` with the loss terms selected by ``args.model_type``.

    Each epoch iterates over shuffled time indices, builds the input sequences
    for the batch, optimizes the combined loss, and records a validation loss
    computed from the same forward pass against ``data_obj.val_y``. After each
    epoch the averages are logged, early stopping is updated, and (on some
    epochs) the test split is evaluated.

    Args:
        dapm: Model to train. Calling it must return a 5-tuple
            ``(out, masked_x, _, de_x, em)``; ``dapm.mask_layer`` is read when
            ``'sp'`` is in ``args.model_type``.
        data_obj: Provides ``dynamic_x``, ``static_x``, ``train_y``, ``val_y``
            and ``test_y``.
        args: Provides ``seq_len``, ``batch_size``, ``sp_neighbor``, ``lr``,
            ``epochs``, ``model_type`` and the loss weights ``alpha``
            (``'sp'``), ``gamma`` (``'ae'``), ``beta`` (``'sc'``) and ``eta``
            (``'embedding_spatiotemporal_similar'``).
        **kwargs: Must contain ``run_file``, ``device``, ``model_file`` and
            ``model_name``.
    """
    seq_len = args.seq_len
    device = kwargs['device']

    ############################
    # construct sequence input #
    ############################
    # Defined once here rather than inside the batch loop: the test evaluation
    # below needs construct_sequence_x even if no training batch was produced.

    def construct_sequence_x(idx_list, dynamic_x, static_x):
        """Stack the dynamic window ending at each index and append static features."""
        d_x = [dynamic_x[i - seq_len: i + 1, ...] for i in idx_list]
        d_x = np.stack(d_x, axis=0)
        s_x = np.expand_dims(static_x, axis=0)
        # presumably (1, t, c, h, w) after this repeat - TODO confirm static_x layout
        s_x = np.repeat(s_x, seq_len + 1, axis=1)
        # presumably (b, t, c, h, w)
        s_x = np.repeat(s_x, len(idx_list), axis=0)
        x = np.concatenate([d_x, s_x], axis=2)
        return torch.tensor(x, dtype=torch.float).to(device)

    def construct_y(idx_list, output_y):
        """Stack the label slices for the given indices into one tensor."""
        y = np.stack([output_y[i] for i in idx_list], axis=0)
        return torch.tensor(y, dtype=torch.float).to(device)

    def make_index_loader(start, stop, batch_size, shuffle):
        """Build a DataLoader that yields batches of time indices in [start, stop)."""
        idx_dat = dat.TensorDataset(torch.tensor(np.arange(start, stop), dtype=torch.int32))
        return dat.DataLoader(dataset=idx_dat, batch_size=batch_size, shuffle=shuffle)

    # construct index-based data loader
    # Training starts one step later than testing because the temporal
    # neighbouring loss also builds the sequence ending at index - 1.
    train_idx_data_loader = make_index_loader(
        seq_len + 1, data_obj.train_y.shape[0], args.batch_size, True)
    test_idx_data_loader = make_index_loader(
        seq_len, data_obj.test_y.shape[0], 1, False)

    # set writer, loss function, and optimizer
    writer = SummaryWriter(kwargs['run_file'])
    loss_func = nn.MSELoss()
    spatial_loss_func = SpatialLossFunc(sp_neighbor=args.sp_neighbor)
    optimizer = optim.Adam(dapm.parameters(), lr=args.lr, weight_decay=1e-8)
    early_stopping = EarlyStopping(patience=7, verbose=True)

    try:
        for epoch in range(args.epochs):

            dapm.train()
            total_losses, train_losses, val_losses, alosses = [], [], [], []

            for batch in train_idx_data_loader:
                batch_idx = batch[0]

                batch_x = construct_sequence_x(batch_idx, data_obj.dynamic_x, data_obj.static_x)  # presumably x = (b, t, c, h, w)
                batch_y = construct_y(batch_idx, data_obj.train_y)  # presumably y = (b, c, h, w)
                batch_val_y = construct_y(batch_idx, data_obj.val_y)

                ###################
                # train the model #
                ###################

                out, masked_x, _, de_x, em = dapm(batch_x)
                # Only cells that carry a label (non-NaN) contribute to the loss.
                train_mask = ~torch.isnan(batch_y)
                train_loss = loss_func(batch_y[train_mask], out[train_mask])
                train_losses.append(train_loss.item())

                # add loss according to the model type
                # Out-of-place additions keep `train_loss` itself untouched.
                total_loss = train_loss
                if 'sp' in args.model_type:
                    mask_layer_params = torch.cat([x.view(-1) for x in dapm.mask_layer.parameters()])
                    l1_regularization = torch.norm(mask_layer_params, 1)
                    total_loss = total_loss + l1_regularization * args.alpha

                if 'ae' in args.model_type:
                    ae_loss = loss_func(masked_x, de_x)
                    total_loss = total_loss + ae_loss * args.gamma

                if 'sc' in args.model_type:
                    sc_loss = spatial_loss_func(out)
                    total_loss = total_loss + sc_loss * args.beta

                if 'embedding_spatiotemporal_similar' in args.model_type:

                    # 1-step temporal neighboring loss
                    pre_batch_idx = batch_idx - torch.ones_like(batch_idx)
                    pre_batch_x = construct_sequence_x(pre_batch_idx, data_obj.dynamic_x, data_obj.static_x)
                    _, _, _, _, pre_em = dapm(pre_batch_x)
                    tp_loss = torch.mean(torch.mean((em - pre_em) ** 2, axis=1))

                    # 1-step spatial neighboring loss over the last two axes:
                    # diagonal, first-of-the-two, and last-axis neighbours.
                    sp_loss = 0.
                    sp_loss = sp_loss + torch.mean(torch.mean((em[..., 1:, 1:] - em[..., :-1, :-1]) ** 2, axis=1))
                    sp_loss = sp_loss + torch.mean(torch.mean((em[..., 1:, :] - em[..., :-1, :]) ** 2, axis=1))
                    sp_loss = sp_loss + torch.mean(torch.mean((em[..., :, 1:] - em[..., :, :-1]) ** 2, axis=1))
                    alosses.append(tp_loss.item() + sp_loss.item())
                    total_loss = total_loss + (tp_loss + sp_loss) * args.eta

                total_losses.append(total_loss.item())

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                ######################
                # validate the model #
                ######################

                # Reuses the pre-step forward output; no graph is needed here.
                with torch.no_grad():
                    val_mask = ~torch.isnan(batch_val_y)
                    val_loss = loss_func(batch_val_y[val_mask], out[val_mask])
                val_losses.append(val_loss.item())

            avg_total_loss = np.average(total_losses)
            avg_train_loss = np.average(train_losses)
            avg_a_loss = np.average(alosses)
            avg_val_loss = np.average(val_losses)

            # write for tensorboard visualization
            writer.add_scalar('data/train_loss', avg_total_loss, epoch)
            writer.add_scalar('data/val_loss', avg_val_loss, epoch)

            logging.info(f'Epoch [{epoch}/{args.epochs}] total_loss = {avg_total_loss:.4f}, train_loss = {avg_train_loss:.4f}, a_loss = {avg_a_loss:.4f}, valid_loss = {avg_val_loss:.4f}.')

            ##################
            # early_stopping #
            ##################

            early_stopping(avg_val_loss, dapm, kwargs['model_file'])

            #########################
            # evaluate testing data #
            #########################

            if early_stopping.counter < 2 and epoch % 2 == 0:

                dapm.eval()
                predictions = []

                with torch.no_grad():
                    for data in test_idx_data_loader:
                        batch_idx = data[0]
                        batch_x = construct_sequence_x(batch_idx, data_obj.dynamic_x, data_obj.static_x)
                        out, _, _, _, _ = dapm(batch_x)
                        predictions.append(out.cpu().data.numpy())

                prediction = np.concatenate(predictions)
                rmse, mape, r2 = compute_error(data_obj.test_y[seq_len:, ...], prediction)
                writer.add_scalar('data/test_rmse', rmse, epoch)
                logging.info(f'Testing: RMSE = {rmse:.4f}, MAPE = {mape:.4f}, R2 = {r2:.4f}.')

            if early_stopping.early_stop:
                logging.info(kwargs['model_name'] + f' val_loss = {early_stopping.val_loss_min:.4f}.')
                logging.info('Early stopping')
                break
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()


In [17]:
# define directory
import json  # imported explicitly instead of relying on a star import

base_dir = 'data/los_angeles_500m_embedding_spatiotemporal_similar_1234_tp1/'
train_val_test_file = '/home/yijun/notebooks/training_data/train_val_test_los_angeles_500m_fine_tune_1234.json'
device = torch.device("cuda:3" if torch.cuda.is_available() else 'cpu')  # the gpu device

# load train, val, test locations
# The context manager closes the file handle once the JSON has been parsed.
with open(train_val_test_file, 'r', encoding='utf-8') as f:
    train_val_test = json.load(f)

# Base keyword arguments forwarded to dapm_main.
kwargs = {
    'model_dir': os.path.join(base_dir, 'models/'),
    'log_dir': os.path.join(base_dir, 'logs/'),
    'run_dir': os.path.join(base_dir, 'runs/'),
    'train_val_test': train_val_test,
    'device': device
}


In [None]:
# Build the run parameters and launch training.
# The two positional arguments are presumably months and year - confirm against Param.
param = Param(
    [1],
    2019,
    alpha=1,
    beta=0.1,
    gamma=1,
    eta=2,
    sp_neighbor=1,
    lr=0.001,
    model_type=['sp', 'ae', 'sc', 'embedding_spatiotemporal_similar', 'from_stratch'],
)
dapm_main(param, **kwargs)


dapm___sp_ae_sc_embedding_spatiotemporal_similar_from_stratch___los_angeles_500m_2019___#01#___6_00001_1___1_01_1_2___16_13
ae___los_angeles_500m_2019___#01#___16
