In [1]:
%load_ext autoreload
%autoreload
%autoreload 2

In [1]:
from synthetic_torch_helpers import SynH5Dataset, ConvModSyn

from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Loss, Metric
from ignite.handlers import ModelCheckpoint, EarlyStopping

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

import torch
import torchvision
import datetime

import numpy as np
import matplotlib.pyplot as plt

***

# Setting up Data

### Loading Datasets

In [2]:
train_dataset = SynH5Dataset(filename="syn_flux_dataset_v2.h5", load_to_memory=True)
val_dataset = SynH5Dataset(filename="syn_flux_dataset_v2_val.h5", load_to_memory=True)

### Centering the datasets

In [3]:
train_dataset.zs -= np.mean(train_dataset.zs, axis=0)
train_dataset.flux -= np.mean(train_dataset.flux, axis=0)

In [4]:
val_dataset.zs -= np.mean(val_dataset.zs, axis=0)
val_dataset.flux -= np.mean(val_dataset.flux, axis=0)

### Scaling the datasets

In [5]:
z_mss = MinMaxScaler()
train_dataset.zs = z_mss.fit_transform(train_dataset.zs)
val_dataset.zs = z_mss.transform(val_dataset.zs)

flux_mss = MinMaxScaler()
train_dataset.flux = flux_mss.fit_transform(train_dataset.flux[:, 0, :]).reshape(-1, 1, 4563)
val_dataset.flux = flux_mss.transform(val_dataset.flux[:, 0, :]).reshape(-1, 1, 4563)

### Data Loaders

In [53]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=2048, shuffle=True)

***

# Constructing the Model

In [54]:
conv_config = [
    (1, 64, 15),
    (64, 32, 10),
#     Drop out
    (32, 16, 5),
    (16, 8, 5),
#     Drop out
    (8, 4, 3),
    (4, 2, 3)
]

full_config = [
    (1126, 512),
    (512, 256),
    (256, 1)
]

dropout_ixs = {0: .5, 1 : .5, 2: .5}
pooling_ixs = {1 : 2, 3: 2, 5: 2}
mod = ConvModSyn(conv_config, full_config, pooling_ixs, dropout_ixs, torch.nn.Sequential())
mod

ConvModSyn(
  (conv_layers): Sequential(
    (conv_0): Sequential(
      (0): Conv1d(1, 64, kernel_size=(15,), stride=(1,))
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (conv_1): Sequential(
      (0): Conv1d(64, 32, kernel_size=(10,), stride=(1,))
      (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (pool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv_2): Sequential(
      (0): Conv1d(32, 16, kernel_size=(5,), stride=(1,))
      (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (conv_3): Sequential(
      (0): Conv1d(16, 8, kernel_size=(5,), stride=(1,))
      (1): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (pool_3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mo

### Optimizer

In [55]:
opt = torch.optim.Adam(mod.parameters(), lr=0.001)

### Loss Function

In [56]:
loss = torch.nn.MSELoss()

### Setting up gpu device

In [57]:
%%bash
echo $CUDA_VISIBLE_DEVICES

1


In [58]:
print(torch.cuda.is_available())
device = torch.device("cuda")

True


### CustonMetric

In [59]:
class SciStandard(Metric):
    """
    Calculates metric to determine how many predictions are within scientific standard.
    """
    
    def __init__(self, output_transform=lambda x: x, z_tol=None):
        super(SciStandard, self).__init__(output_transform)
        if z_tol is None:
            kms = 300
            c = 299792.458 #Speed of Light in kms
            self._z_tol = np.sqrt((1+kms/c)/(1-kms/c))-1
        else:
            self._z_tol = z_tol
    
    def reset(self):
        self._total_in = 0.0
        self._num_examples = 0

    def update(self, output):
        y_pred, y = output
        y_pred, y = z_mss.inverse_transform(y_pred.cpu()), z_mss.inverse_transform(y.cpu())
        self._total_in += np.sum(np.abs(y_pred - y) <= self._z_tol)
        self._num_examples += len(y)

    def compute(self):
        if self._num_examples == 0:
            raise NotComputableError('SciStandard must have at least one example before it can be computed.')
        return self._total_in # self._num_examples

***

# Training Logic

### Setting up trainer and evaluator

In [60]:
trainer = create_supervised_trainer(mod, opt, loss, device=device)
evaluator = create_supervised_evaluator(mod, metrics={'mse': Loss(loss), 'sci': SciStandard()}, device=device)

In [61]:
model_saver = ModelCheckpoint("./models/sess_{}/".format(datetime.datetime.now()), "reg", create_dir=True, 
                              score_function=lambda eng: eng.state.val_standard, score_name='val_loss', n_saved=5)
early_stopper = EarlyStopping(20, score_function=lambda eng: eng.state.val_loss, trainer=trainer)

### Training Events (Experimenting with ignite for pytorch)

In [62]:
iter_level = 50

In [63]:
@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(trainer):
    if (trainer.state.iteration-1) % iter_level == 0:
        print("Epoch[{}], Iter: {}, Loss: {:.5f}".format(trainer.state.epoch, trainer.state.iteration, trainer.state.output))

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    evaluator.run(train_loader)
    metrics = evaluator.state.metrics
    trainer.state.train_loss = metrics['mse']
    print("Training Results - Epoch: {}, SciStandard: {:.5f}, Avg loss: {:.5f}".format(trainer.state.epoch, metrics['sci'], metrics['mse']))

@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(trainer):
    evaluator.run(val_loader)
    metrics = evaluator.state.metrics
    trainer.state.val_loss = metrics['mse']
    trainer.state.val_standard = metrics['sci']
    print("Validation Results - Epoch: {}, SciStandard: {:.5f}, Avg loss: {:.5f}".format(trainer.state.epoch, metrics['sci'], metrics['mse']))

In [64]:
trainer.add_event_handler(Events.EPOCH_COMPLETED, model_saver, {'mod': mod})
trainer.add_event_handler(Events.EPOCH_COMPLETED, early_stopper)

## Running the training loop

In [None]:
trainer.run(train_loader, max_epochs=50)

Epoch[1], Iter: 1, Loss: 0.51415
Epoch[1], Iter: 51, Loss: 0.10536
Epoch[1], Iter: 101, Loss: 0.08272
Epoch[1], Iter: 151, Loss: 0.07481
Epoch[1], Iter: 201, Loss: 0.07204
Epoch[1], Iter: 251, Loss: 0.04241
Epoch[1], Iter: 301, Loss: 0.01993
Epoch[1], Iter: 351, Loss: 0.01826
Epoch[1], Iter: 401, Loss: 0.01406
Epoch[1], Iter: 451, Loss: 0.01599
Epoch[1], Iter: 501, Loss: 0.01270
Epoch[1], Iter: 551, Loss: 0.01307
Epoch[1], Iter: 601, Loss: 0.01185
Epoch[1], Iter: 651, Loss: 0.01589
Epoch[1], Iter: 701, Loss: 0.01016
Epoch[1], Iter: 751, Loss: 0.01558
Epoch[1], Iter: 801, Loss: 0.01099
Epoch[1], Iter: 851, Loss: 0.01418
Epoch[1], Iter: 901, Loss: 0.01192
Epoch[1], Iter: 951, Loss: 0.00872
Epoch[1], Iter: 1001, Loss: 0.00561
Epoch[1], Iter: 1051, Loss: 0.00601
Epoch[1], Iter: 1101, Loss: 0.01228
Epoch[1], Iter: 1151, Loss: 0.01427
Epoch[1], Iter: 1201, Loss: 0.00746
Epoch[1], Iter: 1251, Loss: 0.00595
Epoch[1], Iter: 1301, Loss: 0.00927
Epoch[1], Iter: 1351, Loss: 0.00894
Epoch[1], Iter:

***

# How many in desired range

In [26]:
kms = 300
c = 299792.458 #Speed of Light in kms
delta_z_tolerance = np.sqrt((1+kms/c)/(1-kms/c))-1
delta_z_tolerance

0.0010011934795353117

In [54]:
mod.load_state_dict(torch.load("./models/sess_2019-06-12 14:38:20.542320/reg_mod_2_val_loss=0.002978609.pth"))
mod.eval()

ConvModSyn(
  (conv_layers): Sequential(
    (conv_0): Sequential(
      (0): Conv1d(1, 64, kernel_size=(10,), stride=(1,))
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (conv_1): Sequential(
      (0): Conv1d(64, 32, kernel_size=(2,), stride=(1,))
      (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (pool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): Sequential(
    (fc_0): Sequential(
      (0): Linear(in_features=72832, out_features=512, bias=True)
      (1): ReLU()
    )
    (dropout_0): Dropout(p=0.5)
    (fc_1): Sequential(
      (0): Linear(in_features=512, out_features=256, bias=True)
      (1): ReLU()
    )
    (dropout_1): Dropout(p=0.5)
    (fc_2): Sequential(
      (0): Linear(in_features=256, out_features=128, bias=True)
      (1): ReLU()
    )
    (fc_5): Linear(in_features=128, out_f

In [55]:
mod.eval()
with torch.no_grad():
    ttl = 0
    for X, y in val_loader:
        X, y = X.cuda(), y.cuda()
        diff = mod(X) - y
        ttl += torch.sum(torch.abs(diff) <= delta_z_tolerance)
#         ttl += torch.sum((diff <= delta_z_tolerance) & (diff >= -delta_z_tolerance))
    ttl

In [57]:
ttl.item()/len(val_dataset)

0.01388