### Imports

In [7]:
import pickle as pkl
import pandas as pd
import json
import os
import sys
import torch
import torch.optim as optim
import shap
from matplotlib import pyplot as plt
from torch.autograd import Variable

#### Import model definitions <br>
The relevant change in the model definition to receive the neural embeddings is in "EmbeddingFullyConnectedSingleGrid3L"

In [15]:
# %load /home/ninow/master_thesis/code/project/model/Baseline.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys


class AbstractNetwork(nn.Module):
    """Shared base class for the networks defined in this file.

    Provides helpers to flatten grid input and to append the optional
    time / station feature vectors to it.
    """
    # Number of columns contributed by the station and time feature vectors.
    n_station_features = 3
    n_time_features = 5

    def num_flat_features(self, x):
        """Return the number of elements per sample (product of all non-batch dimensions)."""
        flat = 1
        for extent in x.shape[1:]:  # skip the leading batch dimension
            flat *= extent
        return flat

    def getNumberOfAdditionalFeatures(self, useSTI, useTimeData):
        """Return how many extra input columns the enabled feature vectors add."""
        total = 0
        if useSTI:
            total += self.n_station_features
        if useTimeData:
            total += self.n_time_features
        return total

    def getAdditionaFeatureHandling(self, useSTI, useTimeData):
        """Return a callable ``(x, time_features, station_data) -> tensor``.

        The callable concatenates the enabled feature vectors to ``x`` along
        dimension 1, in the order x, time features, station data. If neither
        is enabled, ``x`` is returned unchanged.
        """
        if not useSTI and not useTimeData:
            def grid_only(x, *_):
                return x
            return grid_only

        if not useTimeData:
            def with_station(x, _, station_data):
                return torch.cat((x, station_data), 1)
            return with_station

        if not useSTI:
            def with_time(x, time_features, _):
                return torch.cat((x, time_features), 1)
            return with_time

        def with_time_and_station(x, time_features, station_data):
            return torch.cat((x, time_features, station_data), 1)
        return with_time_and_station


class CNN(AbstractNetwork):
    """Base class for the convolutional models; adds the output-size helper."""

    def conv_out_size(self, grid_in, grid_conv, stride):
        """Return ``int((grid_in - grid_conv) / stride + 1)``, the side length after the convolution."""
        kernel_steps = (grid_in - grid_conv) / stride
        return int(kernel_steps + 1)


class CNN0L(CNN):
    """Single convolution followed directly by the linear output layer.

    Args:
        filter_conv1: number of output channels of the convolution.
        grid_conv1: kernel size of the convolution.
        stride: stride of the convolution.
        n_parameters: number of lead-time dependent grid features.
        n_time_invariant_parameters: number of time-invariant grid features.
        n_grid: side length of the (square) input grid.
        useSTI: whether station features are appended to the flattened conv output.
        useTimeData: whether time features are appended to the flattened conv output.
        prediction_times: one network output is produced per entry.
    """

    def __init__(self, filter_conv1, grid_conv1, stride, n_parameters, n_time_invariant_parameters, n_grid,
                 useSTI, useTimeData, prediction_times):
        super(CNN0L, self).__init__()
        self.prediciton_times = prediction_times
        self.n_predictions = len(prediction_times)
        # The stride must be handed to the convolution as well: fc1 below is sized with
        # conv_out_size(..., stride), so ignoring it here breaks forward() for stride != 1.
        self.conv1 = nn.Conv2d(n_parameters + 1 + n_time_invariant_parameters, filter_conv1, grid_conv1,
                               stride=stride)
        self.addNonGridFeatures = self.getAdditionaFeatureHandling(useSTI, useTimeData)
        self.fc1 = nn.Linear(
            filter_conv1 * (self.conv_out_size(n_grid, grid_conv1, stride) ** 2) + self.getNumberOfAdditionalFeatures(
                useSTI, useTimeData), self.n_predictions)

    def forward(self, x, time_features, station_features):
        """Convolve the grid, flatten, append the enabled extra features and apply fc1."""
        x = F.relu(self.conv1(x))
        x = x.view(-1, self.num_flat_features(x))
        x = self.addNonGridFeatures(x, time_features, station_features)
        x = self.fc1(x)
        return x


class CNN1L(CNN0L):
    """CNN0L with one hidden fully connected layer of width ``n_fc1`` before the output layer."""

    def __init__(self, filter_conv1, grid_conv1, stride, n_fc1, n_parameters, n_time_invariant_parameters, n_grid,
                 useSTI, useTimeData, prediction_times):
        super().__init__(filter_conv1, grid_conv1, stride, n_parameters, n_time_invariant_parameters,
                         n_grid, useSTI, useTimeData, prediction_times)
        # Replace the parent's output layer with a hidden layer of the same input width.
        conv_side = self.conv_out_size(n_grid, grid_conv1, stride)
        n_inputs = filter_conv1 * conv_side ** 2 + self.getNumberOfAdditionalFeatures(useSTI, useTimeData)
        self.fc1 = nn.Linear(n_inputs, n_fc1)
        self.fc2 = nn.Linear(n_fc1, self.n_predictions)

    def forward(self, x, time_features, station_features):
        """Run the parent network up to fc1, apply ReLU, then the output layer fc2."""
        hidden = super().forward(x, time_features, station_features)
        return self.fc2(F.relu(hidden))


class CNN2L(CNN1L):
    """CNN1L with a second hidden fully connected layer of width ``n_fc2``."""

    def __init__(self, filter_conv1, grid_conv1, stride, n_fc1, n_fc2, n_parameters, n_time_invariant_parameters,
                 n_grid,
                 useSTI, useTimeData, prediction_times):
        super().__init__(filter_conv1, grid_conv1, stride, n_fc1, n_parameters, n_time_invariant_parameters,
                         n_grid, useSTI, useTimeData, prediction_times)
        # fc2 becomes a hidden layer; fc3 is the new output layer.
        self.fc2 = nn.Linear(n_fc1, n_fc2)
        self.fc3 = nn.Linear(n_fc2, self.n_predictions)

    def forward(self, x, time_features, station_features):
        """Run the parent network up to fc2, apply ReLU, then the output layer fc3."""
        hidden = super().forward(x, time_features, station_features)
        return self.fc3(F.relu(hidden))


class FullyConnected1L(AbstractNetwork):
    """Fully connected network on the flattened grid with one hidden layer."""

    def __init__(self, n_fc1, n_parameters, n_time_invariant_parameters, n_grid, useSTI, useTimeData, prediction_times):
        super().__init__()
        self.prediciton_times = prediction_times
        self.n_predictions = len(prediction_times)
        # Channels per grid point:
        #   n_parameters                - features of the cosmo grid depending on lead time
        #   1                           - temperature of lead time = 0
        #   n_time_invariant_parameters - time invariant grid features such as soil type,
        #                                 fraction of land, height diff. to station, etc.
        n_channels = n_parameters + 1 + n_time_invariant_parameters
        n_inputs = n_channels * (n_grid ** 2) + self.getNumberOfAdditionalFeatures(useSTI, useTimeData)
        self.fc1 = nn.Linear(n_inputs, n_fc1)
        self.addNonGridFeatures = self.getAdditionaFeatureHandling(useSTI, useTimeData)
        self.fc2 = nn.Linear(n_fc1, self.n_predictions)

    def forward(self, x, time_features, station_features):
        """Flatten the grid, append the enabled extra features, then apply fc1 -> ReLU -> fc2."""
        flat = x.view(-1, self.num_flat_features(x))
        combined = self.addNonGridFeatures(flat, time_features, station_features)
        hidden = F.relu(self.fc1(combined))
        return self.fc2(hidden)


class FullyConnected2L(FullyConnected1L):
    """FullyConnected1L with a second hidden layer of width ``n_fc2``."""

    def __init__(self, n_fc1, n_fc2, n_parameters, n_time_invariant_parameters, n_grid, useSTI,
                 useTimeData, prediction_times):
        super(FullyConnected2L, self).__init__(n_fc1, n_parameters, n_time_invariant_parameters, n_grid,
                                               useSTI, useTimeData, prediction_times)
        # fc2 consumes the output of fc1, which is exactly n_fc1 wide: the additional
        # (time / station) features are already concatenated *before* fc1 in the parent's
        # forward(). Sizing fc2 with n_fc1 + additional features made forward() fail with a
        # shape mismatch whenever any additional feature was enabled.
        self.fc2 = nn.Linear(n_fc1, n_fc2)
        self.fc3 = nn.Linear(n_fc2, self.n_predictions)

    def forward(self, x, time_features, station_features):
        """Run the parent network up to fc2, apply ReLU, then the output layer fc3."""
        x = F.relu(super(FullyConnected2L, self).forward(x, time_features, station_features))
        x = self.fc3(x)
        return x


class FullyConnected3L(FullyConnected2L):
    """FullyConnected2L with a third hidden layer of width ``n_fc3``."""

    def __init__(self, n_fc1, n_fc2, n_fc3, n_parameters, n_time_invariant_parameters, n_grid, useSTI,
                 useTimeData, prediction_times):
        super().__init__(n_fc1, n_fc2, n_parameters, n_time_invariant_parameters, n_grid,
                         useSTI, useTimeData, prediction_times)
        # fc3 becomes a hidden layer; fc4 is the new output layer.
        self.fc3 = nn.Linear(n_fc2, n_fc3)
        self.fc4 = nn.Linear(n_fc3, self.n_predictions)

    def forward(self, x, time_features, station_features):
        """Run the parent network up to fc3, apply ReLU, then the output layer fc4."""
        hidden = super().forward(x, time_features, station_features)
        return self.fc4(F.relu(hidden))
    
class FullyConnectedSingleGrid1L(AbstractNetwork):
    """Fully connected network with one hidden layer for ``n_points`` grid points per sample.

    Args:
        n_fc1: width of the hidden layer.
        n_parameters: features of the cosmo grid depending on lead time.
        n_time_invariant_parameters: time invariant grid features such as soil type,
            fraction of land, height diff. to station, etc.
        useSTI: whether station features are appended to the input.
        useTimeData: whether time features are appended to the input.
        prediction_times: one network output is produced per entry.
        n_points: number of grid points whose features make up the grid input.
        droupout_prob: dropout probability applied after the hidden layer.
    """

    def __init__(self, n_fc1, n_parameters, n_time_invariant_parameters, useSTI, useTimeData, prediction_times, n_points=1, droupout_prob=0):
        super(FullyConnectedSingleGrid1L, self).__init__()
        self.prediciton_times = prediction_times
        self.n_predictions = len(prediction_times)
        self.addNonGridFeatures = self.getAdditionaFeatureHandling(useSTI, useTimeData)
        # "+ 1" accounts for the temperature of lead time = 0.
        self.fc1 = nn.Linear(((n_parameters + 1 + n_time_invariant_parameters) * n_points + self.getNumberOfAdditionalFeatures(useSTI, useTimeData)), n_fc1)
        self.fc2 = nn.Linear(n_fc1, self.n_predictions)
        self.dropout1 = nn.Dropout(p=droupout_prob)
        print('Model uses dropout probability: %s' % droupout_prob)

    def forward(self, x, time_features, station_features):
        """Append the enabled extra features, then apply fc1 -> ReLU -> dropout -> fc2.

        Raises:
            RuntimeError: re-raised after dumping the inputs to stderr when the
                feature concatenation fails.
        """
        try:
            # this awkward squeezing is necessary because we got problems, when the batch is randomly 1 (due to not
            # dividable training or test set size by the batch size)
            x = self.addNonGridFeatures(x.squeeze(dim=-1).squeeze(dim=-1), time_features, station_features)
        except RuntimeError:
            # sys.stderr.write() takes exactly one string, and any input may be None here;
            # the diagnostics must not raise themselves and mask the original error.
            shapes = ''.join(str(getattr(item, 'shape', None)) for item in (x, time_features, station_features))
            sys.stderr.write("Shapes X, time_features, station_features: " + shapes)
            sys.stderr.write("X: " + str(x))
            sys.stderr.write("time_features: " + str(time_features))
            sys.stderr.write("station_features: " + str(station_features))
            raise

        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.fc2(x)
        return x


class FullyConnectedSingleGrid2L(FullyConnectedSingleGrid1L):
    """FullyConnectedSingleGrid1L with a second hidden layer of width ``n_fc2``."""

    def __init__(self, n_fc1, n_fc2, n_parameters, n_time_invariant_parameters, useSTI, useTimeData, prediction_times,n_points=1, droupout_prob=0):
        super().__init__(n_fc1, n_parameters, n_time_invariant_parameters, useSTI, useTimeData,
                         prediction_times, n_points, droupout_prob=droupout_prob)
        # fc2 becomes a hidden layer; fc3 is the new output layer.
        self.fc2 = nn.Linear(n_fc1, n_fc2)
        self.fc3 = nn.Linear(n_fc2, self.n_predictions)
        self.dropout2 = nn.Dropout(p=droupout_prob)
        # Counts how often forward() of this class has completed.
        self.i = 0

    def forward(self, x, time_features, station_features):
        """Run the parent network up to fc2, then ReLU -> dropout -> fc3."""
        hidden = F.relu(super().forward(x, time_features, station_features))
        out = self.fc3(self.dropout2(hidden))
        self.i += 1
        return out


class FullyConnectedSingleGrid3L(FullyConnectedSingleGrid2L):
    """FullyConnectedSingleGrid2L with a third hidden layer of width ``n_fc3``."""

    def __init__(self, n_fc1, n_fc2, n_fc3, n_parameters, n_time_invariant_parameters, useSTI, useTimeData, prediction_times,n_points=1,droupout_prob=0):
        super().__init__(n_fc1, n_fc2, n_parameters, n_time_invariant_parameters, useSTI, useTimeData,
                         prediction_times, n_points, droupout_prob=droupout_prob)
        # fc3 becomes a hidden layer; fc4 is the new output layer.
        self.fc3 = nn.Linear(n_fc2, n_fc3)
        self.fc4 = nn.Linear(n_fc3, self.n_predictions)
        self.dropout3 = nn.Dropout(p=droupout_prob)

    def forward(self, x, time_features, station_features):
        """Run the parent network up to fc3, then ReLU -> dropout -> fc4."""
        hidden = F.relu(super().forward(x, time_features, station_features))
        return self.fc4(self.dropout3(hidden))
    
class EmbeddingFullyConnectedSingleGrid3L(FullyConnectedSingleGrid3L):
    """Variant of FullyConnectedSingleGrid3L whose forward pass stops after fc3.

    The layer layout (and therefore the state-dict keys) is the same as in
    FullyConnectedSingleGrid3L; only ``forward`` differs: it returns the
    ReLU-activated output of fc3 (width ``n_fc3``) instead of the fc4 output.
    """

    def __init__(self, n_fc1, n_fc2, n_fc3, n_parameters, n_time_invariant_parameters, useSTI, useTimeData, prediction_times,n_points=1,droupout_prob=0):
        super(EmbeddingFullyConnectedSingleGrid3L, self).__init__(n_fc1, n_fc2, n_fc3, n_parameters, n_time_invariant_parameters, useSTI, useTimeData, prediction_times,n_points, droupout_prob=droupout_prob)
        # The parent constructor already created fc3 and fc4 with these sizes; they are
        # re-created here with fresh initial weights. fc4 is not used by forward() below.
        self.fc3 = nn.Linear(n_fc2, n_fc3)
        self.fc4 = nn.Linear(n_fc3, self.n_predictions)

    def forward(self, x, time_features, station_features):
        """Return ReLU(fc3(...)), i.e. the activations of the last hidden layer."""
        # super(FullyConnectedSingleGrid3L, self) deliberately starts the method lookup *after*
        # FullyConnectedSingleGrid3L, so FullyConnectedSingleGrid2L.forward is executed: it ends
        # with self.fc3, and neither dropout3 nor fc4 is applied.
        x = F.relu(super(FullyConnectedSingleGrid3L, self).forward(x, time_features, station_features))
        return x


def model_factory(model_dict, params, time_invariant_params, grid, prediction_times):
    """Build the network described by ``model_dict``.

    Args:
        model_dict: model description. Keys read here: 'type' ('CNN' or 'FC'),
            'fc_layers', 'n_fc1'..'n_fc3', and for CNNs 'n_conv1', 'grid_conv1', 'stride'.
            Optional keys: 'station_time_invariant' and 'useTimeData' (both default to True),
            'knn' (its presence selects 3 grid points instead of 1), 'dropout_prop'
            (default 0) and 'name' (only used in the error message).
        params: number of lead-time dependent grid features.
        time_invariant_params: number of time-invariant grid features.
        grid: grid size; for type 'FC', a value of 1 selects the single-grid models.
        prediction_times: one network output is produced per entry.

    Returns:
        The instantiated network. For type 'FC' with grid == 1 and three FC layers this is
        EmbeddingFullyConnectedSingleGrid3L.

    Raises:
        KeyError: if a key required by the selected architecture is missing.
        Exception: if no architecture matches the description.
    """
    print(model_dict)

    # Arguments every architecture takes.
    # NOTE(review): the model description printed in this notebook carries 'time_data' rather
    # than 'useTimeData', so this lookup falls back to True there - confirm the intended key.
    shared_kwargs = dict(
        n_parameters=params,
        n_time_invariant_parameters=time_invariant_params,
        useSTI=model_dict.get('station_time_invariant', True),
        useTimeData=model_dict.get('useTimeData', True),
        prediction_times=prediction_times,
    )

    def fc_sizes(n_layers):
        """Collect the hidden-layer widths n_fc1..n_fc<n_layers> from the description."""
        keys = ['n_fc%d' % index for index in range(1, int(n_layers) + 1)]
        return {key: model_dict[key] for key in keys}

    model_type = model_dict['type']
    model_class = None
    specific_kwargs = {}

    # CNN
    if model_type == 'CNN':
        if 'fc_layers' not in model_dict:
            model_class, n_fc_layers = CNN0L, 0
        else:
            n_fc_layers = model_dict['fc_layers']
            model_class = {1: CNN1L, 2: CNN2L}.get(n_fc_layers)
        if model_class is not None:
            specific_kwargs = dict(filter_conv1=model_dict['n_conv1'],
                                   grid_conv1=model_dict['grid_conv1'],
                                   stride=model_dict['stride'],
                                   n_grid=grid)

    # Fully connected network
    elif model_type == 'FC':
        n_fc_layers = model_dict['fc_layers']
        if grid == 1:
            model_class = {1: FullyConnectedSingleGrid1L,
                           2: FullyConnectedSingleGrid2L,
                           3: EmbeddingFullyConnectedSingleGrid3L}.get(n_fc_layers)
            if model_class is not None:
                specific_kwargs = dict(n_points=3 if 'knn' in model_dict else 1,
                                       droupout_prob=model_dict.get('dropout_prop', 0))
        else:
            model_class = {1: FullyConnected1L,
                           2: FullyConnected2L,
                           3: FullyConnected3L}.get(n_fc_layers)
            if model_class is not None:
                specific_kwargs = dict(n_grid=grid)

    if model_class is not None:
        kwargs = dict(shared_kwargs)
        kwargs.update(specific_kwargs)
        kwargs.update(fc_sizes(n_fc_layers))
        return model_class(**kwargs)

    # .get() keeps a missing 'name' from raising a KeyError that would hide this message.
    raise Exception('No matching model found for name \"%s\"' % model_dict.get('name', '<unnamed>'))


### Get input data <br>
We have to define where the network-ready training and test data are located.

In [8]:
# Load the preprocessed ("network ready") training and test tables.
# Pickle files can execute code on load, so these paths must point to trusted data.
train_set, test_set = (
    pd.read_pickle('/mnt/ds3lab-scratch/ninow/preprocessed_data/network_ready_data/train_data'),
    pd.read_pickle('/mnt/ds3lab-scratch/ninow/preprocessed_data/network_ready_data/test_data'),
)

In [9]:
# Load the stored experiment configuration. The file is opened in a context manager so the
# handle is closed again (the bare open() call leaked it).
# NOTE: pickle can execute arbitrary code on load - only read configs from a trusted source.
with open('/mnt/ds3lab-scratch/ninow/final_results/network/model_predictions/knn_1/shap/config.pkl', 'rb') as config_file:
    config = pkl.load(config_file)

# Redirect the configuration to the "error_label" experiment and its trained model.
config['experiment_path'] = '/mnt/ds3lab-scratch/ninow/final_results/network/model_predictions/error_label'
model = config['model']
model['path'] = '/mnt/ds3lab-scratch/ninow/final_results/network/model_predictions/error_label/models/FC_L3_1024_256_64_TD_GTI_STI.txt'
config['model'] = model
config['trained_model_output_path'] = '/mnt/ds3lab-scratch/ninow/final_results/network/model_predictions/error_label/FC_L3_1024_256_64_TD_GTI_STI_grid_1_bs_64_tf_0.1_optim_sgd_lr_0.0005_sl_24/'

## Define wrapper to use pytorch NN

In [10]:
def network_output_wrapper(net, X, n_grid_features=22, n_station_features=3, n_time_features=5):
    """Run ``net`` on a 2-D feature matrix and return its output as a numpy array.

    The columns of ``X`` are split into the three network inputs:
    the first ``n_grid_features`` columns are the grid input, the next
    ``n_station_features`` columns the station input (only when ``X`` is wide enough to
    hold all three groups, otherwise ``None`` is passed), and the last ``n_time_features``
    columns the time input.

    Args:
        net: callable taking (grid_data, time_data, station_data).
        X: 2-D array, one row per sample.
        n_grid_features: number of leading grid columns (default 22).
        n_station_features: number of station columns following the grid columns (default 3).
        n_time_features: number of trailing time columns (default 5).

    Returns:
        numpy array with the network output for every row of ``X``.
    """
    n_columns = X.shape[1]

    grid_data = getVariable(torch.Tensor(X[:, :n_grid_features])).float()

    if n_columns >= n_grid_features + n_station_features + n_time_features:
        station_stop = n_grid_features + n_station_features
        station_time_inv_input = getVariable(torch.Tensor(X[:, n_grid_features:station_stop])).float()
    else:
        station_time_inv_input = None

    # Explicit start index instead of a negative slice, so n_time_features == 0 selects nothing.
    time_start = max(n_columns - n_time_features, 0)
    time_data = getVariable(torch.Tensor(X[:, time_start:])).float()

    return net(grid_data, time_data, station_time_inv_input).data.cpu().numpy()


# wrapper for pytorch network output
def f(X):
    """Evaluate the global ``net`` (looked up at call time) on the feature matrix ``X``."""
    return network_output_wrapper(net=net, X=X)

In [11]:
def getVariable(tensor):
    """Wrap ``tensor`` in a ``Variable``, moving it to the GPU first when CUDA is available."""
    on_gpu = torch.cuda.is_available()
    return Variable(tensor.cuda() if on_gpu else tensor)

In [12]:
def load_checkpoint(path, model, optimizer):
    """Restore ``model`` and ``optimizer`` from ``<path>/model_best.pth.tar``.

    Args:
        path: directory containing the checkpoint file.
        model: network whose ``load_state_dict`` receives checkpoint['state_dict'].
        optimizer: optimizer whose ``load_state_dict`` receives checkpoint['optimizer'].

    Returns:
        Tuple ``(model, optimizer, epoch, best_epoch_test_rmse)`` where ``epoch`` is the
        stored epoch plus one.

    Raises:
        Exception: if the checkpoint file does not exist.
    """
    file_path = os.path.join(path, 'model_best.pth.tar')
    if not os.path.isfile(file_path):
        raise Exception("No checkpoint found at %s" % file_path)

    print("Loading checkpoint from: %s" % file_path)
    # Without CUDA, tensors saved from a GPU must be remapped to the CPU or torch.load fails.
    # NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
    map_location = None if torch.cuda.is_available() else (lambda storage, location: storage)
    checkpoint = torch.load(file_path, map_location=map_location)

    epoch = checkpoint['epoch'] + 1
    best_epoch_test_rmse = checkpoint['best_epoch_test_rmse']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("Loaded checkpoint with best test rmse %s (epoch %s)" % (best_epoch_test_rmse, checkpoint['epoch']))
    return model, optimizer, epoch, best_epoch_test_rmse

## Load trained network

In [16]:
# read the model description referenced by the configuration
with open(config['model']['path']) as handle:
    ModelDict = json.load(handle)

# initialize network and optimizer
net = model_factory(model_dict=ModelDict, params=len(config['input_parameters']),
                    time_invariant_params=len(config['grid_time_invariant_parameters']),
                    grid=config['grid_size'], prediction_times=config['prediction_times'])

# get optimizer config
optimizer_config = config['optimizer']

# if not explicitly specified, don't use regularization
optimizer_config.setdefault('weight_decay', 0)

# The optimizer is only needed because load_checkpoint restores its state as well.
optimizer = optim.SGD(net.parameters(), lr=optimizer_config['learning_rate'],
                      momentum=optimizer_config['momentum'], weight_decay=optimizer_config['weight_decay'])

net, optimizer, *_ = load_checkpoint(os.path.join(config['trained_model_output_path'], 'stored_models', 'run_0'),
                                     model=net, optimizer=optimizer)
if torch.cuda.is_available():
    net.cuda()

# The network is only used for inference below: switch to evaluation mode so that the
# dropout layers are disabled and the embeddings are deterministic.
net.eval()

{'layers': 3, 'type': 'FC', 'fc_layers': 3, 'name': 'FC_L3_1024_256_64_TD_GTI_STI', 'time_data': True, 'n_fc1': 1024, 'station_time_invariant': True, 'n_fc3': 64, 'grid_time_invariant': True, 'n_fc2': 256}
Model uses dropout probability: 0
Loading checkpoint from: /mnt/ds3lab-scratch/ninow/final_results/network/model_predictions/error_label/FC_L3_1024_256_64_TD_GTI_STI_grid_1_bs_64_tf_0.1_optim_sgd_lr_0.0005_sl_24//stored_models/run_0/model_best.pth.tar
Loaded checkpoint with best test rmse 1.2687412554348299 (epoch 27)


### Prepare data

In [17]:
# Seed intended for drawing a random subsample (1000 samples from ~1M train points);
# it is only defined in this cell, not used by it.
seed = 17257

# Inputs: every column except the first two and the last two.
training_data, test_data = train_set.iloc[:, 2:-2], test_set.iloc[:, 2:-2]

# Labels: last column minus second-to-last column.
# NOTE(review): presumably the prediction error the network was trained on - confirm.
training_labels = train_set.iloc[:, -1] - train_set.iloc[:, -2]
test_labels = test_set.iloc[:, -1] - test_set.iloc[:, -2]

### Create neural embeddings

In [18]:
# calculate embeddings
# (.values replaces DataFrame.as_matrix(), which was removed in pandas 1.0)
training_embeddings = network_output_wrapper(net, training_data.values)
test_embeddings = network_output_wrapper(net, test_data.values)

# generate pandas data frames by adding init and station for each embedding;
# the embedding frames reuse the source index so that concat(axis=1) pairs every embedding
# with its own row even when the source index is not 0..n-1
train_embeddings_data_frame = pd.concat(
    (train_set[['Init', 'Station']], pd.DataFrame(data=training_embeddings, index=train_set.index)), axis=1)
test_embeddings_data_frame = pd.concat(
    (test_set[['Init', 'Station']], pd.DataFrame(data=test_embeddings, index=test_set.index)), axis=1)

### Create final dataset

In [20]:
# bundle embeddings and labels of both splits into the dictionary that gets stored
neural_embeddings = dict(
    train_embeddings=train_embeddings_data_frame,
    train_labels=training_labels,
    test_embeddings=test_embeddings_data_frame,
    test_labels=test_labels,
)

### Store neural embeddings

In [21]:
# write the embeddings dictionary to disk using the highest available pickle protocol
with open('/home/ninow/master_thesis/code/project/results/data_valuation/neural_embeddings_error_prediction.pkl','wb') as out_file:
    pkl.dump(neural_embeddings, out_file, pkl.HIGHEST_PROTOCOL)