In [None]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn

from tools.torch_lib import *

from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import copy
from torchmetrics.regression import MeanAbsolutePercentageError

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Device handles. `cpu` is reused later when the model is exported, `gpu` is only
# selected when CUDA is actually usable on this machine.
cpu = torch.device('cpu')
gpu = torch.device('cuda')

cuda_ready = torch.cuda.is_available()
device = gpu if cuda_ready else cpu

if cuda_ready:
    # Allow TF32 on matmul. This flag defaults to False in PyTorch 1.12 and later.
    torch.backends.cuda.matmul.allow_tf32 = True
    # Allow TF32 on cuDNN. This flag defaults to True.
    torch.backends.cudnn.allow_tf32 = True

print(device)

### Load dataframe

In [None]:
# Relative locations used by the notebook: the directory holding the dataset,
# the CSV file inside it, and the directory handed to the plotting helper.
dataset_dir, plots_dir = "dataset/", "plots/"
dataset_file_name = "1D_2A.csv"

In [None]:
# Read the raw dataset CSV into `df` and preview its first rows.
df = pd.read_csv(dataset_dir + dataset_file_name)
df.head()

In [None]:
# Display the loaded frame (the first rows were already previewed with df.head()).
df

In [None]:
# List the column names available in the raw dataset.
df.columns

In [None]:
# Print the minimum and maximum of every attribute

In [None]:
# Report the raw value range of every column.
for column, series in df.items():
    lowest, highest = series.min(), series.max()
    print(f"{column}: min={lowest} max={highest}")

In [None]:
# Pearson correlation matrix of the two resistivity columns; the off-diagonal
# entry [1][0] is the correlation between them.
ro_correlation_matrix = np.corrcoef(df['ro_well'].to_numpy(), df['ro_formation'].to_numpy())
print(f"ro_well, ro_formation correlation={ro_correlation_matrix[1][0]}")

In [None]:
# Per-column summary on a logarithmic scale: log of the extremes plus the
# mean/std of the raw values. 'd_well' is excluded from this report.
# NOTE(review): the model inputs below use 'rad_well', not 'd_well' — confirm
# which column name the dataset actually contains.
for column in df.columns:
    if column != 'd_well':
        col_data = df[column].to_numpy()
        log_min, log_max = np.log(col_data.min()), np.log(col_data.max())
        print(f"{column}: log_min={log_min} log_max={log_max} mean={np.mean(col_data)} std={np.std(col_data)}")

In [None]:
# Columns excluded from the experiment.
attributes_to_drop = ['A02M01N']
# errors="ignore" keeps this cell idempotent: `df` is mutated in place, so
# re-running the cell without reloading the CSV would otherwise raise KeyError
# because the column is already gone.
df.drop(columns=attributes_to_drop, inplace=True, errors="ignore")
df.head()

### Add dataframe transforms

In [None]:
# Column names fed to the network.
inputs = np.array([
    'ro_well',
    'ro_formation',
    'rad_well',
    'kanisotrop',
])
# Column names the network predicts ('A02M01N' was dropped above).
outputs = np.array([
    'A04M01N',
    'A10M01N',
    'A20M05N',
    'A40M05N',
    'A80M10N',
])

In [None]:
# Build `df_transformed`, the model-ready copy of `df`:
#   * output columns: log -> standard scaling -> min/max normalization, with each
#     step routed through an AttributeTransformer stored in
#     `attributes_transform_dict` (it receives both the forward and the backward
#     function of every step);
#   * every other column: optional log (for `logarithmic_columns`), then standard
#     scaling and min/max normalization, keeping only the forward transform.
# Input columns whose log is taken before scaling.
logarithmic_columns = ['ro_formation', 'ro_well']
# Target intervals for the min/max normalization:
interval_th = [-1, 1]     # interval intended for a 'th' (tanh) activation function
interval_sigmoid = [0, 1] # interval intended for a 'sigmoid' activation function
normalize_interval = interval_th

# Maps each output column name to the AttributeTransformer that processed it.
attributes_transform_dict = {}
# Work on a copy so the raw `df` stays available to later cells.
df_transformed = df.copy()

# Transform output attributes:
for output_attr in outputs:
    # The same transformer object is kept in the dict and used below.
    # NOTE(review): AttributeTransformer comes from the star import; presumably
    # `transform(forward, backward)` applies `forward` to its stored data, updates
    # `.data`, and remembers `backward` for later inversion — confirm in tools.torch_lib.
    attr_transformer = attributes_transform_dict[output_attr] = AttributeTransformer(df_transformed[output_attr].to_numpy())

    # Step 1: logarithmic transform (inverse: exp).
    forward, backward = np.log, np.exp
    df_transformed[output_attr] = attr_transformer.transform(forward, backward)
    # Step 2: standard scaling, parameterized from the transformer's current data.
    forward, backward = get_standard_scaler_transform(attr_transformer.data)
    df_transformed[output_attr] = attr_transformer.transform(forward, backward)
    # Step 3: normalization into `normalize_interval`, again from the current data.
    forward, backward = get_normalize_transforms(attr_transformer.data, normalize_interval)
    df_transformed[output_attr] = attr_transformer.transform(forward, backward)

# Take the logarithm of the resistivity columns (outputs were already handled above):
for col in logarithmic_columns:
    if col in outputs:
        continue
    df_transformed[col] = df_transformed[col].apply(np.log)

# Scale and normalize every remaining (non-output) column. The backward functions
# are discarded here, so these transforms cannot be inverted later from this cell.
for attribute in df_transformed.columns:
    if attribute in outputs:
        continue
    transform, _ = get_standard_scaler_transform(df_transformed[attribute].to_numpy())
    df_transformed[attribute] = transform(df_transformed[attribute].to_numpy())

    # The normalization is parameterized from the already-scaled column values.
    transform, _ = get_normalize_transforms(df_transformed[attribute].to_numpy(), normalize_interval)
    df_transformed[attribute] = transform(df_transformed[attribute].to_numpy())

df_transformed

In [None]:
# Print, per column, the constants needed to reproduce the transforms at
# inference time: mean/std (after the optional log) and the min/max of the
# standardized values.
for column in df.columns:
    takes_log = column in logarithmic_columns or column in outputs
    values = df[column].to_numpy()
    if takes_log:
        values = np.log(values)  # the log is always the first transform

    mean_value, std_value = np.mean(values), np.std(values)
    standardized = (values - mean_value) / std_value

    print(f"{column}: mean={mean_value} std={std_value}")
    print(f"{column}: min={np.min(standardized)} max={np.max(standardized)}")

In [None]:
def print_inference_statistic(attributes, df_):
    """Print the per-attribute constants needed to redo the transforms at inference.

    For every name in `attributes` the column is read from `df_`; columns listed
    in the notebook-level `logarithmic_columns` or `outputs` are passed through
    `np.log` first. Four lists are printed, each ordered like `attributes`:
    the means, the standard deviations, and the minima / maxima of the
    standardized values ``(x - mean) / std``.

    Args:
        attributes: iterable of column names to report.
        df_: DataFrame holding the untransformed columns.

    Returns:
        None. The results are only printed.
    """
    collected = {"means": [], "stds": [], "mins": [], "maxes": []}

    for column in attributes:
        values = df_[column].to_numpy()
        if column in logarithmic_columns or column in outputs:
            values = np.log(values)  # the log is always the first transform

        mean_value, std_value = np.mean(values), np.std(values)
        standardized = (values - mean_value) / std_value

        collected["means"].append(mean_value)
        collected["stds"].append(std_value)
        collected["mins"].append(np.min(standardized))
        collected["maxes"].append(np.max(standardized))

    # dict preserves insertion order, so the lines come out as means, stds, mins, maxes
    for label, numbers in collected.items():
        print(f"{label}={numbers}")

In [None]:
# Inference constants for the input columns, computed from the raw frame.
print_inference_statistic(inputs, df)

In [None]:
# Inference constants for the output columns, computed from the raw frame.
print_inference_statistic(outputs, df)

### Build Datasets and create dataloaders

In [None]:
class SimpleDataset(Dataset):
    """In-memory dataset that serves (input, target) rows of a DataFrame as tensors.

    Both column groups are converted once, up front, to float32 tensors placed on
    `device`, so indexing does no further conversion or transfer.

    Args:
        df_: source DataFrame (also kept as `self.df`).
        inputs: column names forming the feature tensor.
        outputs: column names forming the target tensor.
        device: torch device the tensors are moved to.
    """

    def __init__(self, df_, inputs, outputs, device):
        def as_float_tensor(columns):
            # DataFrame columns -> numpy -> float32 tensor on the requested device
            return torch.from_numpy(df_[columns].to_numpy()).float().to(device)

        self.df = df_
        self.inputs = as_float_tensor(inputs)
        self.outputs = as_float_tensor(outputs)

    def __len__(self):
        """Number of rows (size of the first tensor dimension)."""
        return self.inputs.shape[0]

    def __getitem__(self, idx):
        """Return the `(features, targets)` pair stored at `idx`."""
        return self.inputs[idx], self.outputs[idx]


In [None]:
batch_size = 1000
# Fixed seed so the train/test partition is identical on every run; without it
# the reported test loss is measured on a different subset after each kernel restart.
split_seed = 42

# 70/30 random split of the transformed frame.
train_df, test_df = train_test_split(
    df_transformed, shuffle=True, test_size=0.3, random_state=split_seed
)

# Each dataset converts its frame to tensors on `device` once, at construction.
train_dataset = SimpleDataset(train_df, inputs, outputs, device)
test_dataset = SimpleDataset(test_df, inputs, outputs, device)
full_dataset = SimpleDataset(df_transformed, inputs, outputs, device)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
full_dataset_loader = DataLoader(full_dataset, batch_size=batch_size, shuffle=True)

### Build models

In [None]:
class WeightedMAE(nn.Module):
    """Mean absolute error with a weight per output component.

    The loss is ``mean(weights * |inputs - targets|)``; `weights` broadcasts
    against the prediction tensor (typically one weight per output column).
    With all weights equal to 1 this equals ``nn.L1Loss()``.

    Changes compared with the previous implementation:
      * the weights scale the absolute error instead of only the predictions,
        so a perfect prediction now always yields a zero loss;
      * the weights are a registered buffer, so the standard ``nn.Module.to`` /
        ``.cuda()`` move them and return the module (the old ``to`` override
        returned ``None``, which broke ``loss = WeightedMAE(w).to(device)``);
      * the weights are cast to the dtype/device of the error, so float64
        weights no longer promote a float32 loss to float64.

    Args:
        weights: tensor (or anything accepted by ``torch.as_tensor``) of weights.
    """

    def __init__(self, weights):
        super().__init__()
        # A buffer follows the module across devices and is saved in state_dict,
        # but is not a trainable parameter.
        self.register_buffer("weights", torch.as_tensor(weights))

    def forward(self, inputs, targets):
        """Return the weighted mean absolute error between `inputs` and `targets`."""
        abs_error = torch.abs(inputs - targets)
        # No-op when the buffer already matches; avoids dtype promotion and
        # device mismatches otherwise.
        weights = self.weights.to(device=abs_error.device, dtype=abs_error.dtype)
        return torch.mean(abs_error * weights)


class LinearModel(nn.Module):
    """Stack of fully connected layers, each followed by an activation.

    Layer ``i`` maps ``layers_dims[i]`` features to ``layers_dims[i + 1]``; the last
    layer maps ``layers_dims[-1]`` to ``output_dim``. An activation is applied after
    every layer, including the last one.

    Args:
        layers_dims: input width of every linear layer (must be non-empty).
        act_str_list: keys into the `activations` lookup, one per layer; only the
            first ``len(layers_dims)`` entries are used.
        output_dim: number of features produced by the last layer.
    """

    def __init__(self, layers_dims, act_str_list, output_dim):
        super().__init__()
        depth = len(layers_dims)
        assert depth > 0

        # widths[i] -> widths[i + 1] describes layer i; the final width is the output size
        widths = list(layers_dims) + [output_dim]
        dense_layers = [nn.Linear(widths[i], widths[i + 1]) for i in range(depth)]
        layer_activations = [activations[act_str_list[i]] for i in range(depth)]

        self.linears = nn.ModuleList(dense_layers)
        self.activations = nn.ModuleList(layer_activations)

    def forward(self, x):
        """Apply every (linear, activation) pair in order and return the result."""
        y = x
        # zip over the two ModuleLists is kept because the model is later
        # passed to torch.jit.script.
        for dense, activation in zip(self.linears, self.activations):
            y = activation(dense(y))
        return y


class LinearLNormModel(nn.Module):
    """Fully connected stack with layer normalization before every activation.

    Each stage computes ``activation(layer_norm(linear(y)))``. Layer ``i`` maps
    ``layers_dims[i]`` to ``layers_dims[i + 1]`` features and the last layer maps
    ``layers_dims[-1]`` to ``output_dim``; the normalization and the activation are
    applied after the last layer as well.

    Args:
        layers_dims: input width of every linear layer (must be non-empty).
        act_str_list: keys into the `activations` lookup, one per layer. Exactly the
            first ``len(layers_dims)`` entries are used, as in the sibling models.
        output_dim: number of features produced by the last layer.

    Raises:
        IndexError: if `act_str_list` has fewer entries than `layers_dims`.
            Previously the activation list was sized by ``len(act_str_list)``, so
            a short list made ``zip`` in `forward` silently skip the trailing layers.
    """

    def __init__(self, layers_dims, act_str_list, output_dim):
        super().__init__()
        layers_count = len(layers_dims)
        assert layers_count > 0

        linears_list = []
        layers_norm_list = []

        for i in range(layers_count - 1):
            in_features, out_features = layers_dims[i], layers_dims[i + 1]
            linears_list.append(nn.Linear(in_features, out_features))
            layers_norm_list.append(nn.LayerNorm(out_features))
        # Last layer projects to the requested output size.
        linears_list.append(nn.Linear(layers_dims[layers_count - 1], output_dim))
        layers_norm_list.append(nn.LayerNorm(output_dim))

        # One activation per linear layer; indexing by layer count (not by the
        # length of act_str_list) fails loudly when names are missing.
        activations_list = [activations[act_str_list[i]] for i in range(layers_count)]

        self.linears = nn.ModuleList(linears_list)
        self.activations = nn.ModuleList(activations_list)
        self.layer_normalizations = nn.ModuleList(layers_norm_list)

    def forward(self, x):
        """Run linear -> layer norm -> activation for every stage, in order."""
        y = x

        for lin, act, norm in zip(self.linears, self.activations, self.layer_normalizations):
            y = lin(y)
            y = norm(y)
            y = act(y)

        return y


class LinearBNormModel(nn.Module):
    """Fully connected stack with batch normalization before every activation.

    Each stage computes ``activation(batch_norm(linear(y)))``. Layer ``i`` maps
    ``layers_dims[i]`` to ``layers_dims[i + 1]`` features and the last layer maps
    ``layers_dims[-1]`` to ``output_dim``; normalization and activation are applied
    after the last layer as well.

    Args:
        layers_dims: input width of every linear layer (must be non-empty).
        act_str_list: keys into the `activations` lookup, one per layer; only the
            first ``len(layers_dims)`` entries are used.
        output_dim: number of features produced by the last layer.
    """

    def __init__(self, layers_dims, act_str_list, output_dim):
        super().__init__()
        depth = len(layers_dims)
        assert depth > 0

        # widths[i] -> widths[i + 1] describes stage i; the final width is the output size
        widths = list(layers_dims) + [output_dim]
        dense_layers = [nn.Linear(widths[i], widths[i + 1]) for i in range(depth)]
        norm_layers = [nn.BatchNorm1d(widths[i + 1]) for i in range(depth)]
        stage_activations = [activations[act_str_list[i]] for i in range(depth)]

        self.linears = nn.ModuleList(dense_layers)
        self.activations = nn.ModuleList(stage_activations)
        self.batch_normalizations = nn.ModuleList(norm_layers)

    def forward(self, x):
        """Run linear -> batch norm -> activation for every stage, in order."""
        y = x
        for dense, activation, batch_norm in zip(self.linears, self.activations, self.batch_normalizations):
            y = activation(batch_norm(dense(y)))
        return y


### Train model

In [None]:
# Input width of every linear layer; the first entry is the number of input columns.
layers_dims = [len(inputs), 40, 120, 1200, 120, 50]
layers_count = len(layers_dims)
# Same activation after every layer.
activations_string_list = ['leaky-relu'] * layers_count
#activations_string_list[-1] = 'sigmoid'

linear_model = LinearModel(layers_dims, activations_string_list, len(outputs))
linear_model = linear_model.to(device)
#linear_bn_model = LinearBNormModel(layers_dims, activations_string_list, len(outputs)).to(device)
#linear_ln_model = LinearLNormModel(layers_dims, activations_string_list, len(outputs)).to(device)

# `model` / `model_name` are what the training and saving cells operate on.
model_name = "linear_model"
model = linear_model
linear_model

In [None]:
# Training hyper-parameters.
epoch_count = 1500
learning_rate = 1e-4

# Plain mean absolute error; the weighted variant is kept for reference.
#loss_function = WeightedMAE(torch.tensor([1.0, 1.0, 1.0, 1.0, 1.0], dtype=float))
loss_function = nn.L1Loss()

optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate)

In [None]:
epoch_validation = True
train_loss_threshold = 0.0003

# Pass the named flag instead of a literal True, so editing `epoch_validation`
# above actually takes effect (it was previously defined but never used).
# NOTE(review): the 7th positional argument of train_model is assumed to be the
# per-epoch validation switch and the 8th a train-loss threshold — confirm in
# tools.torch_lib.
train_loss_list, validation_loss_list = train_model(
    epoch_count, model, optimizer, loss_function,
    train_loader, test_loader, epoch_validation, train_loss_threshold,
)
plot_loss(train_loss_list, "train loss")

In [None]:
# Evaluate the model on the held-out loader; `test_loss` is reused later to build
# the file name of the saved model.
test_loss = test_loop(test_loader, model, loss_function)
print(f"test loss={test_loss}")

In [None]:
# Plot the second loss series returned by train_model (computed with `test_loader`).
plot_loss(validation_loss_list, "test loss")

### Plot predictions

In [None]:
# Sanity check: print the model output for the first training batch only.
for X, y in train_loader:
    first_batch_output = model(X)
    print(first_batch_output)
    break

In [None]:
# Loader over the full dataset with shuffle=False, so batches come out in the
# row order of `full_dataset`.
full_inference_dataset_loader = DataLoader(full_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# plot_predictions(outputs, full_dataset_loader, linear_model)

In [None]:
#plot_actual_predictions(outputs, full_inference_dataset_loader, linear_model, attributes_transform_dict, df)

In [None]:
# Replace the in-memory model with a previously saved checkpoint.
# map_location makes the load work even when the checkpoint was written from a
# different device than the one available now.
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted files.
# Recent PyTorch releases default to weights_only=True, which rejects fully
# pickled modules; pass weights_only=False there if this load fails.
# NOTE(review): from here on `model` is the loaded checkpoint, while `test_loss`
# and `model_name` still describe the model trained above.
model = torch.load("saved_models/" + "linear_model0_0006122.pth", map_location=device)
model.to(device)

In [None]:
# Plot relative prediction errors for every output over the unshuffled full dataset.
# NOTE(review): the meaning of 0.01, mode='default+hist' and bin_count is defined
# by plot_relative_errors in tools.torch_lib — confirm there.
plot_relative_errors(outputs, full_inference_dataset_loader, model, attributes_transform_dict,
                     df, 0.01, device, plots_dir, mode='default+hist', bin_count=100)

#### Check predictions manually

In [None]:
# Collect predictions and reference values, keyed by attribute name (they are
# indexed as dict[attribute][row] by compare_prediction below).
# NOTE(review): Predictor comes from tools.torch_lib; presumably it maps the model
# outputs back to the original scale via attributes_transform_dict — confirm.
predictor = Predictor(full_inference_dataset_loader, df, attributes_transform_dict, model, inputs, outputs)
predictions_dict, actuals_dict = predictor.predict(device)

In [None]:
def compare_prediction(idx: int, prediction_dict, actuals_dict, attribute):
    """Print the predicted value, the actual value and their relative error.

    The relative error is ``|actual - predicted| / |actual|``. Dividing by the
    magnitude keeps the error non-negative for negative reference values
    (dividing by the signed value reported a negative error). A zero reference
    value no longer divides by zero: the error is reported as 0.0 when the
    prediction is exact and as ``inf`` otherwise.

    Args:
        idx: row index to inspect.
        prediction_dict: mapping attribute name -> indexable of predicted values.
        actuals_dict: mapping attribute name -> indexable of reference values.
        attribute: name of the attribute to compare.

    Returns:
        None. The comparison is only printed.
    """
    predicted = prediction_dict[attribute][idx]
    actual = actuals_dict[attribute][idx]
    absolute_error = abs(actual - predicted)

    if actual != 0:
        relative_error = absolute_error / abs(actual)
    elif absolute_error == 0:
        relative_error = 0.0
    else:
        relative_error = float("inf")

    print(f"{idx}: predicted={predicted}; actual={actual}; relative error={relative_error}")

In [None]:
# Spot-check one hand-picked row (index 119896) for attribute 'A10M01N'.
compare_prediction(119896, predictions_dict, actuals_dict, 'A10M01N')

In [None]:
# Evaluate the model on one hand-written vector of four values (one per entry of
# `inputs`); -1 is the lower bound of `normalize_interval`, so this is presumably
# the minimum of every normalized input — confirm.
model(torch.tensor([-1.0, -1, -1, -1]).to(device))

In [None]:
# Evaluate the model on another hand-written, already-normalized input vector
# (values are in the order of `inputs`).
model(torch.tensor([-1.0,-1.0,-0.952277, -0.575]).to(device))

### Save model

In [None]:
# NOTE(review): `model` may have been replaced by the checkpoint loaded earlier in
# the notebook, while `model_name` and `test_loss` describe the model trained in
# this session — make sure the file name matches the model actually being saved.
save_dir = "saved_models/"
# Create the target directory up front; both save calls below fail if it is missing.
os.makedirs(save_dir, exist_ok=True)

model.to(cpu)    # move the model to the CPU before scripting/saving so no CUDA device information is stored
scripted_model = torch.jit.script(model)
# The test loss, rounded to 7 digits with '.' replaced by '_', is embedded in the file name.
model_file_name = save_dir + model_name + str(round(test_loss, 7)).replace('.', '_')

scripted_model.save(model_file_name + ".pt") # TorchScript model, compatible with the PyTorch C++ API
torch.save(model, model_file_name + ".pth")   # pickled Python module, loadable with torch.load

# Move the model back to the working device:
model.to(device)

In [None]:
#scripted_model(torch.tensor([0.6, 0.362372, 0.04]))

In [None]:
#model(torch.tensor([0.6, 0.362372, 0.04], device=device))