In [1]:
import json
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from xgboost import XGBClassifier

# Suppress every warning for the rest of the session; no category or module filter is given.
warnings.filterwarnings("ignore")
from sklearn.preprocessing import LabelEncoder

from trans_oil_gas import utils_emb, utils_fix_seeds, utils_model, utils_dataset

Use the GPU if one is available; otherwise fall back to the CPU.

In [2]:
# Prefer the CUDA device when torch reports one, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
# GPU selector: [0] on CUDA, 0 on CPU.
gpu = [0] if device.type == 'cuda' else 0
device, gpu

(device(type='cuda'), [0])

Fix all random seeds so that the results are reproducible.

In [3]:
# Seed the random number generators through the project helper so that reruns are reproducible.
utils_fix_seeds.fix_seeds(device=device)

# Read data

Read the already preprocessed data (a synthetic dataset with the same columns as the original one is used here).

In [4]:
# Load the preprocessed (synthetic) well-log table and display it.
data_path = '../data/synthetic_well_log_data.csv'
df = pd.read_csv(data_path)
df

Unnamed: 0,DRHO,DENS,GR,DTC,WELLNAME
0,0.01778,2.3794,1.214982,101.5516,26
1,0.01701,2.3705,1.086457,101.6722,26
2,0.01624,2.3615,0.957933,101.7928,26
3,0.01779,2.3593,0.493992,101.1051,26
4,0.02008,2.3591,-0.076734,100.1601,26
...,...,...,...,...,...
730,0.11269,2.6161,0.742381,92.0410,127
731,0.11269,2.6161,0.775581,92.1752,127
732,0.11269,2.6161,0.686798,91.6055,127
733,0.11269,2.6161,0.614402,91.7522,127


# Load models

In [5]:
# Length of a slice; forwarded as `slice_len` to the dataset and embedding helpers.
slice_len = 100
# Forwarded as `results_len_test` when the dataset is generated.
results_len = 70
# Number of input features (same value as "input_size" in CONFIG).
input_size = 4

In [6]:
# Settings for the `siamese_rnn` and `triplet_rnn` entries, keyed by architecture name.
CONFIG = dict(
    siamese_rnn=dict(
        input_size=4,
        output_size=1,
        hidden_size=16,
        dropout_prob=0.25,
    ),
    triplet_rnn=dict(
        input_size=4,
        embedding_size=64,
    ),
)

In [7]:
# `d_model` is fixed here; the remaining hyperparameters come from the saved JSON.
fixed_params_tr = dict(d_model=4)
params_file = os.path.join("./saves/", "best_params_siamese_transformer.json")
with open(params_file, "r") as f:
    best_params_tr_s = json.load(f)
# The fully connected head reuses the tuned `hidden_size`.
best_params_tr_s["fc_hidden_size"] = best_params_tr_s["hidden_size"]

transformer_siamese = utils_model.SiameseArchitecture(
    encoder_type="transformer",
    **fixed_params_tr,
    **best_params_tr_s,
).float()
PATH = "./saves/best_siamese_transformer.pth"
# Restore the trained weights, remapping stored tensors onto the selected device.
state_dict = torch.load(PATH, map_location=device)
transformer_siamese.load_state_dict(state_dict)
# Switch to evaluation mode; the returned module is displayed as the cell output.
transformer_siamese.eval()

SiameseArchitecture(
  (positional_encoding): PositionalEncoding()
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): EncoderBlock(
        (self_attn): MultiheadAttention(
          (qkv_proj): Linear(in_features=4, out_features=12, bias=True)
          (o_proj): Linear(in_features=4, out_features=4, bias=True)
        )
        (linear_net): Sequential(
          (0): Linear(in_features=4, out_features=2048, bias=True)
          (1): Dropout(p=0.48276561038285704, inplace=False)
          (2): ReLU(inplace=True)
          (3): Linear(in_features=2048, out_features=4, bias=True)
        )
        (norm1): LayerNorm((4,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((4,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.48276561038285704, inplace=False)
      )
      (1): EncoderBlock(
        (self_attn): MultiheadAttention(
          (qkv_proj): Linear(in_features=4, out_features=12, bias=True)
          (o_proj): Linear(in_featu

In [8]:
# Tuned hyperparameters of the triplet transformer are read from JSON under ./saves/.
params_file = os.path.join("./saves/", "best_params_triplet_transformer.json")
with open(params_file, "r") as f:
    best_params_tr_t = json.load(f)

transformer_triplet = utils_model.TripletArchitecture(
    encoder_type="transformer",
    **fixed_params_tr,
    **best_params_tr_t,
).float()
PATH = "./saves/best_triplet_transformer.pth"
# Restore the trained weights, remapping stored tensors onto the selected device.
state_dict = torch.load(PATH, map_location=device)
transformer_triplet.load_state_dict(state_dict)
# Switch to evaluation mode; the returned module is displayed as the cell output.
transformer_triplet.eval()

TripletArchitecture(
  (positional_encoding): PositionalEncoding()
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): EncoderBlock(
        (self_attn): MultiheadAttention(
          (qkv_proj): Linear(in_features=4, out_features=12, bias=True)
          (o_proj): Linear(in_features=4, out_features=4, bias=True)
        )
        (linear_net): Sequential(
          (0): Linear(in_features=4, out_features=1280, bias=True)
          (1): Dropout(p=0.25339714884646625, inplace=False)
          (2): ReLU(inplace=True)
          (3): Linear(in_features=1280, out_features=4, bias=True)
        )
        (norm1): LayerNorm((4,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((4,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.25339714884646625, inplace=False)
      )
      (1): EncoderBlock(
        (self_attn): MultiheadAttention(
          (qkv_proj): Linear(in_features=4, out_features=12, bias=True)
          (o_proj): Linear(in_featu

In [9]:
# Informer settings that are fixed here; the remaining ones come from the saved JSON.
# NOTE(review): n_seq (100) has the same value as `slice_len` — presumably the sequence length; confirm.
fixed_params_inf = dict(
    enc_in=4,
    distil=None,
    device=device,
    attn="prob",
    activation="gelu",
    output_attention=False,
    n_seq=100,
)

params_file = os.path.join("./saves/", "best_params_siamese_informer.json")
with open(params_file, "r") as f:
    best_params_inf_s = json.load(f)
# Width of the fully connected head is set by hand rather than read from the JSON.
best_params_inf_s["fc_hidden_size"] = 64

informer_siamese = utils_model.SiameseArchitecture(
    encoder_type="informer",
    **fixed_params_inf,
    **best_params_inf_s,
).float()
PATH = "./saves/best_siamese_informer.pth"
# Restore the trained weights, remapping stored tensors onto the selected device.
state_dict = torch.load(PATH, map_location=device)
informer_siamese.load_state_dict(state_dict)
# Switch to evaluation mode; the returned module is displayed as the cell output.
informer_siamese.eval()

SiameseArchitecture(
  (encoder): InformerEncoder(
    (enc_embedding): DataEmbedding(
      (value_embedding): TokenEmbedding(
        (tokenConv): Conv1d(4, 64, kernel_size=(3,), stride=(1,), padding=(1,), padding_mode=circular)
      )
      (position_embedding): PositionalEmbedding()
      (dropout): Dropout(p=0.22941928520960575, inplace=False)
    )
    (encoder): Encoder(
      (attn_layers): ModuleList(
        (0): EncoderLayer(
          (attention): AttentionLayer(
            (inner_attention): ProbAttention(
              (dropout): Dropout(p=0.22941928520960575, inplace=False)
            )
            (query_projection): Linear(in_features=64, out_features=64, bias=True)
            (key_projection): Linear(in_features=64, out_features=64, bias=True)
            (value_projection): Linear(in_features=64, out_features=64, bias=True)
            (out_projection): Linear(in_features=64, out_features=64, bias=True)
          )
          (conv1): Conv1d(64, 128, kernel_size=(

In [10]:
# Performer settings that are fixed here; the remaining ones come from the saved JSON.
# NOTE(review): "device" is hard-coded to "cpu" here while the informer cell passes `device`,
# and the weights below are still loaded with map_location=device — confirm this is intended.
fixed_params_perf = dict(
    dim=4,
    device="cpu",
    use_relu_kernel=False,
    n_seq=100,
)

params_file = os.path.join("./saves/", "best_params_siamese_performer.json")
with open(params_file, "r") as f:
    best_params_perf_s = json.load(f)
# Width of the fully connected head is set by hand rather than read from the JSON.
best_params_perf_s["fc_hidden_size"] = 16

performer_siamese = utils_model.SiameseArchitecture(
    encoder_type="performer",
    **fixed_params_perf,
    **best_params_perf_s,
).float()
PATH = "./saves/best_siamese_performer.pth"
state_dict = torch.load(PATH, map_location=device)
performer_siamese.load_state_dict(state_dict)
# Switch to evaluation mode; the returned module is displayed as the cell output.
performer_siamese.eval()

SiameseArchitecture(
  (encoder): PerformerEncoder(
    (multi_head_attention): MultiHeadFAVORAttention(
      (w_q): Linear(in_features=4, out_features=4, bias=True)
      (w_k): Linear(in_features=4, out_features=4, bias=True)
      (w_v): Linear(in_features=4, out_features=4, bias=True)
      (w_o): Linear(in_features=4, out_features=4, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (residual_1): ResidualConnection(
      (norm): LayerNorm()
      (dropout): Dropout(p=0.4758865356491505, inplace=False)
    )
    (feed_forward): FeedForward(
      (w_1): Linear(in_features=4, out_features=16, bias=True)
      (w_2): Linear(in_features=16, out_features=4, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (residual_2): ResidualConnection(
      (norm): LayerNorm()
      (dropout): Dropout(p=0.4758865356491505, inplace=False)
    )
  )
  (embed_layer): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=400, out_featur

# Generate dataset

In [11]:
# Both positional arguments receive the same frame; only the second returned dataset is kept.
dataset_options = dict(
    well_column="WELLNAME",
    slice_len=slice_len,
    path_to_saves='./saves/',
    results_len_train=0,
    results_len_test=results_len,
)
_, slice_dataset = utils_dataset.generate_datasets(df, df, **dataset_options)

# Get models' embeddings

In [12]:
# Encoders to embed, keyed by the name under which their embeddings are stored.
models_by_name = {
    'transformer_siamese': transformer_siamese,
    'transformer_triplet': transformer_triplet,
    'informer_siamese': informer_siamese,
    'performer_siamese': performer_siamese,
}
# Empty placeholders, replaced one by one in the loop below.
embeddings = {name: torch.tensor([]) for name in models_by_name}

# Every column of `df` except one counts as an input feature.
n_features = df.shape[1] - 1

for model_type, model in models_by_name.items():
    # `wells` is reassigned on each pass; later cells see the value from the last model.
    embeddings[model_type], wells = utils_emb.get_embeddings(
        model,
        model_type,
        slice_dataset,
        slice_len=slice_len,
        input_size=n_features,
    )

70it [00:24,  2.82it/s]
70it [00:13,  5.22it/s]
70it [00:18,  3.74it/s]
70it [00:02, 32.94it/s]


In [13]:
# Insert a singleton axis at position 1 of every embedding tensor; the XGBoost cell
# selects it away again with [:, 0, :].
# Re-running this cell adds one more axis each time.
for emb_key in (
    'transformer_siamese',
    'transformer_triplet',
    'informer_siamese',
    'performer_siamese',
):
    embeddings[emb_key] = embeddings[emb_key][:, None, :]

In [14]:
# Encode the well identifiers as integer class labels.
le = LabelEncoder()
le.fit(wells)
wells_tr = le.transform(wells)

In [15]:
# Convert the encoded labels to a torch tensor; the split cell compares it element-wise to build masks.
wells_tr = torch.tensor(wells_tr)

In [16]:
# Sanity check: embedding count per model, number of labels, shape of the unique-label array.
emb_counts = [
    len(embeddings[key])
    for key in (
        'transformer_siamese',
        'transformer_triplet',
        'informer_siamese',
        'performer_siamese',
    )
]
print(*emb_counts, len(wells_tr), np.unique(wells_tr).shape)

70 70 70 70 70 (4,)


# Split embeddings into train and test sets

In [17]:
# Empty per-model placeholders; they are filled by the split cell below.
model_keys = (
    'transformer_siamese',
    'transformer_triplet',
    'informer_siamese',
    'performer_siamese',
)
train = {key: torch.tensor([]) for key in model_keys}
test = {key: torch.tensor([]) for key in model_keys}

In [18]:
# Per well, the first `len // 2 + 1` embeddings go to train and the remainder to test.
# Chunks are gathered in local lists and written to `train` / `test` by assignment, so
# re-running this cell does not append onto tensors already filled by a previous run
# (which would leave them out of step with the freshly reset label lists).
train_wells = []
test_wells = []

for i, (model, embeds) in enumerate(embeddings.items()):
    train_chunks = []
    test_chunks = []

    for well in np.unique(wells_tr):
        # Rows of this model's embeddings that belong to the current well.
        embs = embeds[wells_tr == well]
        split_at = len(embs) // 2 + 1

        train_chunks.append(embs[:split_at].detach())
        test_chunks.append(embs[split_at:].detach())

        # Labels are the same for every model, so record them on the first pass only.
        if i == 0:
            train_wells.extend([well] * split_at)
            test_wells.extend([well] * (len(embs) - split_at))

    # Leading empty tensor: same concatenation as the original placeholder-based code,
    # and the result stays defined (and empty) when there are no wells at all.
    train[model] = torch.cat([torch.tensor([]), *train_chunks], dim=0)
    test[model] = torch.cat([torch.tensor([]), *test_chunks], dim=0)

# Embeddings and labels must stay aligned for the classifiers trained below.
assert all(len(split) == len(train_wells) for split in train.values())
assert all(len(split) == len(test_wells) for split in test.values())

print(
    len(train['transformer_siamese']), len(test['transformer_siamese']), '\n',
    len(train['informer_siamese']), len(test['informer_siamese']), '\n',
    len(train['performer_siamese']), len(test['performer_siamese']), '\n',
    len(train_wells), len(test_wells)
)

37 33 
 37 33 
 37 33 
 37 33


# Train classifiers
We train classifiers that predict the well from which each embedding originates.

In [19]:
# One output class per distinct encoded well label.
output_size = torch.unique(wells_tr).shape[0]
# Batch size and number of epochs passed to the neural classifiers.
batch_size, epochs = 64, 3

In [20]:
# Input width of the classifier for each model's embeddings.
input_sizes = dict(
    transformer_siamese=best_params_tr_s["fc_hidden_size"],
    transformer_triplet=best_params_tr_t["hidden_size"],
    informer_siamese=best_params_inf_s["fc_hidden_size"],
    performer_siamese=4,  # "dim" param in PerformerEncoder that was used for training in `all_models.ipynb`
)

## XGBoost

In [21]:
results_xgb = []
banner = "#" * 100

for model_name in train:
    print(banner, model_name, banner, sep="\n")

    # Drop the singleton axis so that XGBoost receives a 2-D feature matrix.
    x_train = train[model_name].detach().cpu().numpy()[:, 0, :]
    x_test = test[model_name].detach().cpu().numpy()[:, 0, :]

    model = XGBClassifier()
    model.fit(x_train, train_wells)

    # Class probabilities on the held-out embeddings.
    y_preds = model.predict_proba(x_test)

    acc, roc_auc, pr_auc, precision, recall = utils_emb.calculate_metrics_for_final(
        target=torch.tensor(test_wells),
        y_pred=torch.tensor(y_preds),
    )
    print(acc, roc_auc, pr_auc, precision, recall)

    results_xgb.append((acc, roc_auc, pr_auc, precision, recall))

####################################################################################################
transformer_siamese
####################################################################################################
0.8484848484848485 0.9007523148148149 0.802386589105339 0.8784722222222222 0.84375
####################################################################################################
transformer_triplet
####################################################################################################
0.8484848484848485 0.9683159722222222 0.9335301658218325 0.8493055555555555 0.8472222222222222
####################################################################################################
informer_siamese
####################################################################################################
0.9696969696969697 0.9986979166666666 0.9965277777777778 0.975 0.96875
########################################################################################

## One linear layer

In [22]:
results_1_layer = []
banner = "#" * 100

for model_name in train:
    print(banner, model_name, banner, sep="\n")

    # A single linear layer from the embedding to one output per well.
    model = nn.Linear(in_features=input_sizes[model_name], out_features=output_size)

    # NOTE(review): `device.index` is None for the index-less device created above, and the
    # recorded log reads "GPU available: True, used: False". The `gpu` variable from the
    # device cell is never used — confirm whether `gpu=gpu` was intended.
    training_options = dict(
        batch_size=batch_size,
        epochs=epochs,
        gpu=device.index,
        log_dir="./logs",
    )
    model, logger, metrics = utils_emb.train_test_linear_classifier(
        train[model_name],
        test[model_name],
        train_wells,
        test_wells,
        model,
        model_name,
        **training_options,
    )

    results_1_layer.append(metrics)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores

  | Name          | Type             | Params
---------------------------------------------------
0 | model         | Linear           | 516   
1 | loss_function | CrossEntropyLoss | 0     
---------------------------------------------------
516       Trainable params
0         Non-trainable params
516       Total params
0.002     Total estimated model params size (MB)


####################################################################################################
transformer_siamese
####################################################################################################


Validation sanity check: 0it [00:00, ?it/s]

mean_accuracy = 0.2727272727272727
mean_roc_auc = 0.6725983796296297
mean_pr_auc = 0.44080955489472284


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.2727272727272727
mean_roc_auc = 0.6767939814814814
mean_pr_auc = 0.43714798396889754


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.2727272727272727
mean_roc_auc = 0.6715856481481483
mean_pr_auc = 0.4401330125784494


Validating: 0it [00:00, ?it/s]

GPU available: True, used: False
TPU available: False, using: 0 TPU cores

  | Name          | Type             | Params
---------------------------------------------------
0 | model         | Linear           | 516   
1 | loss_function | CrossEntropyLoss | 0     
---------------------------------------------------
516       Trainable params
0         Non-trainable params
516       Total params
0.002     Total estimated model params size (MB)


mean_accuracy = 0.2727272727272727
mean_roc_auc = 0.6665219907407407
mean_pr_auc = 0.43688534838119153
Accuracy: 0.2727, ROC AUC: 0.6665, PR AUC: 0.4369, Precision: 0.0682, Recall: 0.25
####################################################################################################
transformer_triplet
####################################################################################################


Validation sanity check: 0it [00:00, ?it/s]

mean_accuracy = 0.030303030303030304
mean_roc_auc = 0.2960069444444444
mean_pr_auc = 0.20181342649542125


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.09090909090909091
mean_roc_auc = 0.3431712962962963
mean_pr_auc = 0.21740459331911446


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.18181818181818182
mean_roc_auc = 0.40625
mean_pr_auc = 0.25486869621365676


Validating: 0it [00:00, ?it/s]

GPU available: True, used: False
TPU available: False, using: 0 TPU cores

  | Name          | Type             | Params
---------------------------------------------------
0 | model         | Linear           | 260   
1 | loss_function | CrossEntropyLoss | 0     
---------------------------------------------------
260       Trainable params
0         Non-trainable params
260       Total params
0.001     Total estimated model params size (MB)


mean_accuracy = 0.24242424242424243
mean_roc_auc = 0.5101273148148148
mean_pr_auc = 0.3166682118109628
Accuracy: 0.2424, ROC AUC: 0.5101, PR AUC: 0.3167, Precision: 0.0769, Recall: 0.2222
####################################################################################################
informer_siamese
####################################################################################################


Validation sanity check: 0it [00:00, ?it/s]

mean_accuracy = 0.0
mean_roc_auc = 0.3541666666666667
mean_pr_auc = 0.2253682071585013


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.0
mean_roc_auc = 0.3576388888888889
mean_pr_auc = 0.2281707865235807


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.0
mean_roc_auc = 0.3634259259259259
mean_pr_auc = 0.23312786480866232


Validating: 0it [00:00, ?it/s]

GPU available: True, used: False
TPU available: False, using: 0 TPU cores

  | Name          | Type             | Params
---------------------------------------------------
0 | model         | Linear           | 20    
1 | loss_function | CrossEntropyLoss | 0     
---------------------------------------------------
20        Trainable params
0         Non-trainable params
20        Total params
0.000     Total estimated model params size (MB)


mean_accuracy = 0.0
mean_roc_auc = 0.37065972222222215
mean_pr_auc = 0.23820028944383898
Accuracy: 0.0, ROC AUC: 0.3707, PR AUC: 0.2382, Precision: 0.0, Recall: 0.0
####################################################################################################
performer_siamese
####################################################################################################


Validation sanity check: 0it [00:00, ?it/s]

mean_accuracy = 0.24242424242424243
mean_roc_auc = 0.4415509259259259
mean_pr_auc = 0.33927852240690204


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.24242424242424243
mean_roc_auc = 0.4454571759259259
mean_pr_auc = 0.3407627434876788


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.24242424242424243
mean_roc_auc = 0.45283564814814814
mean_pr_auc = 0.3483636890042254


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.24242424242424243
mean_roc_auc = 0.4616608796296296
mean_pr_auc = 0.3514051631834037
Accuracy: 0.2424, ROC AUC: 0.4617, PR AUC: 0.3514, Precision: 0.1605, Recall: 0.2465


To analyse the training process, you can use the `tensorboard` extension for Jupyter Notebook. To enable it, uncomment the lines in the cell below:

In [23]:
# %load_ext tensorboard
# %tensorboard --logdir ./logs --host 0.0.0.0

## FCNN

In [24]:
results_multi_layer = []
banner = "#" * 100

for model_name in train:
    print(banner, model_name, banner, sep="\n")

    # Hyperparameters for our network
    hidden_sizes = [64, 128]
    first_width, second_width = hidden_sizes

    # Feed-forward network: two hidden layers with ReLU, then one output per well.
    layers = [
        nn.Linear(input_sizes[model_name], first_width),
        nn.ReLU(),
        nn.Linear(first_width, second_width),
        nn.ReLU(),
        nn.Linear(second_width, output_size),
    ]
    model = nn.Sequential(*layers)

    # NOTE(review): `device.index` is None for the index-less device created above, and the
    # recorded log reads "GPU available: True, used: False". The `gpu` variable from the
    # device cell is never used — confirm whether `gpu=gpu` was intended.
    training_options = dict(
        batch_size=batch_size,
        epochs=epochs,
        gpu=device.index,
        log_dir="./logs",
    )
    model, logger, metrics_m = utils_emb.train_test_linear_classifier(
        train[model_name],
        test[model_name],
        train_wells,
        test_wells,
        model,
        model_name,
        **training_options,
    )

    results_multi_layer.append(metrics_m)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores

  | Name          | Type             | Params
---------------------------------------------------
0 | model         | Sequential       | 17.1 K
1 | loss_function | CrossEntropyLoss | 0     
---------------------------------------------------
17.1 K    Trainable params
0         Non-trainable params
17.1 K    Total params
0.068     Total estimated model params size (MB)


####################################################################################################
transformer_siamese
####################################################################################################


Validation sanity check: 0it [00:00, ?it/s]

mean_accuracy = 0.2727272727272727
mean_roc_auc = 0.8120659722222223
mean_pr_auc = 0.5313618210493211


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.2727272727272727
mean_roc_auc = 0.8327546296296297
mean_pr_auc = 0.6364221249553912


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.2727272727272727
mean_roc_auc = 0.8463541666666666
mean_pr_auc = 0.644677688212171


Validating: 0it [00:00, ?it/s]

GPU available: True, used: False
TPU available: False, using: 0 TPU cores

  | Name          | Type             | Params
---------------------------------------------------
0 | model         | Sequential       | 17.1 K
1 | loss_function | CrossEntropyLoss | 0     
---------------------------------------------------
17.1 K    Trainable params
0         Non-trainable params
17.1 K    Total params
0.068     Total estimated model params size (MB)


mean_accuracy = 0.2727272727272727
mean_roc_auc = 0.8499710648148149
mean_pr_auc = 0.6437155165096342
Accuracy: 0.2727, ROC AUC: 0.85, PR AUC: 0.6437, Precision: 0.0682, Recall: 0.25
####################################################################################################
transformer_triplet
####################################################################################################


Validation sanity check: 0it [00:00, ?it/s]

mean_accuracy = 0.42424242424242425
mean_roc_auc = 0.8096064814814815
mean_pr_auc = 0.6345273168189834


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.6060606060606061
mean_roc_auc = 0.9331597222222222
mean_pr_auc = 0.843030753968254


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.7575757575757576
mean_roc_auc = 0.9486400462962963
mean_pr_auc = 0.8942003598253598


Validating: 0it [00:00, ?it/s]

GPU available: True, used: False
TPU available: False, using: 0 TPU cores

  | Name          | Type             | Params
---------------------------------------------------
0 | model         | Sequential       | 13.0 K
1 | loss_function | CrossEntropyLoss | 0     
---------------------------------------------------
13.0 K    Trainable params
0         Non-trainable params
13.0 K    Total params
0.052     Total estimated model params size (MB)


mean_accuracy = 0.7272727272727273
mean_roc_auc = 0.9509548611111112
mean_pr_auc = 0.9000270562770563
Accuracy: 0.7273, ROC AUC: 0.951, PR AUC: 0.9, Precision: 0.6176, Recall: 0.75
####################################################################################################
informer_siamese
####################################################################################################


Validation sanity check: 0it [00:00, ?it/s]

mean_accuracy = 0.24242424242424243
mean_roc_auc = 0.4764178240740741
mean_pr_auc = 0.41601666180777525


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.5151515151515151
mean_roc_auc = 0.6950231481481483
mean_pr_auc = 0.45013965569113334


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.5151515151515151
mean_roc_auc = 0.9058159722222222
mean_pr_auc = 0.7768783300033301


Validating: 0it [00:00, ?it/s]

GPU available: True, used: False
TPU available: False, using: 0 TPU cores

  | Name          | Type             | Params
---------------------------------------------------
0 | model         | Sequential       | 9.2 K 
1 | loss_function | CrossEntropyLoss | 0     
---------------------------------------------------
9.2 K     Trainable params
0         Non-trainable params
9.2 K     Total params
0.037     Total estimated model params size (MB)


mean_accuracy = 0.48484848484848486
mean_roc_auc = 0.8893229166666666
mean_pr_auc = 0.7504088793151293
Accuracy: 0.4848, ROC AUC: 0.8893, PR AUC: 0.7504, Precision: 0.2426, Recall: 0.5
####################################################################################################
performer_siamese
####################################################################################################


Validation sanity check: 0it [00:00, ?it/s]

mean_accuracy = 0.12121212121212122
mean_roc_auc = 0.33695023148148145
mean_pr_auc = 0.22710807457332416


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.2727272727272727
mean_roc_auc = 0.6921296296296297
mean_pr_auc = 0.4920970859772943


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.6363636363636364
mean_roc_auc = 0.9231770833333334
mean_pr_auc = 0.8708581166601467


Validating: 0it [00:00, ?it/s]

mean_accuracy = 0.7272727272727273
mean_roc_auc = 0.9482060185185185
mean_pr_auc = 0.8694110287860287
Accuracy: 0.7273, ROC AUC: 0.9482, PR AUC: 0.8694, Precision: 0.7777, Recall: 0.7361


# Result aggregation

In [25]:
# Display names, listed in the same order as the keys of `train`.
models_names = [
    "Siamese Transformer",
    "Triplet Transformer",
    "Siamese Informer",
    "Siamese Performer",
]

# One / two trailing spaces keep the names distinct when the three result sets share one dict.
models_names_1 = [f"{name} " for name in models_names]
models_names_2 = [f"{name}  " for name in models_names]

In [26]:
# Row labels for the five metrics, in the order in which each result tuple stores them.
metric_labels = {
    0: "Accuracy",
    1: "ROC AUC",
    2: "PR AUC",
    3: "Precision",
    4: "Recall",
}

# One dict entry per (model, classifier) pair; the padded names avoid key collisions.
ans_dict = dict(zip(models_names, results_xgb))
ans_dict_1 = dict(zip(models_names_1, results_1_layer))
ans_dict_2 = dict(zip(models_names_2, results_multi_layer))

ans_dict.update(ans_dict_1)
ans_dict.update(ans_dict_2)

# Metrics as rows first, then rounded and transposed to one row per model.
table = pd.DataFrame(ans_dict).rename(index=metric_labels)
table = np.round(table, 3).T

n_models = len(models_names)
classification_model = ['xgb'] * n_models + ['1 linear layer'] * n_models + ['fcnn'] * n_models

# NOTE(review): the column label is misspelled ("Classificatiom"); it is kept unchanged
# because it becomes the header of the saved CSV — fix together with any consumers.
table['Classificatiom model'] = classification_model
table.index.name = 'Model'

# Column order: classifier, Accuracy, Precision, Recall, ROC AUC, PR AUC.
column_order = [-1, 0, 3, 4, 1, 2]
ans_table = table.iloc[:, column_order]
ans_table.to_csv('embeddings_quality.csv')

In [27]:
# Show the aggregated metrics table.
ans_table

Unnamed: 0_level_0,Classificatiom model,Accuracy,Precision,Recall,ROC AUC,PR AUC
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Siamese Transformer,xgb,0.848,0.878,0.844,0.901,0.802
Triplet Transformer,xgb,0.848,0.849,0.847,0.968,0.934
Siamese Informer,xgb,0.97,0.975,0.969,0.999,0.997
Siamese Performer,xgb,0.848,0.869,0.854,0.952,0.901
Siamese Transformer,1 linear layer,0.273,0.068,0.25,0.667,0.437
Triplet Transformer,1 linear layer,0.242,0.077,0.222,0.51,0.317
Siamese Informer,1 linear layer,0.0,0.0,0.0,0.371,0.238
Siamese Performer,1 linear layer,0.242,0.161,0.247,0.462,0.351
Siamese Transformer,fcnn,0.273,0.068,0.25,0.85,0.644
Triplet Transformer,fcnn,0.727,0.618,0.75,0.951,0.9
