In [1]:
import json
import os
import warnings

import matplotlib.pyplot as plt
import pandas as pd
import torch

warnings.filterwarnings("ignore")


from trans_oil_gas import utils_fix_seeds, utils_model, utils_clustering

Choose GPU device if it is available.

In [2]:
# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# `gpu` is the list [0] when a CUDA device was selected and the integer 0 otherwise.
gpu = [0] if 'cuda' in device.type else 0
device, gpu

(device(type='cuda'), [0])

Fix all possible seeds for results reproducibility.

In [3]:
# Seed the random number generators through the project helper; the selected
# `device` is forwarded to it.
utils_fix_seeds.fix_seeds(device=device)

# Read data

Read the already preprocessed data. A synthetic dataset with the same columns as the original one is used here.

In [4]:
# Load the well-log table from the repository's data folder (path is relative
# to the notebook's working directory) and display it.
df = pd.read_csv('../data/synthetic_well_log_data.csv')
df

Unnamed: 0,DRHO,DENS,GR,DTC,WELLNAME
0,0.01778,2.3794,1.214982,101.5516,26
1,0.01701,2.3705,1.086457,101.6722,26
2,0.01624,2.3615,0.957933,101.7928,26
3,0.01779,2.3593,0.493992,101.1051,26
4,0.02008,2.3591,-0.076734,100.1601,26
...,...,...,...,...,...
730,0.11269,2.6161,0.742381,92.0410,127
731,0.11269,2.6161,0.775581,92.1752,127
732,0.11269,2.6161,0.686798,91.6055,127
733,0.11269,2.6161,0.614402,91.7522,127


# Load models

In [5]:
# Constants for the clustering run below.
# Passed to `emb_clustering_wells` as `results_len_test`.
results_len = 400
# Passed to `emb_clustering_wells` as `slice_len`.
slice_len = 100
# NOTE(review): not referenced by any cell visible in this notebook.
input_size = 4
# Passed to `emb_clustering_wells` as `n_splits`.
n_splits = 2
# Passed to `emb_clustering_wells` as `n_times`.
n_times = 2

In [6]:
# Settings keyed by "siamese_rnn" and "triplet_rnn".
# NOTE(review): CONFIG is not referenced by any cell visible in this notebook —
# confirm whether it is still needed.
CONFIG = {
    "siamese_rnn": {
        "input_size": 4,
        "output_size": 1,
        "hidden_size": 16,
        "dropout_prob": 0.25,
    },
    "triplet_rnn": {
        "input_size": 4,
        "embedding_size": 64,
    },
}

In [7]:
# Constructor arguments shared by both transformer-based models.
fixed_params_tr = dict(d_model=4)

# Tuned hyper-parameters for the siamese transformer are stored as JSON.
with open(os.path.join("./saves/", "best_params_siamese_transformer.json"), "r") as f:
    best_params_tr_s = json.load(f)
# `fc_hidden_size` is set to the tuned `hidden_size` value.
best_params_tr_s["fc_hidden_size"] = best_params_tr_s["hidden_size"]

# Build the model from the fixed and tuned arguments and cast it to float32.
transformer_siamese = utils_model.SiameseArchitecture(
    encoder_type="transformer",
    **fixed_params_tr,
    **best_params_tr_s,
).float()

# Restore the saved weights and switch the module to evaluation mode.
PATH = "./saves/best_siamese_transformer.pth"
transformer_siamese.load_state_dict(
    torch.load(PATH, map_location=device)
)
transformer_siamese.eval()

SiameseArchitecture(
  (positional_encoding): PositionalEncoding()
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): EncoderBlock(
        (self_attn): MultiheadAttention(
          (qkv_proj): Linear(in_features=4, out_features=12, bias=True)
          (o_proj): Linear(in_features=4, out_features=4, bias=True)
        )
        (linear_net): Sequential(
          (0): Linear(in_features=4, out_features=2048, bias=True)
          (1): Dropout(p=0.48276561038285704, inplace=False)
          (2): ReLU(inplace=True)
          (3): Linear(in_features=2048, out_features=4, bias=True)
        )
        (norm1): LayerNorm((4,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((4,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.48276561038285704, inplace=False)
      )
      (1): EncoderBlock(
        (self_attn): MultiheadAttention(
          (qkv_proj): Linear(in_features=4, out_features=12, bias=True)
          (o_proj): Linear(in_featu

In [8]:
# Tuned hyper-parameters for the triplet transformer are stored as JSON.
with open(os.path.join("./saves/", "best_params_triplet_transformer.json"), "r") as f:
    best_params_tr_t = json.load(f)

# Build the model from the shared transformer arguments plus the tuned ones,
# cast to float32.
transformer_triplet = utils_model.TripletArchitecture(
    encoder_type="transformer",
    **fixed_params_tr,
    **best_params_tr_t,
).float()

# Restore the saved weights and switch the module to evaluation mode.
PATH = "./saves/best_triplet_transformer.pth"
transformer_triplet.load_state_dict(
    torch.load(PATH, map_location=device)
)
transformer_triplet.eval()

TripletArchitecture(
  (positional_encoding): PositionalEncoding()
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): EncoderBlock(
        (self_attn): MultiheadAttention(
          (qkv_proj): Linear(in_features=4, out_features=12, bias=True)
          (o_proj): Linear(in_features=4, out_features=4, bias=True)
        )
        (linear_net): Sequential(
          (0): Linear(in_features=4, out_features=1280, bias=True)
          (1): Dropout(p=0.25339714884646625, inplace=False)
          (2): ReLU(inplace=True)
          (3): Linear(in_features=1280, out_features=4, bias=True)
        )
        (norm1): LayerNorm((4,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((4,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.25339714884646625, inplace=False)
      )
      (1): EncoderBlock(
        (self_attn): MultiheadAttention(
          (qkv_proj): Linear(in_features=4, out_features=12, bias=True)
          (o_proj): Linear(in_featu

In [9]:
# Constructor arguments shared by both Informer-based models.
fixed_params_inf = dict(
    enc_in=4,
    distil=None,
    device=device,
    attn="prob",
    activation="gelu",
    output_attention=False,
    n_seq=100,
)

# Tuned hyper-parameters for the siamese Informer are stored as JSON.
with open(os.path.join("./saves/", "best_params_siamese_informer.json"), "r") as f:
    best_params_inf_s = json.load(f)
# `fc_hidden_size` is fixed here rather than read from the JSON file.
best_params_inf_s["fc_hidden_size"] = 64

# Build the model from the fixed and tuned arguments and cast it to float32.
informer_siamese = utils_model.SiameseArchitecture(
    encoder_type="informer",
    **fixed_params_inf,
    **best_params_inf_s,
).float()

# Restore the saved weights and switch the module to evaluation mode.
PATH = "./saves/best_siamese_informer.pth"
informer_siamese.load_state_dict(
    torch.load(PATH, map_location=device)
)
informer_siamese.eval()

SiameseArchitecture(
  (encoder): InformerEncoder(
    (enc_embedding): DataEmbedding(
      (value_embedding): TokenEmbedding(
        (tokenConv): Conv1d(4, 64, kernel_size=(3,), stride=(1,), padding=(1,), padding_mode=circular)
      )
      (position_embedding): PositionalEmbedding()
      (dropout): Dropout(p=0.22941928520960575, inplace=False)
    )
    (encoder): Encoder(
      (attn_layers): ModuleList(
        (0): EncoderLayer(
          (attention): AttentionLayer(
            (inner_attention): ProbAttention(
              (dropout): Dropout(p=0.22941928520960575, inplace=False)
            )
            (query_projection): Linear(in_features=64, out_features=64, bias=True)
            (key_projection): Linear(in_features=64, out_features=64, bias=True)
            (value_projection): Linear(in_features=64, out_features=64, bias=True)
            (out_projection): Linear(in_features=64, out_features=64, bias=True)
          )
          (conv1): Conv1d(64, 128, kernel_size=(

In [10]:
# Tuned hyper-parameters for the triplet Informer are stored as JSON.
with open(os.path.join("./saves/", "best_params_triplet_informer.json"), "r") as f:
    best_params_inf_t = json.load(f)

# Build the model from the shared Informer arguments plus the tuned ones,
# cast to float32.
informer_triplet = utils_model.TripletArchitecture(
    encoder_type="informer",
    **fixed_params_inf,
    **best_params_inf_t,
).float()

# Restore the saved weights and switch the module to evaluation mode.
PATH = "./saves/best_triplet_informer.pth"
informer_triplet.load_state_dict(
    torch.load(PATH, map_location=device)
)
informer_triplet.eval()

TripletArchitecture(
  (encoder): InformerEncoder(
    (enc_embedding): DataEmbedding(
      (value_embedding): TokenEmbedding(
        (tokenConv): Conv1d(4, 64, kernel_size=(3,), stride=(1,), padding=(1,), padding_mode=circular)
      )
      (position_embedding): PositionalEmbedding()
      (dropout): Dropout(p=0.4264759660819143, inplace=False)
    )
    (encoder): Encoder(
      (attn_layers): ModuleList(
        (0): EncoderLayer(
          (attention): AttentionLayer(
            (inner_attention): ProbAttention(
              (dropout): Dropout(p=0.4264759660819143, inplace=False)
            )
            (query_projection): Linear(in_features=64, out_features=64, bias=True)
            (key_projection): Linear(in_features=64, out_features=64, bias=True)
            (value_projection): Linear(in_features=64, out_features=64, bias=True)
            (out_projection): Linear(in_features=64, out_features=64, bias=True)
          )
          (conv1): Conv1d(64, 1024, kernel_size=(1

In [11]:
# Constructor arguments shared by both Performer-based models.
# NOTE(review): "device" is hard-coded to "cpu" here, while the checkpoint
# below is loaded with map_location=device — confirm this is intentional.
fixed_params_perf = dict(
    dim=4,
    device="cpu",
    use_relu_kernel=False,
    n_seq=100,
)

# Tuned hyper-parameters for the siamese Performer are stored as JSON.
with open(os.path.join("./saves/", "best_params_siamese_performer.json"), "r") as f:
    best_params_perf_s = json.load(f)
# `fc_hidden_size` is fixed here rather than read from the JSON file.
best_params_perf_s["fc_hidden_size"] = 16

# Build the model from the fixed and tuned arguments and cast it to float32.
performer_siamese = utils_model.SiameseArchitecture(
    encoder_type="performer",
    **fixed_params_perf,
    **best_params_perf_s,
).float()

# Restore the saved weights and switch the module to evaluation mode.
PATH = "./saves/best_siamese_performer.pth"
performer_siamese.load_state_dict(
    torch.load(PATH, map_location=device)
)
performer_siamese.eval()

SiameseArchitecture(
  (encoder): PerformerEncoder(
    (multi_head_attention): MultiHeadFAVORAttention(
      (w_q): Linear(in_features=4, out_features=4, bias=True)
      (w_k): Linear(in_features=4, out_features=4, bias=True)
      (w_v): Linear(in_features=4, out_features=4, bias=True)
      (w_o): Linear(in_features=4, out_features=4, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (residual_1): ResidualConnection(
      (norm): LayerNorm()
      (dropout): Dropout(p=0.4758865356491505, inplace=False)
    )
    (feed_forward): FeedForward(
      (w_1): Linear(in_features=4, out_features=16, bias=True)
      (w_2): Linear(in_features=16, out_features=4, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (residual_2): ResidualConnection(
      (norm): LayerNorm()
      (dropout): Dropout(p=0.4758865356491505, inplace=False)
    )
  )
  (embed_layer): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=400, out_featur

In [12]:
# Tuned hyper-parameters for the triplet Performer are stored as JSON.
with open(os.path.join("./saves/", "best_params_triplet_performer.json"), "r") as f:
    best_params_perf_t = json.load(f)

# Build the model from the shared Performer arguments plus the tuned ones,
# cast to float32.
performer_triplet = utils_model.TripletArchitecture(
    encoder_type="performer",
    **fixed_params_perf,
    **best_params_perf_t,
).float()

# Restore the saved weights and switch the module to evaluation mode.
PATH = "./saves/best_triplet_performer.pth"
performer_triplet.load_state_dict(
    torch.load(PATH, map_location=device)
)
performer_triplet.eval()

TripletArchitecture(
  (encoder): PerformerEncoder(
    (multi_head_attention): MultiHeadFAVORAttention(
      (w_q): Linear(in_features=4, out_features=4, bias=True)
      (w_k): Linear(in_features=4, out_features=4, bias=True)
      (w_v): Linear(in_features=4, out_features=4, bias=True)
      (w_o): Linear(in_features=4, out_features=4, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (residual_1): ResidualConnection(
      (norm): LayerNorm()
      (dropout): Dropout(p=0.8062925554266286, inplace=False)
    )
    (feed_forward): FeedForward(
      (w_1): Linear(in_features=4, out_features=16, bias=True)
      (w_2): Linear(in_features=16, out_features=4, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (residual_2): ResidualConnection(
      (norm): LayerNorm()
      (dropout): Dropout(p=0.8062925554266286, inplace=False)
    )
  )
  (embed_layer): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=400, out_featur

# Clustering

In [13]:
# Maps each model label to a (mean, std) pair computed from the metrics that
# `emb_clustering_wells` returns for that model.
ari = {}

# Loaded models keyed by the label used for reporting; insertion order is the
# evaluation order.
models_by_name = {
    'siamese_transformer': transformer_siamese,
    'triplet_transformer': transformer_triplet,
    'siamese_informer': informer_siamese,
    'triplet_informer': informer_triplet,
    'siamese_performer': performer_siamese,
    'triplet_performer': performer_triplet,
}

for model_type, model in models_by_name.items():
    print(model_type)
    metrics = utils_clustering.emb_clustering_wells(
        model,
        df,
        slice_len=slice_len,
        n_splits=n_splits,
        results_len_test=results_len,
        n_times=n_times,
        path_to_saves="./saves",
    )
    ari[model_type] = (metrics.mean(), metrics.std())

siamese_transformer


400it [02:07,  3.13it/s]


ARI = 0.269 ± 0.0


400it [02:11,  3.05it/s]


ARI = 1.0 ± 0.0
triplet_transformer


400it [01:17,  5.13it/s]


ARI = 1.0 ± 0.0


400it [01:17,  5.16it/s]


ARI = 1.0 ± 0.0
siamese_informer


400it [01:59,  3.34it/s]


ARI = 1.0 ± 0.0


400it [01:57,  3.41it/s]


ARI = 1.0 ± 0.0
triplet_informer


400it [05:24,  1.23it/s]


ARI = 1.0 ± 0.0


400it [06:01,  1.11it/s]


ARI = 1.0 ± 0.0
siamese_performer


400it [00:14, 26.98it/s]


ARI = 1.0 ± 0.0


400it [00:07, 55.64it/s]


ARI = 1.0 ± 0.0
triplet_performer


400it [00:07, 56.53it/s]


ARI = 1.0 ± 0.0


400it [00:07, 54.06it/s]


ARI = 1.0 ± 0.0


# Aggregation of results

In [14]:
# Summary table: one row per model, with the columns `mean` and `std` taken
# from the (mean, std) pairs collected in `ari`.
ans_df = pd.DataFrame(ari, index=['mean', 'std']).T

# Text column "<mean> $\pm$ <std>", both rounded to 3 decimals.
# The separator is a raw string: in a normal string literal `\p` is an invalid
# escape sequence (DeprecationWarning since Python 3.6, SyntaxWarning from 3.12).
# The resulting text is unchanged.
ans_df['mean ± std'] = [
    str(round(mean, 3)) + r' $\pm$ ' + str(round(std, 3))
    for mean, std in zip(ans_df['mean'].to_numpy(), ans_df['std'].to_numpy())
]

# Persist only the formatted column (indexed by model label).
ans_df[['mean ± std']].to_csv('clustering.csv')

In [15]:
# Display the aggregated table (numeric columns plus the formatted text column).
ans_df

Unnamed: 0,mean,std,mean ± std
siamese_transformer,0.6345,0.3655,0.635 $\pm$ 0.366
triplet_transformer,1.0,0.0,1.0 $\pm$ 0.0
siamese_informer,1.0,0.0,1.0 $\pm$ 0.0
triplet_informer,1.0,0.0,1.0 $\pm$ 0.0
siamese_performer,1.0,0.0,1.0 $\pm$ 0.0
triplet_performer,1.0,0.0,1.0 $\pm$ 0.0
