# Setup

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from src.vfm.connection import Connection
from src.vfm.preprocessor import Preprocessor
import os
import sys

connection = Connection()

# Add the parent directory to the sys.path list
sys.path.insert(0, os.path.abspath('..'))

# Get client
client = connection.get_client()


In [3]:
well = 'W06'
df_raw = connection.get_data(client, well = well, start='2019-01-01', end='2022-04-18')

In [4]:
preprocessor = Preprocessor(df_raw=df_raw)
df = preprocessor.preprocess_timeseries()

In [5]:
from sklearn.model_selection import train_test_split

# Select all independent variables.
X = df[['time_step', 'dhp', 'dht', 'whp', 'wht', 'choke', 'dcp']]
# Select only the target variable.
Y = df[['qo_mpfm', 'qg_mpfm', 'qw_mpfm']]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=65)

In [6]:
# Training data
print(f'Train data records  {X_train.shape}')


# Testing data
print(f'Test data records {X_test.shape}')

Train data records  (511340, 7)
Test data records (127835, 7)


# Transformer Fusion Transformer Model - Well 06


In [7]:
tft_train_df = df[['time_step', 'dhp', 'dht', 'whp', 'wht', 'choke', 'dcp', 'qo_mpfm', 'qg_mpfm', 'qw_mpfm']]
tft_train_df['well'] = well
tft_train_df = tft_train_df.reset_index(drop=True)
tft_train_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tft_train_df['well'] = well


Unnamed: 0,time_step,dhp,dht,whp,wht,choke,dcp,qo_mpfm,qg_mpfm,qw_mpfm,well
0,0,1.656893e+07,81.729813,5.774957e+06,47.395832,0.255,2.147272e+06,25.615540,3436.050914,5.987304,W06
1,1,1.656893e+07,81.729813,5.774957e+06,47.395832,0.264,2.147272e+06,27.944225,3688.212079,0.297962,W06
2,2,1.656893e+07,81.729813,5.740017e+06,49.066841,0.272,2.148035e+06,30.616353,3888.690202,0.003845,W06
3,18,1.630751e+07,81.761414,5.410590e+06,51.649303,0.434,2.153758e+06,69.254925,8439.386319,50.528047,W06
4,19,1.630751e+07,81.761414,5.403103e+06,52.105034,0.441,2.159481e+06,78.280387,8688.843380,0.852449,W06
...,...,...,...,...,...,...,...,...,...,...,...
639170,1702977,0.000000e+00,0.000000,4.912290e+06,53.276909,0.286,2.573718e+06,0.899424,101.253891,0.855294,W06
639171,1702978,0.000000e+00,0.000000,4.912290e+06,52.582466,0.286,2.552461e+06,0.074321,8.366837,0.070675,W06
639172,1702979,0.000000e+00,0.000000,4.888997e+06,51.974823,0.286,2.532112e+06,0.004875,0.548787,0.004636,W06
639173,1702980,0.000000e+00,0.000000,4.879015e+06,51.432297,0.286,2.523210e+06,0.000297,0.008367,0.000041,W06


In [None]:
from src.vfm.model.nn.tft import TemporalFusionTransformerModel
tf_nn = TemporalFusionTransformerModel(train_data=tft_train_df.head(200000))
time_varying_known_reals = ['dhp', 'dht', 'whp', 'wht', 'choke', 'dcp']
target_fields=['qg_mpfm']
raw_predictions, best_tft = tf_nn.train(time_idx='time_step', time_series_idx=['well'], target_fields=target_fields, time_varying_known_reals=time_varying_known_reals)

CUDA available: True
CUDA version: 12.1
CUDA version: NVIDIA GeForce MX450


[I 2023-10-30 23:23:45,506] A new study created in memory with name: no-name-abe7c4bb-01f2-45d1-8101-41ddfb06632f
  gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  dropout=trial.suggest_uniform("dropout", *dropout_range),
  rank_zero_warn(
  rank_zero_warn(
  model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                     

Epoch 0:  67%|██████▋   | 12932/19166 [11:51<05:42, 18.18it/s, v_num=0, train_loss_step=62.00]  

In [None]:
import matplotlib.pyplot as plt

for well_idx in range(1):  # Plot all workload timeseries
    # for field_idx in range(len(target_fields)):
        fig, ax = plt.subplots(figsize=(10, 4))
        best_tft.plot_prediction(x=raw_predictions.x, out=raw_predictions.output, idx=well_idx, add_loss_to_title=False, ax=ax)
        current_title = ax.get_title()
        new_title = f' {well} qg_mpfm - known reals {time_varying_known_reals} \n' + current_title
        ax.set_title(new_title)