# Setup

In [2]:
%load_ext autoreload
%autoreload 2

In [6]:
from src.vfm.connection import Connection
from src.vfm.preprocessor import Preprocessor
import pandas as pd
from src.vfm.model.nn.tft import TemporalFusionTransformerModel

  from tqdm.autonotebook import tqdm


In [4]:
connection = Connection()

# Add the parent directory to the sys.path list
# sys.path.insert(0, os.path.abspath('..'))

# Get client
client = connection.get_client()


In [7]:
well = 'W06'
df_raw = connection.get_data(client, well = well, start=pd.Timestamp('2019-01-01'), end=pd.Timestamp('2022-04-18'))

In [8]:
preprocessor = Preprocessor(df_raw=df_raw)
df = preprocessor.preprocess_timeseries()
df.shape

(70, 13)

In [9]:
from sklearn.model_selection import train_test_split

# Select all independent variables.
X = df[['time_step', 'dhp', 'dht', 'whp', 'wht', 'choke', 'dcp']]
# Select only the target variable.
Y = df[['qo_mpfm', 'qg_mpfm', 'qw_mpfm']]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=65)

In [10]:
# Training data
print(f'Train data records  {X_train.shape}')


# Testing data
print(f'Test data records {X_test.shape}')

Train data records  (56, 7)
Test data records (14, 7)


# Temporal Fusion Transformer Model - Well 06

In [11]:
tft_train_df = df[['time_step', 'dhp', 'dht', 'whp', 'wht', 'choke', 'dcp', 'qo_mpfm', 'qg_mpfm', 'qw_mpfm']]
tft_train_df['well'] = well
tft_train_df = tft_train_df.reset_index(drop=True)
tft_train_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tft_train_df['well'] = well


Unnamed: 0,time_step,dhp,dht,whp,wht,choke,dcp,qo_mpfm,qg_mpfm,qw_mpfm,well
0,0,1.632420e+07,81.775019,5.659456e+06,55.487664,0.266403,2.119695e+06,46.163504,5770.970155,0.005544,W06
1,160564,1.472851e+07,81.779837,4.354705e+06,63.998024,0.639000,2.223913e+06,161.678220,19730.379068,0.156350,W06
2,164427,1.522295e+07,81.802201,4.783272e+06,61.866347,0.527622,2.177317e+06,129.344960,15752.985564,0.002812,W06
3,166435,1.629692e+07,81.792378,5.614163e+06,59.510117,0.280901,2.112620e+06,47.306754,5869.085017,0.008071,W06
4,167065,1.635270e+07,81.797316,5.636706e+06,60.449022,0.281301,2.109265e+06,48.185071,5968.807636,0.001276,W06
...,...,...,...,...,...,...,...,...,...,...,...
65,1334921,0.000000e+00,0.000000,3.507099e+06,60.261279,0.772418,2.213360e+06,156.707003,19250.863561,692.881260,W06
66,1356091,0.000000e+00,0.000000,3.271542e+06,61.629887,0.863000,2.222820e+06,164.725428,20459.756094,22477.333547,W06
67,1398428,0.000000e+00,0.000000,3.145420e+06,64.110234,0.999976,2.245061e+06,182.844914,22538.869193,968.140703,W06
68,1399424,0.000000e+00,0.000000,3.171013e+06,64.278815,0.924723,2.228837e+06,173.725389,21173.870221,3888.404818,W06


In [12]:

tf_nn = TemporalFusionTransformerModel(train_data=tft_train_df.head(200000))
time_varying_known_reals = ['dhp', 'dht', 'whp', 'wht', 'choke', 'dcp']
target_fields=['qg_mpfm']
raw_predictions, best_tft = tf_nn.train(time_idx='time_step', time_series_idx=['well'], target_fields=target_fields, time_varying_known_reals=time_varying_known_reals)

CUDA available: True
CUDA version: 12.6
CUDA version: NVIDIA GeForce RTX 3050 6GB Laptop GPU




AssertionError: filters should not remove entries all entries - check encoder/decoder lengths and lags

In [None]:
import matplotlib.pyplot as plt

for well_idx in range(1):  # Plot all workload timeseries
    # for field_idx in range(len(target_fields)):
        fig, ax = plt.subplots(figsize=(10, 4))
        best_tft.plot_prediction(x=raw_predictions.x, out=raw_predictions.output, idx=well_idx, add_loss_to_title=False, ax=ax)
        current_title = ax.get_title()
        new_title = f' {well} qg_mpfm - known reals {time_varying_known_reals} \n' + current_title
        ax.set_title(new_title)