### TimeGPT-1 testing

Imports

In [None]:
#%pip install nixtla
#%pip install -U ipywidgets

In [None]:
from nixtla import NixtlaClient

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib

from sklearn.metrics import r2_score


<span style="color:red"><sup>!!! Save the API key in a file named NIXTLA_API_KEY.txt before running the cells below</sup></span>

API key setup

In [None]:
# Read the API key; strip surrounding whitespace so that a trailing newline
# in the file does not become part of the key
with open('NIXTLA_API_KEY.txt', 'r', encoding='utf-8') as file:
    nixtla_api_key = file.read().strip()

In [None]:
# Create the Nixtla client, passing it the key read from the file
nixtla_client = NixtlaClient(api_key=nixtla_api_key)

In [None]:
# Check the API key status (sometimes reports False even though the key is valid)
# NOTE(review): stray whitespace in the key file could be one cause — confirm
nixtla_client.validate_api_key()

#### Aquifer data

Quick test

In [None]:
# Read the aquifer dataset; it is indexed by station id in the cells below
# NOTE(review): this path is relative to the working directory, while the surface
# water dataset is loaded from '../data/interim/' — confirm the file location
aquifer_by_stations = joblib.load('aquifer_by_stations.joblib')

In [None]:
# Pick the station used for the quick test
station_id = 1005
aquifer = aquifer_by_stations[station_id]

In [None]:
# Draw the altitude series of the selected station
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(aquifer['date'], aquifer['altitude'], color="royalblue", label="true data")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Hold out the last 30 rows and request a 30-step daily forecast for them
holdout = 30
training_part = aquifer[:-holdout]
timegpt_fcst_df = nixtla_client.forecast(
    df=training_part,
    h=holdout,
    freq='D',
    time_col='date',
    target_col='altitude',
)
timegpt_fcst_df.head()

In [None]:
# Compare the last 100 observations with the 30-step forecast
recent_dates = aquifer['date'][-100:]
recent_altitude = aquifer['altitude'][-100:]
forecast_dates = aquifer['date'][-30:]

fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(recent_dates, recent_altitude, color="royalblue", label="true data")
ax.plot(forecast_dates, timegpt_fcst_df['TimeGPT'], color="tomato", label="forecast")
ax.legend()
ax.grid()
plt.show()

##### Forecast for altitude differences

In [None]:
# Station ids evaluated in the rolling forecast below
aquifers_list = [85065, 85064]

In [None]:
horizon = 5  # prediction horizon (steps forecast per request)
day_len = 200  # number of days to forecast

# r2_scores[k] collects, per station, the r2 score of the (k+1)-step-ahead forecast
r2_scores = [[] for _ in range(horizon)]

for aquifer in aquifers_list:
    # The station's series does not change between requests, so look it up once
    y = aquifer_by_stations[aquifer]

    # predictions[k] stores the (k+1)-step-ahead forecasts in request order
    predictions = [[] for _ in range(horizon)]

    # Roll the forecast origin forward one day at a time. The first origin lies
    # day_len + (horizon - 1) rows before the end so that every horizon step
    # ends up with a forecast for each of the last day_len rows.
    for offset in range(day_len + (horizon - 1), 0, -1):
        timegpt_fcst = nixtla_client.forecast(
            df=y[:-offset],
            h=horizon,
            freq='D',
            time_col='date',
            target_col='altitude_diff',
        )

        # Store the results for every prediction horizon separately
        for step in range(horizon):
            predictions[step].append(timegpt_fcst['TimeGPT'][step])

    # Align every horizon step with the last day_len rows: the forecast made
    # `offset` rows before the end at step k targets row (offset - k) from the
    # end, so step k's useful window starts (horizon - 1 - k) requests in.
    for step in range(horizon):
        first = horizon - 1 - step
        predictions[step] = predictions[step][first:first + day_len]

    # Calculate the r2 scores and store them per horizon step
    actual = y['altitude_diff'][-day_len:]
    for step in range(horizon):
        r2_scores[step].append(r2_score(actual, predictions[step]))

In [None]:
# Calculate the mean and standard deviation of the r2 scores for every
# prediction horizon; iterating r2_scores directly keeps this in step with
# `horizon` instead of relying on a hard-coded count
r2_average = [np.mean(horizon_scores) for horizon_scores in r2_scores]
std_dev = [np.std(horizon_scores) for horizon_scores in r2_scores]

In [None]:
# Show the average r2 score for each prediction horizon
r2_average

In [None]:
# Plot the true altitude differences against the first-horizon forecast for
# the station left in `aquifer` by the evaluation loop
station_df = aquifer_by_stations[aquifer]
true_dates = station_df['date'][-200:]
true_values = station_df['altitude_diff'][-200:]
forecast_dates = station_df['date'][-day_len:]

fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(true_dates, true_values, color="royalblue", label="true data")
ax.plot(forecast_dates, predictions[0], color="tomato", label="forecast")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Write the average r2 scores to a text file, one value per line
with open('../reports/timegpt-1/timegpt-ground-water-r2.txt', 'w') as file:
    file.writelines(f"{item}\n" for item in r2_average)

In [None]:
# Write the standard deviations to a text file, one value per line
with open('../reports/timegpt-1/timegpt-ground-water-std-dev.txt', 'w') as file:
    file.writelines(f"{item}\n" for item in std_dev)

In [None]:
# Regroup the scores: one list per station instead of one list per horizon
r2_scores_transposed = list(map(list, zip(*r2_scores)))
# Map every station to its list of r2 scores
scores = {
    station_id: station_scores
    for station_id, station_scores in zip(aquifers_list, r2_scores_transposed)
}
scores

In [None]:
# Order the stations by their first-horizon r2 score, lowest first
scores_sorted = dict(sorted(scores.items(), key=lambda pair: pair[1][0]))
scores_sorted

In [None]:
# Persist the sorted per-station r2 scores with joblib
joblib.dump(
    value=scores_sorted,
    filename='../reports/timegpt-1/timegpt-ground-water-r2-stations',
)

#### Surface water data

In [None]:
# Read the surface water (watercourse) dataset; it is indexed by station id below
watercourse_by_stations = joblib.load('../data/interim/watercourse_by_stations.joblib')

In [None]:
# List of stations used for testing (written as strings here; they are cast to
# int before being used as keys)
station_list = ['2530', '2620', '4200', '4230', '4270', '4515', '4520', '4570', '4575', '5040', '5078', '5330', '5425', '5500', '6060', '6068', '6200', '6220', '6300', '6340', '8454', '8565']

In [None]:
# Convert every station id to int, updating the list in place
station_list[:] = [int(station_id) for station_id in station_list]

In [None]:
horizon = 5  # prediction horizon (steps forecast per request)
day_len = 200  # number of days to forecast

# r2_scores[k] collects, per station, the r2 score of the (k+1)-step-ahead forecast
r2_scores = [[] for _ in range(horizon)]

for station in station_list:
    # The station's series does not change between requests, so look it up once
    y = watercourse_by_stations[station]

    # predictions[k] stores the (k+1)-step-ahead forecasts in request order
    predictions = [[] for _ in range(horizon)]

    # Roll the forecast origin forward one day at a time. The first origin lies
    # day_len + (horizon - 1) rows before the end so that every horizon step
    # ends up with a forecast for each of the last day_len rows.
    for offset in range(day_len + (horizon - 1), 0, -1):
        timegpt_fcst = nixtla_client.forecast(
            df=y[:-offset],
            h=horizon,
            freq='D',
            time_col='date',
            target_col='level_diff',
        )

        # Store the results for every prediction horizon separately
        for step in range(horizon):
            predictions[step].append(timegpt_fcst['TimeGPT'][step])

    # Align every horizon step with the last day_len rows: the forecast made
    # `offset` rows before the end at step k targets row (offset - k) from the
    # end, so step k's useful window starts (horizon - 1 - k) requests in.
    for step in range(horizon):
        first = horizon - 1 - step
        predictions[step] = predictions[step][first:first + day_len]

    # Calculate the r2 scores and store them per horizon step
    actual = y['level_diff'][-day_len:]
    for step in range(horizon):
        r2_scores[step].append(r2_score(actual, predictions[step]))

In [None]:
# Plot the prediction for the last evaluated station. After the evaluation loop
# `predictions` holds the forecasts of the station left in `station`, so the
# dates and the true series must come from that same station (previously the
# dates, the true values and the forecast came from three different stations).
station_df = watercourse_by_stations[station]
plt.figure(figsize=(8, 4))
plt.plot(station_df['date'][-200:], station_df['level_diff'][-200:], color="royalblue", label="water level")
plt.plot(station_df['date'][-day_len:], predictions[0], color="tomato", label="forecast")
plt.legend()
plt.grid()
plt.show()

In [None]:
# Calculate the mean and standard deviation of the r2 scores for every
# prediction horizon; iterating r2_scores directly keeps this in step with
# `horizon` instead of relying on a hard-coded count
r2_average = [np.mean(horizon_scores) for horizon_scores in r2_scores]
std_dev = [np.std(horizon_scores) for horizon_scores in r2_scores]

In [None]:
# Show the average r2 score for each prediction horizon
r2_average

In [None]:
# Write the average r2 scores to a text file, one value per line
with open('../reports/timegpt-1/timegpt-surface-water-r2.txt', 'w') as file:
    file.writelines(f"{item}\n" for item in r2_average)

In [None]:
# Write the standard deviations to a text file, one value per line
with open('../reports/timegpt-1/timegpt-surface-water-std-dev.txt', 'w') as file:
    file.writelines(f"{item}\n" for item in std_dev)

In [None]:
# Regroup the scores: one list per station instead of one list per horizon
r2_scores_transposed = list(map(list, zip(*r2_scores)))
# Map every station to its list of r2 scores
scores = {
    station_id: station_scores
    for station_id, station_scores in zip(station_list, r2_scores_transposed)
}
scores

In [None]:
# Order the stations by their first-horizon r2 score, lowest first
scores_sorted = dict(sorted(scores.items(), key=lambda pair: pair[1][0]))
scores_sorted

In [None]:
# Persist the sorted per-station r2 scores with joblib
joblib.dump(
    value=scores_sorted,
    filename='../reports/timegpt-1/timegpt-surface-water-r2-stations',
)