### **Testing TimesFM**

##### Imports

In [None]:
# Install the lingvo package into the environment the notebook kernel runs in.
# NOTE(review): presumably a runtime requirement of timesfm - confirm.
%pip install lingvo

In [None]:
import joblib
import numpy as np
import pandas as pd
import timesfm
from sklearn.metrics import r2_score

#### **1** Ground water data

In [None]:
# Read the dataset: a joblib-serialized object that the cells below index by
# station id (aquifer_by_stations[aquifer]); each entry is then accessed
# through its 'altitude_diff' and 'date' keys.
# NOTE(review): joblib.load unpickles its input, so only load files that come
# from a trusted source.
aquifer_by_stations = joblib.load('aquifer_by_stations.joblib')

In [None]:
# Station ids to evaluate; each one is used as a key into aquifer_by_stations.
aquifers_list = [85065, 85064]

In [None]:
# Evaluation settings
horizon = 5  # number of steps ahead produced by every forecast
day_len = 365  # number of trailing days the forecasts are scored on

# TimesFM hyper-parameters, collected in one place before the model is built
timesfm_params = dict(
    context_len=128,
    horizon_len=horizon,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend='gpu',
)
model = timesfm.TimesFm(**timesfm_params)

# Load the checkpoint identified by this repository id into the model
model.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

# One list of R2 scores per forecast step (index 0 = first forecast step);
# every station appends one score to each of these lists.
r2_scores = [[] for _ in range(horizon)]

# Number of rolling forecast origins: enough for every forecast step to cover
# each of the last day_len rows exactly once.
n_origins = day_len + (horizon - 1)

for aquifer in aquifers_list:
    # The station's data does not change between origins, so look it up once
    y = aquifer_by_stations[aquifer]
    actual = y['altitude_diff'][-day_len:]

    # predictions[step] collects the step-th forecast value of every origin
    predictions = [[] for _ in range(horizon)]

    # Move the forecast origin forward one row at a time: first hold out the
    # last n_origins rows, finally only the last row
    for cutoff in range(n_origins, 0, -1):
        forecast = model.forecast_on_df(
            inputs=y[:-cutoff],
            freq="D",
            value_name="altitude_diff",
            num_jobs=-1,
        )

        # Store the results for every prediction step separately
        # NOTE(review): confirm that the frame returned by forecast_on_df
        # really has a 'TimesFM' column for the installed timesfm version.
        for step in range(horizon):
            predictions[step].append(forecast['TimesFM'][step])

    # Align every step with the last day_len rows of the series. The entry at
    # position j was produced with cutoff = n_origins - j and, for a given
    # step, targets the row (cutoff - step) places from the end; the rows
    # day_len..1 from the end therefore start at j = horizon - 1 - step.
    # (Previously step 0 was cut to a fixed 200 values, which no longer
    # matched the day_len-long target passed to r2_score.)
    for step in range(horizon):
        first = horizon - 1 - step
        predictions[step] = predictions[step][first:first + day_len]

    # Calculate the r2 scores and store them per forecast step
    for step in range(horizon):
        r2_scores[step].append(r2_score(actual, predictions[step]))

In [None]:
# Mean and standard deviation of the R2 scores across stations, one value per
# forecast step. Iterating over r2_scores itself keeps this in step with the
# configured horizon instead of assuming exactly five forecast steps.
r2_average = [np.mean(step_scores) for step_scores in r2_scores]
std_dev = [np.std(step_scores) for step_scores in r2_scores]

In [None]:
# Display the per-step average R2 scores
r2_average

In [None]:
# Plot the first-step forecast against the observed values.
# `aquifer` and `predictions` are the values left over from the evaluation
# loop, i.e. they belong to the last station in aquifers_list.
# NOTE(review): plt is not imported in the visible cells; matplotlib.pyplot
# has to be available as `plt` before this cell runs.
station = aquifer_by_stations[aquifer]

# Take the window length from the forecast itself so that both curves cover
# the same dates and the x/y arguments always have equal length.
window = len(predictions[0])
dates = station['date'][-window:]

plt.figure(figsize=(8, 4))
plt.plot(dates, station['altitude_diff'][-window:], color="royalblue", label="true data")
plt.plot(dates, predictions[0], color="tomato", label="forecast")
plt.legend()
plt.grid()
plt.show()

In [None]:
# Persist the per-step average R2 scores as text, one value per line
# NOTE(review): the path is named after TimeGPT although this notebook
# evaluates TimesFM - confirm it does not overwrite another model's report.
with open('../reports/timegpt-1/timegpt-ground-water-r2.txt', 'w') as report:
    report.writelines(f"{mean_r2}\n" for mean_r2 in r2_average)

In [None]:
# Persist the per-step standard deviations as text, one value per line
# NOTE(review): the path is named after TimeGPT although this notebook
# evaluates TimesFM - confirm it does not overwrite another model's report.
with open('../reports/timegpt-1/timegpt-ground-water-std-dev.txt', 'w') as report:
    report.writelines(f"{deviation}\n" for deviation in std_dev)

In [None]:
# r2_scores is indexed [step][station]; unpacking it into zip regroups the
# values so that each inner list holds all per-step scores of one station
r2_scores_transposed = list(map(list, zip(*r2_scores)))
# Map every station id to its list of per-step scores
scores = {
    station: station_scores
    for station, station_scores in zip(aquifers_list, r2_scores_transposed)
}
scores

In [None]:
# Order the stations by their first-step R2 score, lowest score first
scores_sorted = dict(sorted(scores.items(), key=lambda entry: entry[1][0]))
scores_sorted

In [None]:
# Save the sorted per-station R2 scores with joblib
# NOTE(review): the target has no file extension and sits under the timegpt-1
# report directory although this notebook evaluates TimesFM - confirm that
# this path is intended.
joblib.dump(scores_sorted, '../reports/timegpt-1/timegpt-ground-water-r2-stations')