### Testing Chronos on surface water data

Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import joblib

import time
from datasetsforecast.m3 import M3
from utilsforecast.losses import *
from utilsforecast.evaluation import evaluate
import torch
from chronos import ChronosPipeline

from sklearn.metrics import r2_score

Data preparation

In [None]:
# Load the serialized watercourse dataset (a mapping that is indexed by station id below)
dataset_path = '../data/interim/watercourse_by_stations.joblib'
watercourse_by_stations = joblib.load(dataset_path)

In [None]:
# Convert the 'date' entry of every station's data to pandas datetimes.
# Only the values are needed, so the station keys are not iterated.
for station_data in watercourse_by_stations.values():
    station_data['date'] = pd.to_datetime(station_data['date'])

Standard scaling

In [None]:
# Statistics shared by the scaling function and its inverse
def _mean_and_std(values):
    """Return the arithmetic mean and population standard deviation of *values*.

    NaN entries are ignored when computing the statistics. A standard
    deviation of zero (constant series) is replaced by 1.0 so that scaling
    never divides by zero.
    """
    arr = np.asarray(values, dtype=float)
    mean = np.nanmean(arr)
    s = np.nanstd(arr)
    if s == 0:
        s = 1.0

    return mean, s


# Define the standard scaling function
def standard_scaling(x):
    """Scale *x* to zero mean and unit (population) standard deviation.

    The series is centred on its arithmetic mean (not the mean of absolute
    values), so data containing negative numbers is centred correctly too.

    Args:
        x: Array-like of numeric values.

    Returns:
        ``(x - mean) / std``, with the same container type arithmetic on *x*
        produces.
    """
    mean, s = _mean_and_std(x)

    return (x - mean) / s


# Define the reverse function of standard scaling
def standard_unscaling(original, scaled):
    """Map *scaled* values back to the units of *original*.

    Exact inverse of ``standard_scaling``: the mean and standard deviation
    are recomputed from *original* in the same way.

    Args:
        original: The unscaled series the statistics are taken from.
        scaled: Values expressed in the scaled space (e.g. forecasts).

    Returns:
        ``scaled * std + mean``.
    """
    mean, s = _mean_and_std(original)

    return (scaled * s) + mean

Visualise data

In [None]:
# Relative differences in level for station 1335
diff_source = watercourse_by_stations[1335]
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(diff_source['date'], diff_source['level_diff'], color="royalblue", label="water level difference")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Absolute water level for station 1335
level_source = watercourse_by_stations[1335]
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(level_source['date'], level_source['level'], color="royalblue", label="water level")
ax.legend()
ax.grid()
plt.show()

#### Forecast for level differences (multiple stations)

In [None]:
# List of stations used for testing (ids as strings; they are cast to int afterwards)
station_list = [
    '2530', '2620', '4200', '4230', '4270', '4515',
    '4520', '4570', '4575', '5040', '5078', '5330',
    '5425', '5500', '6060', '6068', '6200', '6220',
    '6300', '6340', '8454', '8565',
]

In [None]:
# Cast the station ids to int, replacing the list contents in place
station_list[:] = [int(station_id) for station_id in station_list]

In [None]:
# Load the pretrained Chronos model once; the same pipeline is reused for every station
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-large",
    device_map="cuda",
    torch_dtype=torch.bfloat16,
)

horizon = 5     # number of steps forecast from every origin
day_len = 365   # number of trailing observations that are scored

# r2_scores[step] collects one R2 value per station for forecast step `step`
r2_scores = [[] for _ in range(horizon)]
predictions = []

for station in station_list:
    # The series does not change between forecast origins, so read it once per station.
    # The level differences are fed to the model without additional scaling.
    series = watercourse_by_stations[station]['level_diff'].values
    actual = series[-day_len:]

    # predictions[step] stores the median forecast of step `step` for every origin
    predictions = [[] for _ in range(horizon)]

    # Roll the forecast origin forward: the context ends `offset` observations
    # before the end of the series, starting far enough back that every step
    # covers the whole scored window.
    for offset in range(day_len + horizon - 1, 0, -1):
        context = torch.tensor(series[:-offset])

        forecast = pipeline.predict(
            context=context,
            prediction_length=horizon,
            num_samples=20
        )

        # Quantiles are taken along axis 0 of the first forecast
        # (presumably the sample axis - confirm against the Chronos docs)
        low, median, high = np.quantile(forecast[0].numpy(), [0.1, 0.5, 0.9], axis=0)

        # Store the results for every prediction step separately
        for step in range(horizon):
            predictions[step].append(median[step])

    # Align every step with the last `day_len` observations: the forecast of
    # step `step` made at a given origin targets `step` positions later, so its
    # window starts `horizon - 1 - step` entries into the list.
    for step in range(horizon):
        first = horizon - 1 - step
        predictions[step] = predictions[step][first:first + day_len]

    # Calculate the r2 scores and store them per forecast step
    for step in range(horizon):
        r2_scores[step].append(r2_score(actual, predictions[step]))

Visualise results

In [None]:
# `predictions` holds the forecasts of the last station processed by the loop,
# so the dates and the observed values must come from that same station.
plotted_station = station_list[-1]
plotted_data = watercourse_by_stations[plotted_station]

plt.figure(figsize=(8, 4))
plt.plot(plotted_data['date'][-200:], plotted_data['level_diff'][-200:], color="royalblue", label="water level difference")
plt.plot(plotted_data['date'][-day_len:], predictions[0], color="tomato", label="median forecast")
plt.legend()
plt.grid()
plt.show()

Evaluation

In [None]:
# Mean and standard deviation of the R2 scores, one entry per forecast step
r2_average = [np.mean(r2_scores[step]) for step in range(5)]
std_dev = [np.std(r2_scores[step]) for step in range(5)]

In [None]:
# Display the average R2 score of each of the 5 forecast steps
r2_average

In [None]:
# Write the average R2 scores to a text report, one value per line
with open('../reports/chronos-large/chronos-large-surface-water-r2.txt', 'w') as report:
    report.writelines(f"{value}\n" for value in r2_average)

In [None]:
# Write the standard deviations of the R2 scores to a text report, one value per line
std_dev_lines = "".join(f"{value}\n" for value in std_dev)
with open('../reports/chronos-large/chronos-large-surface-water-std-dev.txt', 'w') as report:
    report.write(std_dev_lines)

In [None]:
# Regroup the scores so that each inner list holds all forecast steps of one station
r2_scores_transposed = list(map(list, zip(*r2_scores)))
# Map every station id to its list of per-step R2 scores
scores = {
    station_id: station_scores
    for station_id, station_scores in zip(station_list, r2_scores_transposed)
}
scores

In [None]:
# Order the stations by ascending R2 score of the first forecast step
scores_sorted = dict(sorted(scores.items(), key=lambda entry: entry[1][0]))
scores_sorted

In [None]:
# Persist the sorted per-station R2 scores with joblib
r2_stations_path = '../reports/chronos-large/chronos-large-surface-water-r2-stations.joblib'
joblib.dump(scores_sorted, r2_stations_path)

#### Forecast for absolute water levels

In [None]:
# List of stations used for testing (ids as strings; they are cast to int afterwards)
station_list = [
    '2530', '2620', '4200', '4230', '4270', '4515',
    '4520', '4570', '4575', '5040', '5078', '5330',
    '5425', '5500', '6060', '6068', '6200', '6220',
    '6300', '6340', '8454', '8565',
]

In [None]:
# Rebind station_list to the single station 1335; when this cell is run it
# replaces the longer list assigned in the previous cell
station_list = [1335]

In [None]:
# Cast the station ids to int, replacing the list contents in place
station_list[:] = map(int, station_list)

In [None]:
# Load the pretrained Chronos model once; the same pipeline is reused for every station
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-large",
    device_map="cuda",
    torch_dtype=torch.bfloat16,
)

horizon = 5     # number of steps forecast from every origin
day_len = 200   # number of trailing observations that are scored

# r2_scores[step] collects one R2 value per station for forecast step `step`
r2_scores = [[] for _ in range(horizon)]
predictions = []

for station in station_list:
    level = watercourse_by_stations[station]['level']

    # Scaling does not depend on the forecast origin, so it is done once per station.
    # NOTE(review): the scaling statistics are computed over the whole series,
    # including the observations that are forecast below - confirm this
    # look-ahead is acceptable for the evaluation.
    scaled_level = standard_scaling(level.values)

    # predictions[step] stores the median forecast of step `step` for every origin
    predictions = [[] for _ in range(horizon)]

    # Roll the forecast origin forward: the context ends `offset` observations
    # before the end of the series, starting far enough back that every step
    # covers the whole scored window.
    for offset in range(day_len + horizon - 1, 0, -1):
        context = torch.tensor(scaled_level[:-offset])

        forecast = pipeline.predict(
            context=context,
            prediction_length=horizon,
            num_samples=20
        )

        # Quantiles are taken along axis 0 of the first forecast
        # (presumably the sample axis - confirm against the Chronos docs)
        low, median, high = np.quantile(forecast[0].numpy(), [0.1, 0.5, 0.9], axis=0)

        # Bring the median forecast back to the original water-level units
        median = standard_unscaling(level, median)

        # Store the results for every prediction step separately
        for step in range(horizon):
            predictions[step].append(median[step])

    # Align every step with the last `day_len` observations: the forecast of
    # step `step` made at a given origin targets `step` positions later, so its
    # window starts `horizon - 1 - step` entries into the list.
    for step in range(horizon):
        first = horizon - 1 - step
        predictions[step] = predictions[step][first:first + day_len]

    # Calculate the r2 scores and store them per forecast step
    for step in range(horizon):
        r2_scores[step].append(r2_score(level[-day_len:], predictions[step]))

Visualise results

In [None]:
# `predictions` holds the forecasts of the last station processed by the loop,
# so the dates and the observed values must come from that same station
# instead of a hard-coded station id.
plotted_station = station_list[-1]
plotted_data = watercourse_by_stations[plotted_station]

plt.figure(figsize=(8, 4))
plt.plot(plotted_data['date'][-200:], plotted_data['level'][-200:], color="royalblue", label="water level")
plt.plot(plotted_data['date'][-day_len:], predictions[0], color="tomato", label="median forecast")
plt.legend()
plt.grid()
plt.show()

Evaluation

In [None]:
# Mean and standard deviation of the R2 scores, one entry per forecast step
r2_average, std_dev = [], []

for step in range(5):
    step_scores = r2_scores[step]
    r2_average += [np.mean(step_scores)]
    std_dev += [np.std(step_scores)]

In [None]:
# Display the average R2 score of each of the 5 forecast steps
r2_average

In [None]:
# Display the standard deviation of the R2 scores for each of the 5 forecast steps
std_dev

In [None]:
# r2_scores[0] holds one first-step R2 value per entry of station_list, in
# that order, so the scores are keyed by station_list; zipping with the keys
# of the whole dataset would attach them to the wrong stations.
scores = dict(zip(station_list, r2_scores[0]))
# Order the stations by ascending R2 score
scores = dict(sorted(scores.items(), key=lambda item: item[1]))

In [None]:
# Display the first-step R2 scores, sorted in ascending order
scores