### **Generating figures and tables**

### **1** Tables

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Small example frame used to try out table rendering
data = {'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 30, 35]}
df = pd.DataFrame(data)

# CSS rules for the pandas Styler (HTML rendering): coloured header cells,
# bordered and centred data cells
header_rule = {'selector': 'th', 'props': [('background-color', '#40466e'), ('color', 'white')]}
cell_rule = {'selector': 'td', 'props': [('border', '1px solid black')]}
styled_df = (
    df.style
    .set_table_styles([header_rule, cell_rule])
    .set_properties(**{'text-align': 'center', 'border': '1px solid black'})
)

# Draw the frame as a Matplotlib table that fills the whole (hidden) axes.
# NOTE: the table is built from df.values / df.columns, so the Styler rules
# stored in styled_df are not used by this figure.
fig, ax = plt.subplots(figsize=(5, 2))
ax.axis('off')
ax.table(
    cellText=df.values,
    colLabels=df.columns,
    cellLoc='center',
    loc='center',
    bbox=[0, 0, 1, 1],
)
plt.show()


In [None]:
import pandas as pd
import numpy as np

# Sample DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'Country': ['USA', 'Canada', 'UK']
}
df = pd.DataFrame(data)

# Applying red background color to the entire table
styled_df = df.style.applymap(lambda x: 'background-color: white; color: black')

# To display in Jupyter Notebook or save as HTML
styled_df


In [None]:
import numpy as np

##### Import all of the r2 scores

In [None]:
def _read_r2_scores(model_slug):
    """Read the saved R2 scores of one model, one score per line.

    The report of a model is read from
    ``../reports/<model_slug>/<model_slug>-ground-water-r2.txt``.
    Blank lines are skipped so that a stray empty line does not end up as an
    empty string that would later fail the conversion to float.

    Parameters
    ----------
    model_slug : str
        Name shared by the model's report directory and its file prefix.

    Returns
    -------
    list of str
        The stripped, non-empty lines of the file, in file order.
    """
    path = f'../reports/{model_slug}/{model_slug}-ground-water-r2.txt'
    with open(path, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        return [score for score in stripped_lines if score]


# chronos large
chronos_large_r2 = _read_r2_scores('chronos-large')

# gradient-boosting
gradient_boosting_r2 = _read_r2_scores('gradient-boosting')

# random forest
randomforest_r2 = _read_r2_scores('randomforest')

# n-beats
n_beats_r2 = _read_r2_scores('n-beats')

# patch-tst
patch_tst_r2 = _read_r2_scores('patchtst')

# linear regression
linear_regression_r2 = _read_r2_scores('linear-regression')


##### Dataframe with all of the results

In [None]:
# Every evaluated model paired with its list of R2 scores; keeping the pairs
# together guarantees that names and scores stay in the same order
_model_results = [
    ('Chronos-large', chronos_large_r2),
    ('GradientBoostingRegressor', gradient_boosting_r2),
    ('RandomForestRegressor', randomforest_r2),
    ('N-BEATS', n_beats_r2),
    ('PatchTST', patch_tst_r2),
    ('LinearRegression', linear_regression_r2),
]
model_names = [name for name, _ in _model_results]
model_scores = [scores for _, scores in _model_results]

In [None]:
# Create the dataframe with results: one row per model
results_df = pd.DataFrame(
    {
        'methods': model_names
    }
)

# Transpose model_scores so the scores are grouped by prediction horizon
# (each inner list holds one score per model, in model_names order)
scores_by_horizon = [list(x) for x in zip(*model_scores)]

# Add one column per prediction horizon.
# enumerate takes the number of horizons from the data itself instead of a
# hard-coded range(5), which would silently drop any further horizons.
for horizon, scores in enumerate(scores_by_horizon):
    # Scores are read from text files as strings, hence the float conversion
    results_df[f'{horizon+1} day ahead'] = np.round(np.array(scores).astype(float), 3)

In [None]:
# Display the assembled results table (one row per model, one column per horizon)
results_df

In [None]:
# Export the results table to an Excel workbook.
# The DataFrame index is written too, because to_excel keeps it by default.
results_df.to_excel('../reports/figures/results_table.xlsx')

### **2** Figures

In [None]:
import matplotlib.pyplot as plt
import joblib

##### Import all of the predictions

In [None]:
def _load_predictions(model_slug):
    """Load the saved ground-water predictions of one model.

    The file is read from
    ``../reports/<model_slug>/<model_slug>-ground-water-predictions.joblib``.

    NOTE: joblib files are pickles; only load files produced by this project.

    Parameters
    ----------
    model_slug : str
        Name shared by the model's report directory and its file prefix.

    Returns
    -------
    object
        Whatever object was stored in the file.
    """
    return joblib.load(f'../reports/{model_slug}/{model_slug}-ground-water-predictions.joblib')


chronos_large_predictions = _load_predictions('chronos-large')
gradient_boosting_predictions = _load_predictions('gradient-boosting')
randomforest_predictions = _load_predictions('randomforest')
n_beats_predictions = _load_predictions('n-beats')
patch_tst_predictions = _load_predictions('patchtst')
linear_regression_predictions = _load_predictions('linear-regression')

In [None]:
# Id of the aquifer whose series is used in the figures
aquifer = 85065

# Ground-truth measurements; the loaded object is indexed by aquifer id below
aquifer_by_stations = joblib.load('aquifer_by_stations.joblib')

In [None]:
# Delete the last 5 days of true data
# NOTE(review): the entry is overwritten in place, so running this cell a second
# time removes a further 5 rows. Reload aquifer_by_stations before re-running it.
aquifer_by_stations[aquifer] = aquifer_by_stations[aquifer][:-5]

In [None]:
def plot_predictions(horizon, window=slice(-350, -280)):
    """Plot the true water-level changes against model forecasts for one horizon.

    Reads the notebook globals ``aquifer_by_stations``, ``aquifer`` and the
    ``*_predictions`` objects, saves the figure to
    ``../reports/figures/predictions_horizon<horizon+1>.pdf`` and displays it.

    Parameters
    ----------
    horizon : int
        Zero-based index of the prediction horizon; the title and the file
        name use ``horizon + 1``.
    window : slice, optional
        Part of the series to draw. The same slice is applied to the dates,
        the true values and every forecast. Defaults to ``slice(-350, -280)``.
    """
    station = aquifer_by_stations[aquifer]
    dates = station['date'][window]

    # (legend label, predictions, colour) of each forecast drawn on the figure.
    # The commented-out entries were already disabled; they are kept so they
    # can be switched back on easily.
    forecasts = [
        ("Chronos", chronos_large_predictions, '#0072B2'),
        # ("forecast", gradient_boosting_predictions, "dimgrey"),
        # ("forecast", randomforest_predictions, "grey"),
        # ("forecast", n_beats_predictions, "darkgrey"),
        ("PatchTST", patch_tst_predictions, '#E69F00'),
        ("LinearRegression", linear_regression_predictions, '#CC79A7'),
    ]

    fig = plt.figure(figsize=(8, 4))
    plt.plot(dates, station['altitude_diff'][window], color="black", label="True data")
    for label, predictions, color in forecasts:
        plt.plot(dates, predictions[aquifer][horizon][window], color=color, label=label)
    plt.legend()
    plt.grid()
    plt.title(f'Predictions for Horizon {horizon+1}')
    plt.ylabel('Water level change (m)')
    plt.xlabel('Time')
    plt.savefig(f'../reports/figures/predictions_horizon{horizon+1}.pdf', format='pdf')
    plt.show()
    # Release the figure so that calling this function in a loop does not
    # accumulate open figures
    plt.close(fig)

In [None]:
# Draw and save one figure per prediction horizon (indices 0-4, titled 1-5)
for horizon in range(5):
    plot_predictions(horizon)

##### Graphs of r2 scores

In [None]:
# Models and horizon
models = ['Chronos-large', 'GradientBoostingRegressor', 'RandomForestRegressor', 'N-BEATS', 'PatchTST', 'LinearRegression']
# Tick labels "1 day", "2 days", ... "5 days".
# The f-string uses double quotes outside: reusing the same quote character
# inside a replacement field is a SyntaxError before Python 3.12.
horizons = [f"{day} day{'s' if day > 1 else ''}" for day in range(1, 6)]

# R2 scores as a float array: one row per model, one column per horizon
r2_scores = np.array(model_scores).astype(float)

# Set up the figure and axis
fig, ax = plt.subplots(figsize=(10, 4))

# Define the width of the bars
bar_width = 0.115
# Define positions of the bars on x-axis (left edge of every horizon group)
r = np.arange(len(horizons))

# Define colors for each model
colors = ['#0072B2', '#E69F00', '#009E73', '#CC79A7', '#D55E00', '#56B4E9']

# Plot each model's R² scores; model i is shifted by i bar widths inside a group
for i, (model, color) in enumerate(zip(models, colors)):
    ax.bar(r + i * bar_width, r2_scores[i], width=bar_width, color=color, label=model)

# Add labels and title
ax.set_xlabel('Prediction Horizons')
ax.set_ylabel('R² Scores')
ax.set_title('R² Scores for Different Models and Prediction Horizons')
# Centre every tick under its group of bars
ax.set_xticks(r + bar_width * (len(models) - 1) / 2)
ax.set_xticklabels(horizons, rotation=45)

# Place the legend outside the plot area
ax.legend(title='Models', bbox_to_anchor=(1, 1.03), loc='upper left')

# Save and show the plot
plt.tight_layout()
plt.savefig('../reports/figures/r2_scores_graph.pdf', format='pdf')
plt.show()


In [None]:
# NOTE(review): scratch cell. It replaces r2_scores with unseeded random values
# of shape (len(models), len(horizons)); rebuild r2_scores from model_scores
# before using it for real results again.
r2_scores = np.random.rand(len(models), len(horizons))
r2_scores

In [None]:
# Real R² scores as a float array rounded to 3 decimals (rows follow model_scores order)
np.round(np.array(model_scores).astype(float), 3)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Placeholder labels for a stand-alone demo of the grouped bar chart
models = ['Model 1', 'Model 2', 'Model 3', 'Model 4', 'Model 5', 'Model 6']
horizons = ['Horizon 1', 'Horizon 2', 'Horizon 3', 'Horizon 4', 'Horizon 5']

# Reproducible random R² values: one row per model, one column per horizon
np.random.seed(0)
r2_scores = np.random.rand(len(models), len(horizons))

# Bar geometry: width of a single bar and left edge of every horizon group
bar_width = 0.15
r = np.arange(len(horizons))

fig, ax = plt.subplots(figsize=(10, 7))

# One bar series per model, each shifted by a whole number of bar widths
for i, model in enumerate(models):
    offset = i * bar_width
    ax.bar(r + offset, r2_scores[i], width=bar_width, label=model)

# Axis labels and title in a single call
ax.set(
    xlabel='Prediction Horizons',
    ylabel='R² Scores',
    title='R² Scores for Different Models and Prediction Horizons',
)

# Put every tick in the middle of its group of bars
ax.set_xticks(r + bar_width * (len(models) - 1) / 2)
ax.set_xticklabels(horizons, rotation=45)
ax.legend(title='Models')

# Show the plot
plt.tight_layout()
plt.show()


### **3** R2 score for absolute water level

In [None]:
import joblib
# Read the dataset
# NOTE: joblib files are pickles; only load files produced by this project.
aquifer_by_stations = joblib.load('aquifer_by_stations.joblib')

In [None]:
# Data of aquifer 85065 without its last 5 rows
relative_aquifer = aquifer_by_stations[85065][:-5]

In [None]:
# Number of trailing rows of the series used for the evaluation below
day_len = 365

In [None]:
# Chronos-large predictions for aquifer 85065; index 0 is the first prediction
# horizon (the one plotted as "Horizon 1" in the figures section)
day_1 = joblib.load('../reports/chronos-large/chronos-large-ground-water-predictions.joblib')[85065][0]

In [None]:
# Number of predicted values; presumably this has to equal day_len for the
# element-wise addition in the next step (TODO confirm)
len(day_1)

In [None]:
# Convert the predicted relative differences into absolute altitudes.
# The slice selects day_len altitude values ending one row before the last,
# i.e. shifted back by one row relative to the values scored afterwards;
# the predicted differences are then added element-wise.
# NOTE(review): assumes day_1 holds exactly day_len values — TODO confirm.
altitudes = relative_aquifer['altitude'][-(day_len+1):-1] + day_1

In [None]:
from sklearn.metrics import r2_score
# Calculate the r2 score
# First argument: observed altitudes of the last day_len rows (ground truth);
# second argument: the altitudes reconstructed from the predictions.
r2_score(relative_aquifer['altitude'][-day_len:], altitudes)