### Hourly forecasting of energy meter readings on BDG2 dataset

- historical data = 1 week (168 data points)
- forecast horizon = 1 day (24 data points)

**Loading TimesFM Model**

In [1]:
import os
import glob
import time
from datetime import datetime
import pandas as pd
import numpy as np
from collections import defaultdict

import timesfm

2024-12-18 16:44:08.627194: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.5/lib64


In [2]:
tfm = timesfm.TimesFm(
    context_len=512 ,
    horizon_len=24,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend='cpu'
)
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

2024-12-18 16:44:14.335555: W external/xla/xla/service/gpu/nvptx_compiler.cc:718] The NVIDIA driver's CUDA version is 12.2 which is older than the ptxas CUDA version (12.5.82). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Constructing model weights.




Constructed model weights in 2.50 seconds.
Restoring checkpoint from /home/user/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


ERROR:absl:For checkpoint version > 1.0, we require users to provide
          `train_state_unpadded_shape_dtype_struct` during checkpoint
          saving/restoring, to avoid potential silent bugs when loading
          checkpoints to incompatible unpadded shapes of TrainState.


Restored checkpoint in 0.99 seconds.
Jitting decoding.
Jitted decoding in 16.98 seconds.


In [3]:
# Data pipelining
def get_batched_data_fn(sub_df,
    batch_size: int = 128, 
    context_len: int = 168, 
    horizon_len: int = 24):
    
    examples = defaultdict(list)
    num_examples = 0
    for start in range(0, len(sub_df) - (context_len + horizon_len), horizon_len):
      num_examples += 1
      #examples["country"].append(country)
      examples["inputs"].append(sub_df["y"][start:(context_end := start + context_len)].tolist())
      #examples["gen_forecast"].append(sub_df["gen_forecast"][start:context_end + horizon_len].tolist())
      #examples["week_day"].append(sub_df["week_day"][start:context_end + horizon_len].tolist())
      examples["outputs"].append(sub_df["y"][context_end:(context_end + horizon_len)].tolist())
      examples['inputs_ts'].append(sub_df["ds"][start:(context_end := start + context_len)].tolist())
      examples["outputs_ts"].append(sub_df["ds"][context_end:(context_end + horizon_len)].tolist())

    #print(num_examples)
  
    def data_fn():
        for i in range(1 + (num_examples - 1) // batch_size):
            yield {k: v[(i * batch_size) : ((i + 1) * batch_size)] for k, v in examples.items()}
  
    return data_fn

In [4]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale

# Benchmark
batch_size = 32
context_len = 168
horizon_len = 24

def process_building(df):
   #  input_data = get_batched_data_fn(df, batch_size=32)
    input_data = get_batched_data_fn(df, batch_size=500)

    metrics = defaultdict(list)
    results_all = []
    for i, example in enumerate(input_data()):
        #print(datetime.now(), i)
        raw_forecast, _ = tfm.forecast(inputs=example["inputs"], freq=[0] * len(example["inputs"]))

        #print(f"Batch {i+1}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, Time: {end_time - start_time:.2f}s")
        for ts, y_true, y_pred in zip(example['outputs_ts'], example['outputs'], raw_forecast):
            res_df = pd.DataFrame({'ts': ts, 'y_true': y_true,'y_pred': y_pred})
            results_all.append(res_df)
        
    results_all_df = pd.concat(results_all)
    return results_all_df

def process_file(filename):
    df = pd.read_csv(filename)
    df = df.set_index(['timestamp'])

    if df.shape[1] < 2:
        return None
        
    print(datetime.now(), df.shape, flush=True)

    results_all = []
    i =0
    for building_name in df.columns:
        print(datetime.now(), building_name, flush=True)
        df1 = df[[building_name]]
        df1 = df1.reset_index()
        df1.columns = ['ds', 'y']

        #df1['y'] = MinMaxScaler().fit_transform(df1.y)
        df1['y'] = minmax_scale(df1['y'])

        res = process_building(df1)
        res['building'] = building_name
        results_all.append(res)
        i+=1
        # if i == 2:
        #    break
        #break
        
    results_all_df = pd.concat(results_all)
    return results_all_df

In [5]:
files_list = glob.glob('/home/user/New_Buildings_Datasets/Residential/Mathura_and_Bareilly/dataverse_files/processed/Mathura/hourly/*_nan.csv')
# filename = '/home/user/New_Buildings_Datasets/Mathura_and_Bareilly/dataverse_files/processed/Mathura/Mathura_2019.csv'

dataset = 'Mathura-test'
os.makedirs(f'./forecasts/{dataset}/', exist_ok = True)
os.makedirs(f'./results/{dataset}/', exist_ok = True)

for filename in files_list:
    print(datetime.now(), filename)
    results = process_file(filename)
    if results is not None:
        results.to_csv(f'./forecasts/{dataset}/{os.path.basename(filename)}')
    print('')

2024-12-18 16:44:36.353297 /home/user/New_Buildings_Datasets/Residential/Mathura_and_Bareilly/dataverse_files/processed/Mathura/hourly/Mathura_2020_nan.csv
2024-12-18 16:44:36.378025 (8784, 38)
2024-12-18 16:44:36.378614 MH01
2024-12-18 16:44:40.406416 MH02
2024-12-18 16:44:42.911743 MH03
2024-12-18 16:44:45.429049 MH06
2024-12-18 16:44:48.000830 MH07
2024-12-18 16:44:50.523815 MH08
2024-12-18 16:44:53.118831 MH09
2024-12-18 16:44:55.664761 MH10
2024-12-18 16:44:58.383395 MH11
2024-12-18 16:45:02.274334 MH12
2024-12-18 16:45:04.788891 MH14
2024-12-18 16:45:07.375278 MH15
2024-12-18 16:45:09.899907 MH17
2024-12-18 16:45:12.483796 MH18
2024-12-18 16:45:15.034461 MH20
2024-12-18 16:45:17.547343 MH21
2024-12-18 16:45:20.062513 MH22
2024-12-18 16:45:24.121448 MH23
2024-12-18 16:45:26.642273 MH24
2024-12-18 16:45:29.199877 MH25
2024-12-18 16:45:31.765204 MH26
2024-12-18 16:45:34.418190 MH27
2024-12-18 16:45:37.087225 MH28
2024-12-18 16:45:39.793203 MH30
2024-12-18 16:45:42.462764 MH31
2024-1

### Metrics

In [6]:
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import root_mean_squared_log_error
from permetrics.regression import RegressionMetric

dataset = 'Mathura-test'
files_list = glob.glob(f'./forecasts/{dataset}/*.csv')

metrics_all_files = []

for filename in files_list:
    res = pd.read_csv(filename)
    metrics_all = []
    for (g, data) in res.groupby(['building']):
        data = data.dropna()
        data = data[data.y_pred >= 0]
        print(g)  
        if data.empty:
            continue
        rmse= root_mean_squared_error(data.y_true, data.y_pred)
        mae= mean_absolute_error(data.y_true, data.y_pred)
        mape = mean_absolute_percentage_error(data.y_true, data.y_pred)
        mse= mean_squared_error(data.y_true, data.y_pred)
        msle= mean_squared_log_error(data.y_true, data.y_pred)
        rmsle= root_mean_squared_log_error(data.y_true, data.y_pred)
        nrmse = rmse / (data.y_true.mean()) 

        evaluator = RegressionMetric(data.y_true.to_list(), data.y_pred.to_list())
        nrmse_eve = evaluator.normalized_root_mean_square_error()
        evaluator = RegressionMetric(data.y_true.to_list(), data.y_pred.to_list())
        smape= evaluator.symmetric_mean_absolute_percentage_error()
    
        metrics = pd.DataFrame({'building_name': [g[0]], 
                           'mae': [mae],
                            'mape': [mape],
                           'mse': [mse], 'rmse': [rmse], 'msle': [msle], 'rmsle': [rmsle], 'nrmse' : [nrmse],
                              'nrmse_eve':[nrmse_eve] , 'sMAPE' : [smape]})
        metrics_all.append(metrics)
    
    metrics_all_df = pd.concat(metrics_all)
    metrics_all_df.to_csv(f'./results/{dataset}/{os.path.basename(filename)}')

    metrics_all_df['filename'] = os.path.basename(filename)
    metrics_all_files.append(metrics_all_df)

metrics_all_files_df = pd.concat(metrics_all_files)

('MH01',)
('MH02',)
('MH03',)
('MH06',)
('MH07',)
('MH08',)
('MH09',)
('MH10',)
('MH11',)
('MH12',)
('MH14',)
('MH15',)
('MH17',)
('MH18',)
('MH20',)
('MH21',)
('MH22',)
('MH23',)
('MH24',)
('MH25',)
('MH26',)
('MH27',)
('MH28',)
('MH30',)
('MH31',)
('MH33',)
('MH34',)
('MH35',)
('MH36',)
('MH37',)
('MH38',)
('MH39',)
('MH41',)
('MH42',)
('MH43',)
('MH45',)
('MH46',)
('MH47',)
('MH01',)
('MH03',)
('MH06',)
('MH07',)
('MH08',)
('MH09',)
('MH10',)
('MH11',)
('MH12',)
('MH15',)
('MH17',)
('MH18',)
('MH20',)
('MH21',)
('MH22',)
('MH23',)
('MH24',)
('MH25',)
('MH26',)
('MH27',)
('MH28',)
('MH30',)
('MH31',)
('MH33',)
('MH34',)
('MH35',)
('MH36',)
('MH37',)
('MH39',)
('MH41',)
('MH42',)
('MH43',)
('MH45',)
('MH46',)
('MH47',)
('MH01',)
('MH02',)
('MH03',)
('MH06',)
('MH07',)
('MH08',)
('MH09',)
('MH10',)
('MH11',)
('MH12',)
('MH14',)
('MH15',)
('MH17',)
('MH18',)
('MH20',)
('MH21',)
('MH22',)
('MH23',)
('MH24',)
('MH25',)
('MH26',)
('MH27',)
('MH28',)
('MH30',)
('MH31',)
('MH33',)
('MH34',)


In [7]:
metrics_all_files_df.to_csv(f'./results/{dataset}/results_combined.csv')
metrics_all_files_df

Unnamed: 0,building_name,mae,mape,mse,rmse,msle,rmsle,nrmse,nrmse_eve,sMAPE,filename
0,MH02,0.101277,2.771983e+13,0.023310,0.152677,0.011753,0.108412,0.386663,0.716388,0.153253,Mathura_2020_nan.csv
0,MH03,0.021819,2.869312e+12,0.001112,0.033342,0.000883,0.029715,0.429309,0.756857,0.149722,Mathura_2020_nan.csv
0,MH06,0.057767,1.716859e+13,0.009751,0.098749,0.006449,0.080306,1.062951,2.179068,0.315431,Mathura_2020_nan.csv
0,MH07,0.021768,2.713908e+12,0.002212,0.047029,0.001487,0.038560,0.882838,1.031293,0.184041,Mathura_2020_nan.csv
0,MH08,0.042662,2.233859e+13,0.004985,0.070607,0.003438,0.058634,0.394545,1.601755,0.127157,Mathura_2020_nan.csv
...,...,...,...,...,...,...,...,...,...,...,...
0,MH42,0.094823,3.896338e+13,0.017376,0.131817,0.010021,0.100105,0.392824,1.120172,0.180415,Mathura_2019_nan.csv
0,MH43,0.066776,4.498040e+12,0.011283,0.106222,0.006456,0.080348,0.535550,0.734670,0.175416,Mathura_2019_nan.csv
0,MH45,0.074951,1.837768e+13,0.013065,0.114303,0.008133,0.090181,0.729909,0.809725,0.259080,Mathura_2019_nan.csv
0,MH46,0.118000,5.982589e+13,0.024375,0.156124,0.014745,0.121429,0.524654,1.045528,0.266779,Mathura_2019_nan.csv


In [8]:
metrics_all_files_df.describe()*100

Unnamed: 0,mae,mape,mse,rmse,msle,rmsle,nrmse,nrmse_eve,sMAPE
count,9000.0,9000.0,9000.0,9000.0,9000.0,9000.0,9000.0,9000.0,9000.0
mean,7.363568,2634251000000000.0,1.427711,11.397742,0.832035,8.757975,65.71411,123.054118,23.867558
std,2.820394,3020407000000000.0,0.79091,3.60654,0.439413,2.564062,25.993111,78.365702,9.17899
min,2.058544,28.07201,0.111171,3.334227,0.088296,2.971471,31.093778,59.979607,10.090298
25%,5.3676,500456800000000.0,0.843206,9.182622,0.49604,7.041053,47.001409,81.277218,17.415486
50%,7.667418,1785461000000000.0,1.348019,11.610392,0.805448,8.974675,56.430186,98.893705,21.235198
75%,9.253556,3894992000000000.0,2.004073,14.156471,1.159548,10.76822,83.644863,138.228616,28.029441
max,14.938184,2.002955e+16,3.621532,19.030324,2.300191,15.16638,144.782959,613.766764,61.540758
