In [2]:
#!pip install tensorflow==2.15.0  # Erstat med den ønskede version. Denne er nødvendig for at bruge TCN
#!pip install keras_tuner
#!pip install keras-tcn
# Grundlæggende pakker
import numpy as np
import matplotlib.pyplot as plt

# Deep Learning med TensorFlow og Keras
import tensorflow as tf
from tensorflow.keras.utils import plot_model

import logging
import os
import sys
import importlib

# Check if GPU is available and configure memory growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU is available and memory growth enabled")
    except RuntimeError as e:
        print("Memory growth setting failed:", e)
else:
    print("No GPU found. Using CPU.")

# Check TensorFlow version for reference
print("TensorFlow Version:", tf.__version__)

# Additional GPU details
gpus = tf.config.list_physical_devices('GPU')
print("Available GPUs:", gpus)

# Optional: To specify a particular GPU, if multiple are available
gpu_number = 0  # Change this index if needed
if len(gpus) >= 2:
    try:
        tf.config.experimental.set_visible_devices(gpus[gpu_number], 'GPU')
        print(f"Using GPU: {gpus[gpu_number]}")
    except RuntimeError as e:
        print("Failed to set specific GPU:", e)

ROOT_PATH = '/Users/rasmusklitteandersen/Library/CloudStorage/GoogleDrive-rasmusklitteandersen@gmail.com/Mit drev/speciale/'

# ROOT_PATH = '/content/drive/MyDrive/speciale/'
# from google.colab import drive
# drive.mount('/content/drive')

# gpu_info = !nvidia-smi
# gpu_info = '\n'.join(gpu_info)
# if gpu_info.find('failed') >= 0:
#   print('Not connected to a GPU')
# else:
#   print(gpu_info)

# from psutil import virtual_memory
# ram_gb = virtual_memory().total / 1e9
# print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# if ram_gb < 20:
#   print('Not using a high-RAM runtime')
# else:
#   print('You are using a high-RAM runtime!')

No GPU found. Using CPU.
TensorFlow Version: 2.18.0
Available GPUs: []


In [19]:
# Set up paths and configurations
#ROOT_PATH = '/content/drive/MyDrive/speciale/'
sys.path.append(ROOT_PATH)
os.chdir(ROOT_PATH)

# Import custom utilities and modules
from utils.misc import LoadData, TerminateNaN, Plotting, EvaluationMetric
from utils.models import ModelTrainer

# ===== CONFIGURATION SECTION =====
# Paths
DATA_PATH = f'{ROOT_PATH}data/final_dataset_test.csv'
MODEL_PATH = f'{ROOT_PATH}models/'
IMAGE_PATH = f'{ROOT_PATH}/images/'
LOG_DIR = f'{MODEL_PATH}/logs/'
TUNING_DIR = f'{MODEL_PATH}/tuning/'
RESULTS_DIR = f'{ROOT_PATH}results/'
TABLES_DIR = f'{ROOT_PATH}tables/'

# Flags and Options
INCLUDE_LAGS = True
INCLUDE_SEASON_VARS = False
INCLUDE_WEATHER = False
TEST = False
TIME_START = '2019-10-31'
TIME_END_PERIODS = ['2021-09-30', '2023-01-01', '2024-07-01']
MODELS = ['LSTM', 'TCN', 'Hybrid', 'Transformer']
#MODELS = ['Hybrid', 'Transformer']
TUNER = 'Hyperband'

# Training Parameters
TRAINING_EPOCH = 30
FINAL_MODEL_EPOCH = 50
BATCH_SIZE = 64
LOSS = 'mean_absolute_error'

# Generate dynamic strings based on flags
def get_extra_info():
    extra_info = ''
    if not INCLUDE_WEATHER:
        extra_info += '_no_weather'
    if not INCLUDE_LAGS:
        extra_info += '_no_lags'
    if not INCLUDE_SEASON_VARS:
        extra_info += '_no_season_vars'
    return extra_info

# ===== LOGGING SETUP =====
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# ===== FUNCTIONS =====

def reload_utils_misc():
    """Function to reload utils.misc module if changes are made."""
    import utils.misc
    importlib.reload(utils.misc)


def setup_directories(test_mode, image_path, log_dir, tuning_dir, results_dir, tables_dir):
    """Set up directories based on whether TEST mode is enabled."""
    test_suffix = 'test/'
    
    if test_mode: 
        image_path = f"{image_path}{test_suffix}"
        log_dir = f"{log_dir}{test_suffix}"
        tuning_dir = f"{tuning_dir}{test_suffix}"
        results_dir = f"{results_dir}{test_suffix}"
        tables_dir = f"{tables_dir}{test_suffix}"
        
    return image_path, log_dir, tuning_dir, results_dir, tables_dir


def initialize_data_loader():
    """Initialize the data loading utility with required flags."""
    return LoadData(INCLUDE_LAGS, INCLUDE_SEASON_VARS, INCLUDE_WEATHER, TUNER)

def initialize_evaluation_metric():
    """Initialize the evaluation metric utility with required flags."""
    return EvaluationMetric(INCLUDE_LAGS, INCLUDE_SEASON_VARS, INCLUDE_WEATHER, TUNER)

# ===== MAIN SCRIPT =====

if __name__ == "__main__":
    # Setup directories based on TEST mode
    # dirs = setup_directories(TEST)
    IMAGE_PATH, LOG_DIR, TUNING_DIR, RESULTS_DIR, TABLES_DIR = setup_directories(
    test_mode=TEST, 
    image_path=IMAGE_PATH, 
    log_dir=LOG_DIR, 
    tuning_dir=TUNING_DIR, 
    results_dir=RESULTS_DIR, 
    tables_dir=TABLES_DIR
    )

    extra_info = get_extra_info()
    
    # Reload utils if changes were made
    reload_utils_misc()

    # Initialize components
    load_data = initialize_data_loader()
    terminate_nan = TerminateNaN()
    evaluation_metric = initialize_evaluation_metric()

    # Set random seeds for reproducibility
    tf.random.set_seed(14)
    np.random.seed(14)

    # Log basic configuration
    logger.info(f"Starting training with configuration:")
    logger.info(f"Data path: {DATA_PATH}")
    logger.info(f"Model path: {MODEL_PATH}")
    logger.info(f"Model path: {TUNING_DIR}")
    logger.info(f"Model path: {RESULTS_DIR}")
    logger.info(f"Include Lags: {INCLUDE_LAGS}, Include Season Vars: {INCLUDE_SEASON_VARS}, Include Weather: {INCLUDE_WEATHER}")
    logger.info(f"Training epochs: {TRAINING_EPOCH}, Final model epochs: {FINAL_MODEL_EPOCH}, Batch size: {BATCH_SIZE}")

INFO:root:Starting training with configuration:
INFO:root:Data path: /Users/rasmusklitteandersen/Library/CloudStorage/GoogleDrive-rasmusklitteandersen@gmail.com/Mit drev/speciale/data/final_dataset_test.csv
INFO:root:Model path: /Users/rasmusklitteandersen/Library/CloudStorage/GoogleDrive-rasmusklitteandersen@gmail.com/Mit drev/speciale/models/
INFO:root:Model path: /Users/rasmusklitteandersen/Library/CloudStorage/GoogleDrive-rasmusklitteandersen@gmail.com/Mit drev/speciale/models//tuning/
INFO:root:Model path: /Users/rasmusklitteandersen/Library/CloudStorage/GoogleDrive-rasmusklitteandersen@gmail.com/Mit drev/speciale/results/
INFO:root:Include Lags: True, Include Season Vars: False, Include Weather: False
INFO:root:Training epochs: 30, Final model epochs: 50, Batch size: 64


In [31]:
import pickle
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf
from scipy import stats

# Sample function for Diebold-Mariano test
def diebold_mariano_test(errors_model1, errors_model2):
    d = errors_model1 - errors_model2
    d = np.ravel(d)
    mean_d = np.mean(d)
    acf_values = acf(d, fft=True)
    variance_d = np.var(d) * (1 + 2 * sum(acf_values[1:])) / len(d)
    dm_stat = mean_d / np.sqrt(variance_d)
    p_value = 2 * (1 - stats.norm.cdf(abs(dm_stat)))
    return dm_stat, p_value

######### DISSE VÆRDIER SKAL MODIFICERES ######### 
TIME_END = TIME_END_PERIODS[2]
BEST_MODEL = 'TCN'
print(f'Time end: {TIME_END}')
endswith = ['__Hyperband', '_ex_wheater', '__Hyperband_ex_season_dummies_ex_wheater']
endswith_path = endswith[1]
###################################################


df, TIME_PERIOD = load_data.load_and_preprocess_data(DATA_PATH, TIME_START, TIME_END)
print(f'Time period: {TIME_PERIOD}')

results = []
with open(f'{RESULTS_DIR}predictions/{BEST_MODEL}/{BEST_MODEL}_{TIME_PERIOD}_Hyperband_predictions{endswith_path}.pkl', 'rb') as f:
        predictions_1 = pickle.load(f)

for i in range(len(MODELS)):  
    
    MODEL = MODELS[i]
    print(MODEL)
    with open(f'{RESULTS_DIR}predictions/{MODEL}/{MODEL}_{TIME_PERIOD}_Hyperband_predictions{endswith_path}.pkl', 'rb') as f:
        predictions_2 = pickle.load(f)

    log_dir, tuning_dir = load_data.setup_directories(LOG_DIR, TUNING_DIR, TIME_PERIOD, MODEL)
    datasets, y, X_train, X_test, scaler_y, y_test = load_data.split_and_scale_data(df)

    errors_model1 = np.abs(np.array(y_test[:len(predictions_1)]) - predictions_1)
    errors_model2 = np.abs(np.array(y_test[:len(predictions_1)]) - predictions_2)
    d = errors_model1 - errors_model2

    # Perform Diebold-Mariano test on the errors
    dm_stat, p_value = diebold_mariano_test(errors_model1, errors_model2)
    print(f"Diebold-Mariano Test Statistic: {dm_stat}")
    print(f"P-Value: {p_value}")

    # Interpretation
    if p_value < 0.1:
        print("The difference in forecasting errors is statistically significant.")
    else:
        print("The difference in forecasting errors is not statistically significant.")
    # Collect results in a list for the CSV
    

    interpretation = (
            "Statistically significant difference" if p_value < 0.1
            else "No statistically significant difference"
        )

    # Save results
    results.append({
        "Time Period": TIME_PERIOD,
        "Best Model": BEST_MODEL,
        "Compared Model": MODEL,
        "Diebold-Mariano Statistic": dm_stat,
        "P-Value": p_value,
        "Interpretation": interpretation
    })
    print(results)

# Convert results to a DataFrame and save to CSV
results_df = pd.DataFrame(results)
csv_path = f"{RESULTS_DIR}diebold_mariano/diebold_mariano_results_{TIME_PERIOD}{extra_info}.csv"
results_df.to_csv(csv_path, index=False)

Time end: 2024-07-01
Time period: 2019-10-31_to_2024-07-01
LSTM
Diebold-Mariano Test Statistic: -0.9471812583297132
P-Value: 0.3435464237755075
The difference in forecasting errors is not statistically significant.
[{'Time Period': '2019-10-31_to_2024-07-01', 'Best Model': 'TCN', 'Compared Model': 'LSTM', 'Diebold-Mariano Statistic': -0.9471812583297132, 'P-Value': 0.3435464237755075, 'Interpretation': 'No statistically significant difference'}]
TCN
Diebold-Mariano Test Statistic: nan
P-Value: nan
The difference in forecasting errors is not statistically significant.
[{'Time Period': '2019-10-31_to_2024-07-01', 'Best Model': 'TCN', 'Compared Model': 'LSTM', 'Diebold-Mariano Statistic': -0.9471812583297132, 'P-Value': 0.3435464237755075, 'Interpretation': 'No statistically significant difference'}, {'Time Period': '2019-10-31_to_2024-07-01', 'Best Model': 'TCN', 'Compared Model': 'TCN', 'Diebold-Mariano Statistic': nan, 'P-Value': nan, 'Interpretation': 'No statistically significant dif

  acf = avf[: nlags + 1] / avf[0]


In [None]:

#mae = metrics.mae(np.array(y_test[:len(predictions)]), np.array(predictions))

errors_model1 = np.abs(np.array(y_test[:len(predictions_1)]) - predictions_1)
errors_model2 = np.abs(np.array(y_test[:len(predictions_1)]) - predictions_2)
d = errors_model1 - errors_model2
print(d)

# Sample function for Diebold-Mariano test
def diebold_mariano_test(errors_model1, errors_model2):
    d = errors_model1 - errors_model2
    d = np.ravel(d)
    mean_d = np.mean(d)
    acf_values = acf(d, fft=True)
    variance_d = np.var(d) * (1 + 2 * sum(acf_values[1:])) / len(d)
    dm_stat = mean_d / np.sqrt(variance_d)
    p_value = 2 * (1 - stats.norm.cdf(abs(dm_stat)))
    return dm_stat, p_value

# Perform Diebold-Mariano test on the errors
dm_stat, p_value = diebold_mariano_test(errors_model1, errors_model2)
print(f"Diebold-Mariano Test Statistic: {dm_stat}")
print(f"P-Value: {p_value}")

# Interpretation
if p_value < 0.05:
    print("The difference in forecasting errors is statistically significant.")
else:
    print("The difference in forecasting errors is not statistically significant.")