# Template

### Default libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


import warnings
warnings.filterwarnings('ignore')

### Default Functions

In [3]:
# Custom prediction function
def custom_predict(X, model):
    """
    Predict with `model` and force the prediction to 0 on flagged rows.

    The full input `X` (including its 'flagged' column) is handed to
    `model.predict`; afterwards every row whose 'flagged' value equals 1
    has its prediction replaced by 0.

    Parameters:
    - X (DataFrame): Feature data; must contain a 'flagged' column.
    - model (model object): Any object exposing a `predict(X)` method.

    Returns:
    - array: The model predictions, with 0 substituted where 'flagged' == 1.
    """
    raw_predictions = model.predict(X)

    # Rows marked as flagged are overridden; all others keep the model output
    is_flagged = X['flagged'] == 1
    return np.where(is_flagged, 0, raw_predictions)

def metrics(y_test, y_pred):
    """
    Computes and prints several evaluation metrics for regression models.

    Parameters:
    - y_test (array): True target values.
    - y_pred (array): Predicted target values from the model.

    Prints:
    - MSE, RMSE, MAE, R^2, and MAPE values.

    Notes:
    - RMSE is derived as sqrt(MSE) rather than by passing `squared=False`,
      which newer scikit-learn releases no longer accept. For single-output
      targets the value is the same.
    - MAPE is computed on positional NumPy arrays, so a pandas `y_test` with
      a non-default index is never re-aligned against `y_pred`.
    - Samples whose true value is 0 are left out of the MAPE average (their
      percentage error is undefined). If every true value is 0, MAPE is NaN.
    """
    mse  = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae  = mean_absolute_error(y_test, y_pred)
    r2   = r2_score(y_test, y_pred)

    # Work on plain arrays: element i of y_test is compared with element i of
    # y_pred regardless of any pandas index the inputs may carry.
    actual = np.asarray(y_test, dtype=float)
    predicted = np.asarray(y_pred, dtype=float).reshape(actual.shape)

    # Percentage error is undefined where the true value is 0
    nonzero = actual != 0
    if nonzero.any():
        relative_error = (actual[nonzero] - predicted[nonzero]) / actual[nonzero]
        mape = 100 * np.mean(np.abs(relative_error))
    else:
        mape = float('nan')

    print(f" MSE = {mse}\nRMSE = {rmse}\n MAE = {mae}\n R^2 = {r2}\n MAPE = {mape}%")

### load data temp

In [4]:
# Directory containing the combined datasets (absolute local path)
filepath = 'C:/Users/madsh/OneDrive/Dokumenter/kandidat/Fællesmappe/Speciale/Forecasting-energy-consumption-in-Denmark/Data/Combined data/'

# Read the daily flagged electricity consumption data and use the 'HourDK'
# column as the DataFrame index
data_flagged = pd.read_csv(filepath + 'combined_daily_flagged.csv').set_index('HourDK')

### Default test and training split

In [5]:
# Chronological train/test split: rows whose index is up to and including
# SplitDate form the training set, all later rows form the test set.
# NOTE(review): the index values are compared with the string directly; if
# 'HourDK' is stored as strings rather than datetimes this is a lexicographic
# comparison — confirm the index format.
SplitDate = '2022-06-30'
training_set = data_flagged[data_flagged.index <= SplitDate]
test_set = data_flagged[data_flagged.index > SplitDate]