In [None]:
from math import floor, ceil
import os
from tempfile import tempdir
import pandas as pd
import numpy as np
from numpy import newaxis

import warnings
warnings.filterwarnings("ignore")

from datetime import datetime
from msal import PublicClientApplication

import holidays
from datetime import date

from cognite.client import CogniteClient
from cognite.client.data_classes import TimeSeries, Asset

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout
from keras.models import load_model
from keras.layers import LeakyReLU

import matplotlib.pyplot as plt

import xgboost
from sklearn.model_selection import train_test_split

from tabulate import tabulate

import plotly.express as px

import wapi # wattsights egen pakke / den må installers via `pip install wapi-python`
import os
from datetime import datetime 
from datetime import timedelta

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score

import catboost as cb
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import GRU
import itertools

# import packages for hyperparameters tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn.metrics import accuracy_score


In [None]:
def _holiday_weekend_flags(start, end):
    """
    Build hourly Norwegian public-holiday and weekend indicator columns.

    Parameters
    ----------
    start, end : datetime
        Bounds handed to `pd.date_range(start, end, freq="H")`.

    Returns
    -------
    pandas.DataFrame
        Indexed by the generated hourly timestamps, with the float columns
        'holiday?' and 'weekend?' (1.0 when the calendar day of the
        timestamp is a holiday / Saturday-or-Sunday, otherwise 0.0).
    """
    no_holidays = holidays.NO()
    periods = pd.date_range(start, end, freq="H")

    # Look every calendar day up once instead of once per hourly timestamp.
    days = periods.date
    holiday_by_day = {day: float(day in no_holidays) for day in set(days)}

    return pd.DataFrame(
        {
            'holiday?': [holiday_by_day[day] for day in days],
            # weekday(): Monday is 0, so values above 4 are Saturday/Sunday.
            'weekend?': (periods.weekday > 4).astype(float),
        },
        index=periods,
    )


def get_data_cdf(start_year,start_month,start_day,end_year,end_month,end_day,include_holidays=False):
    """
    Retrieve hourly averaged NO1 curves stored in Cognite Data Fusion.

    Performs an interactive Azure AD login (a browser window is opened by
    MSAL), then downloads the hourly "average" aggregate of the consumption,
    temperature, el-price and cloud-coverage curves between the start date
    and the end date and joins them on their timestamp index.

    Parameters
    ----------
    start_year, start_month, start_day : int
        Start of the query window, passed to `datetime(...)`.
    end_year, end_month, end_day : int
        End of the query window, passed to `datetime(...)`.
    include_holidays : bool, default False
        When True, the columns 'holiday?' and 'weekend?' are left-joined onto
        the result. Off by default so the returned frame keeps exactly the
        four curve columns, which `feature_eng` renames positionally.

    Returns
    -------
    pandas.DataFrame
        One column per curve, in the order consumption, temperature, price,
        cloud coverage (plus the two flag columns when requested).

    Raises
    ------
    RuntimeError
        If AZURE_TENANT_ID or AZURE_CLIENT_ID is not set in the environment.
    LookupError
        If one of the curves does not exist in the CDF project.
    """

    # Login details
    TENANT_ID = os.getenv("AZURE_TENANT_ID")
    CLIENT_ID = os.getenv("AZURE_CLIENT_ID")
    if not TENANT_ID or not CLIENT_ID:
        # Fail early with a readable message instead of a TypeError on the
        # string concatenation below.
        raise RuntimeError(
            "AZURE_TENANT_ID and AZURE_CLIENT_ID must be set in the environment"
        )
    CDF_CLUSTER = "az-power-no-northeurope"
    COGNITE_PROJECT = "heco-dev"

    # Log in WITHOUT a client_secret
    SCOPES = [f"https://{CDF_CLUSTER}.cognitedata.com/.default"]

    AUTHORITY_HOST_URI = "https://login.microsoftonline.com"
    AUTHORITY_URI = AUTHORITY_HOST_URI + "/" + TENANT_ID
    PORT = 53000

    def authenticate_azure():
        """Run the interactive MSAL login and return the token response."""
        app = PublicClientApplication(client_id=CLIENT_ID, authority=AUTHORITY_URI)

        # interactive login - make sure you have http://localhost:port in Redirect URI in App Registration as type "Mobile and desktop applications"
        return app.acquire_token_interactive(scopes=SCOPES, port=PORT)

    creds = authenticate_azure()

    client = CogniteClient(
        token_url=creds["id_token_claims"]["iss"],
        token=creds["access_token"],
        token_client_id=creds["id_token_claims"]["aud"],
        project=COGNITE_PROJECT,
        base_url=f"https://{CDF_CLUSTER}.cognitedata.com",
        client_name="cognite-python-dev",
    )

    # Curves to fetch; the order fixes the column order of the result.
    curves = ["NO1_consumption_per_15min",
    "NO1_temperature_per_15min",
    "NO1_el_price_per_hour",
    "NO1_cloud_coverage_per_15min"
    ]

    # Query window
    start_dato = datetime(start_year,start_month,start_day)
    slutt_dato = datetime(end_year,end_month,end_day)

    df_watt = None
    for curve in curves:
        print(curve)
        series = client.time_series.retrieve(external_id=curve)
        if series is None:
            raise LookupError(
                f"Time series '{curve}' was not found in project '{COGNITE_PROJECT}'"
            )
        hm = client.datapoints.retrieve_dataframe(
            start=start_dato,
            end=slutt_dato,
            aggregates=["average"],
            granularity="1h",
            id=series.id,)
        # Seed with the first curve rather than an empty frame so the
        # timestamp index of the data is what every later join works on.
        if df_watt is None:
            df_watt = hm
        else:
            df_watt = pd.merge(df_watt, hm, left_index=True, right_index=True, how="outer")

    if include_holidays:
        # NOTE(review): flags are computed from the calendar date of the
        # timestamps as generated here (no timezone conversion) - confirm
        # that this matches the clock of the CDF index.
        flags = _holiday_weekend_flags(start_dato, slutt_dato)
        df_watt = pd.merge(df_watt, flags, left_index=True, right_index=True, how="left")

    return df_watt


In [None]:
def cdf_to_utc(df):
    """
    Stamp every index entry of `df` as UTC.

    Each index element is passed through `tz_localize('UTC')` and the
    resulting list replaces `df.index`. The frame is modified in place and
    also returned.
    """
    df.index = [stamp.tz_localize('UTC') for stamp in df.index]
    return df

In [None]:
def feature_eng(df):
    """
    Rename the four input columns and append cyclical time features.

    The columns of `df` are renamed positionally to cons_actual,
    temp_forecast, price_forecast and cc_forecast. For each of the periods
    day, week and year, a sine and a cosine of the index timestamp (in
    seconds) are added as '<Period> sin' / '<Period> cos'. Rows containing
    NaN are then dropped. `df` is modified in place and also returned.
    """
    df.columns = ["cons_actual","temp_forecast", "price_forecast", "cc_forecast"]

    seconds_per_day = 60*60*24
    # Period length in seconds for each cyclical encoding, in output order.
    cycle_lengths = {
        'Day': seconds_per_day,
        'Week': seconds_per_day*7,
        'Year': 365.2425*seconds_per_day,
    }

    # Temporary helper column; removed again before returning.
    df['Seconds'] = df.index.map(pd.Timestamp.timestamp)
    for label, length in cycle_lengths.items():
        phase = df['Seconds'] * (2 * np.pi / length)
        df[label + ' sin'] = np.sin(phase)
        df[label + ' cos'] = np.cos(phase)
    del df['Seconds']

    df.dropna(inplace=True)
    return df

In [None]:
def data_split(X, y):
    """
    Randomly partition X and y into train, test and validation sets.

    70 % of the rows go to training; the remaining 30 % are split evenly
    into test and validation. Both splits are shuffled with random_state=1,
    so the partition is reproducible. Shuffling is used to avoid trend bias.

    Returns X_train, X_test, X_val, y_train, y_test, y_val (in that order).
    """
    # First cut: training set versus a hold-out pool.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X, y, test_size=0.3, random_state=1
    )
    # Second cut: halve the hold-out pool into test and validation.
    X_test, X_val, y_test, y_val = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=1
    )
    return X_train, X_test, X_val, y_train, y_test, y_val

In [None]:
def get_prognose_data(start, end, validation_data, csv_files):
    """
    Combine the SKM and Volue forecasts and keep only the validation rows.

    Parameters
    ----------
    start, end
        Currently unused; kept so existing callers keep working.
    validation_data
        Sequence whose first element exposes an `.index`; rows of the merged
        forecast whose index label is not in that index are removed.
    csv_files
        Passed unchanged to `get_skm_data` and `get_volue_data`.

    Returns
    -------
    pandas.DataFrame
        Outer join (on index) of the two forecast frames, filtered to the
        index labels present in `validation_data[0]`.
    """
    df_skm = get_skm_data(csv_files)
    df_volue = get_volue_data(csv_files)
    df_merged = pd.merge(df_skm, df_volue, left_index=True, right_index=True, how='outer')

    # DataFrame.drop returns a new frame, so the previous per-row
    # `df_merged.drop(index)` never removed anything. Filter with a mask.
    # NOTE(review): labels must be comparable with the validation index
    # (e.g. same timezone awareness) for any row to match - confirm.
    in_validation = df_merged.index.isin(validation_data[0].index)
    df_merged = df_merged.loc[in_validation]

    print(df_merged)
    return df_merged

In [None]:
def main_training():
    """
    Load the CDF data, engineer features, split it and load forecast data.

    For LSTM neural networks the data cannot be split randomly, because the
    time ordering is essential for the memory cells. For XGBoost and CatBoost
    the time ordering is not relevant, so the random `data_split` is used.

    The SKM/Volue forecast frame is read from a cached CSV under `Data` when
    it exists; otherwise it is rebuilt with `get_prognose_data` and the user
    is asked whether to save it. Model training and evaluation are currently
    commented out. Returns None.
    """

    df = get_data_cdf(start_year=2015,start_month=1,start_day=1,end_year=2022,end_month=6,end_day=3)
    df = cdf_to_utc(df)
    df = feature_eng(df)

    # Column 0 is the target ('cons_actual'); everything after it is a feature.
    train_cols=list(df.columns)[1:]
    X = df[train_cols]
    y = df['cons_actual']

    X_train, X_test, X_val, y_train, y_test, y_val = data_split(X, y)
    # xgb_model = xgb(X_train, y_train, X_test, y_test)
    # cbr_model = catboostregressor(X_train, y_train)
    # lstm_model, lstm_X_val, lstm_y_val = lstm(df)

    # Build paths with os.path.join: the old backslash literals contained
    # invalid escape sequences and the read and write paths used different
    # separators, so they could point at different files.
    prognose_path = os.path.join('Data', 'prognose_skm_volue')
    skm_csv_files = [
        os.path.join('Data', 'SKM Forbruksprognose 2014-2018.csv'),
        os.path.join('Data', 'SKM Forbruksprognose 2018-dd.csv'),
    ]

    try:
        # to_csv below writes the index as the first column; read it back as
        # the index so the cached frame has the same layout as a fresh one.
        prognose_data = pd.read_csv(prognose_path, index_col=0, parse_dates=True)
    except FileNotFoundError:
        prognose_data = get_prognose_data(
            start="2020-01-01",
            end="2022-06-03",
            validation_data=(X_val, y_val),
            csv_files=skm_csv_files,
        )
        if input('Save prognose data? ') == 'y':
            prognose_data.to_csv(prognose_path)

    # xgb_val_pred = xgb_model.predict(xgboost.DMatrix(X_val, label=y_val))
    # cbr_val_pred = cbr_model.predict(X_val)
    # lstm_val_pred = lstm_model.predict(lstm_X_val)

    # evaluate_xgb_cbr(xgb_val_pred, cbr_val_pred, y_val)
    # print(lstm_val_pred)
    # evaluate_lstm(lstm_val_pred, lstm_y_val)