In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import calendar 
from datetime import datetime
import pytz
import openpyxl


# RKOM

In [43]:
rkom_2022_df = pd.read_excel("/Users/sandermeland/Documents/Jobb/Volte/master-kode/markets/markets-data/RKOM.xlsx")
rkom_2023_df = pd.read_excel("/Users/sandermeland/Documents/Jobb/Volte/master-kode/markets/markets-data/Rkom-2023.xlsx")
rkom_dfs = [rkom_2022_df, rkom_2023_df]


In [45]:
def preprocess_rkom_df(df_list):
    # remove all rows where hour is between 2-5 and between 7-24
    updated_dfs = []
    for df in df_list:
        rkom_df = df[~df['Hour'].isin([2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24])]
        #change hour 1 to 1-5 and change hour 6 to 6-24
        for hour in rkom_df["Hour"]:
            if hour == 1:
                rkom_df["Hour"] = rkom_df["Hour"].replace(1, "1-5")
            elif hour == 6:
                rkom_df["Hour"] = rkom_df["Hour"].replace(6, "6-24")    
        updated_dfs.append(rkom_df)
    return updated_dfs[0], updated_dfs[1]

In [46]:
rkom_22, rkom_23 = preprocess_rkom_df(rkom_dfs)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rkom_df["Hour"] = rkom_df["Hour"].replace(1, "1-5")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rkom_df["Hour"] = rkom_df["Hour"].replace(6, "6-24")


In [81]:
rkom_23.loc[(rkom_23["Week"] == 2)]

Unnamed: 0,Year,Week,Areas,Hour,Direction,RKOM-H Volume Weekday,RKOM-H Price Weekday,RKOM-H Volume Weekend,RKOM-H Price Weekend,RKOM-B Volume Weekday,RKOM-B Price Weekday,RKOM-B Volume Weekend,RKOM-B Price Weekend
624,2023,2,"NO3,NO4",1-5,Up,0.0,0.0,,,0.0,0.0,,
629,2023,2,"NO3,NO4",6-24,Up,30.0,18.5,,,396.0,18.5,,
648,2023,2,"NO3,NO4",1-5,Down,0.0,0.0,,,0.0,0.0,,
653,2023,2,"NO3,NO4",6-24,Down,0.0,0.0,,,0.0,0.0,,
3024,2023,2,"NO1,NO2,NO5",1-5,Up,0.0,0.0,,,0.0,0.0,,
3029,2023,2,"NO1,NO2,NO5",6-24,Up,1122.0,85.0,,,24.0,8.0,,
3048,2023,2,"NO1,NO2,NO5",1-5,Down,0.0,0.0,,,0.0,0.0,,
3053,2023,2,"NO1,NO2,NO5",6-24,Down,0.0,0.0,,,0.0,0.0,,
4944,2023,2,"NO1,NO2,NO3,NO4,NO5",1-5,Up,,,0.0,0.0,,,0.0,0.0
4949,2023,2,"NO1,NO2,NO3,NO4,NO5",6-24,Up,,,0.0,0.0,,,0.0,0.0


In [103]:
# function to remove all rows where an area is not present
def get_hour_val_area_df(df, area : str, month, day, hour):
    """Function to filter out all rows where an area is not present

    Args:
        area (str): specify which area to use from [NO1, NO2, NO3, NO4, NO5]
        rkom_df (pd.DataFrame): Full dataframe

    Returns:
        pd.DataFrame: dataframe with only the specified area
    """
    
    year = df["Year"][0]
    #remove all rows where the are is equal to nan
    area_df = df.drop(df["Areas"][df["Areas"].isna()].index)

    #remove all rows where the chosen area is not present
    area_df = area_df.drop(area_df["Areas"].loc[(area_df["Areas"].str.contains(area) == False)].index)        
    #area_df = df.drop(df["Areas"][df["Areas"].str.contains(area) == False].index)        

    #Sort by week and then sort by hour within each week    
    area_df = area_df.sort_values(by=["Week", "Hour"] )
    time_of_day = '1-5' if hour <= 5 else '6-24'
    date = datetime(year, month, day)
    week_num = date.isocalendar()[1]
    
    if len(area_df.loc[(area_df["Week"] == week_num)]) > 4:
        area_df = area_df.drop(area_df["Areas"][area_df["Areas"].str.contains("NO1,NO2,NO3,NO4,NO5")].index)
        area_df = area_df.fillna(0)
    else:
        area_df = area_df.fillna(0)
    
    area_df = area_df.loc[area_df["Week"] == week_num].reset_index(drop=True)
    return area_df.loc[(area_df["Hour"] == time_of_day)]


In [100]:
def create_standardized_RKOM_df(df_list, area, year, start_month, start_day, start_hour, end_month, end_day, end_hour):
    if year == 2022:
        df = df_list[0]
    else:
        df = df_list[1]
    
    year = df["Year"].iloc[0]
    #print(year)
    #print(type(year))
    next_year = year + 1
    date_horizon =  pd.date_range(start=pd.Timestamp(year= year, month= start_month, day = start_day, hour = start_hour), 
                            end= pd.Timestamp(year = year, month = end_month, day = end_day, hour = end_hour), freq="H", tz = "Europe/Oslo")
    std_df = pd.DataFrame(np.zeros((len(date_horizon), 9)), columns= ["Time(Local)", "RKOM-H Price up", "RKOM-H Volume up", "RKOM-B Price up", "RKOM-B Volume up", "RKOM-H Price down", "RKOM-H Volume down", "RKOM-B Price down", "RKOM-B Volume down"])
    std_df["Time(Local)"] = date_horizon
    print(date_horizon)
    for date in std_df["Time(Local)"]:
        month = date.month
        day = date.day
        hour = date.hour
        
        hour_val = get_hour_val_area_df(df, area, month, day, hour) 
        #print(hour_val)
        if date.weekday() < 5:
            std_df["RKOM-H Price up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Price Weekday"].iloc[0] * 0.085
            std_df["RKOM-H Volume up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Volume Weekday"].iloc[0]
            std_df["RKOM-B Price up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-B Price Weekday"].iloc[0] * 0.085
            std_df["RKOM-B Volume up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-B Volume Weekday"].iloc[0]
            std_df["RKOM-H Price down"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Price Weekday"].iloc[1] * 0.085
            std_df["RKOM-H Volume down"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Volume Weekday"].iloc[1]
            std_df["RKOM-B Price down"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-B Price Weekday"].iloc[1] * 0.085
            std_df["RKOM-B Volume down"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-B Volume Weekday"].iloc[1]
        else:
            std_df["RKOM-H Price up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Price Weekend"].iloc[0] * 0.085
            std_df["RKOM-H Volume up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Volume Weekend"].iloc[0]
            std_df["RKOM-B Price up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-B Price Weekend"].iloc[0] * 0.085
            std_df["RKOM-B Volume up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-B Volume Weekend"].iloc[0]
            std_df["RKOM-H Price down"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Price Weekend"].iloc[1] * 0.085
            std_df["RKOM-H Volume down"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Volume Weekend"].iloc[1]
            std_df["RKOM-B Price down"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-B Price Weekend"].iloc[1] * 0.085
            std_df["RKOM-B Volume down"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-B Volume Weekend"].iloc[1]
   
    # add an extra row for the timestamp equal to     
    #std_df["Time(Local)"] = std_df["Time(Local)"].dt.tz_localize('Europe/Oslo')
    return std_df

In [104]:
test_df = create_standardized_RKOM_df([rkom_22, rkom_23], "NO5", year = 2023, start_month= 6, start_day= 25, start_hour=0, end_month=6, end_day= 27, end_hour= 0)

DatetimeIndex(['2023-06-25 00:00:00+02:00', '2023-06-25 01:00:00+02:00',
               '2023-06-25 02:00:00+02:00', '2023-06-25 03:00:00+02:00',
               '2023-06-25 04:00:00+02:00', '2023-06-25 05:00:00+02:00',
               '2023-06-25 06:00:00+02:00', '2023-06-25 07:00:00+02:00',
               '2023-06-25 08:00:00+02:00', '2023-06-25 09:00:00+02:00',
               '2023-06-25 10:00:00+02:00', '2023-06-25 11:00:00+02:00',
               '2023-06-25 12:00:00+02:00', '2023-06-25 13:00:00+02:00',
               '2023-06-25 14:00:00+02:00', '2023-06-25 15:00:00+02:00',
               '2023-06-25 16:00:00+02:00', '2023-06-25 17:00:00+02:00',
               '2023-06-25 18:00:00+02:00', '2023-06-25 19:00:00+02:00',
               '2023-06-25 20:00:00+02:00', '2023-06-25 21:00:00+02:00',
               '2023-06-25 22:00:00+02:00', '2023-06-25 23:00:00+02:00',
               '2023-06-26 00:00:00+02:00', '2023-06-26 01:00:00+02:00',
               '2023-06-26 02:00:00+02:00', '2023-0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  std_df["RKOM-H Price up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Price Weekend"].iloc[0] * 0.085
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  std_df["RKOM-H Volume up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-H Volume Weekend"].iloc[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  std_df["RKOM-B Price up"][(std_df["Time(Local)"] == date)] = hour_val["RKOM-B Price Weekend"].iloc[0] * 0.085
A value is trying to be set on a copy of a slice fro

In [102]:
test_df

Unnamed: 0,Time(Local),RKOM-H Price up,RKOM-H Volume up,RKOM-B Price up,RKOM-B Volume up,RKOM-H Price down,RKOM-H Volume down,RKOM-B Price down,RKOM-B Volume down
0,2023-06-25 00:00:00+02:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2023-06-25 01:00:00+02:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2023-06-25 02:00:00+02:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2023-06-25 03:00:00+02:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2023-06-25 04:00:00+02:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,2023-06-25 05:00:00+02:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2023-06-25 06:00:00+02:00,0.0,0.0,0.0,0.0,21.25,618.0,0.0,0.0
7,2023-06-25 07:00:00+02:00,0.0,0.0,0.0,0.0,21.25,618.0,0.0,0.0
8,2023-06-25 08:00:00+02:00,0.0,0.0,0.0,0.0,21.25,618.0,0.0,0.0
9,2023-06-25 09:00:00+02:00,0.0,0.0,0.0,0.0,21.25,618.0,0.0,0.0
