In [118]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import calendar 
from datetime import datetime
import pytz
import openpyxl


In [119]:
import os

# Read every aFRR CSV export into memory: one DataFrame per file, collected in
# `up_data_files` / `down_data_files`.
#
# NOTE(review): `up_directory` points at ".../down_2023" and `down_directory`
# at ".../up_2023". The column names are later assigned purely by position, so
# if this is an accidental swap the "Up" and "Down" labels end up on the wrong
# data. Confirm against the raw files before relying on the results.
# NOTE(review): both paths are absolute and machine-specific; the notebook will
# not run elsewhere without editing them.
up_directory = '/Users/sandermeland/Documents/Jobb/Volte/master-kode/markets/markets-data/aFFR/down_2023'
# os.listdir returns names in arbitrary order, so the files are not read chronologically.
up_file_list = [file for file in os.listdir(up_directory) if file.endswith('.csv')]

down_directory = '/Users/sandermeland/Documents/Jobb/Volte/master-kode/markets/markets-data/aFFR/up_2023'
down_file_list = [file for file in os.listdir(down_directory) if file.endswith('.csv')]
# NOTE(review): `merged_data` is not used by any code visible in this section.
merged_data = pd.DataFrame()

up_data_files = []
down_data_files = []

# The loop variables (`file`, `file_path`, `data`) stay defined in the kernel
# namespace after these loops finish.
for file in up_file_list:
    file_path = os.path.join(up_directory, file)
    data = pd.read_csv(file_path)
    up_data_files.append(data)

for file in down_file_list:
    file_path = os.path.join(down_directory, file)
    data = pd.read_csv(file_path)
    down_data_files.append(data)

In [120]:
# Stack the per-file frames into one frame per direction, renumbering rows 0..n-1.
up_df = pd.concat(objs=up_data_files, ignore_index=True)
down_df = pd.concat(objs=down_data_files, ignore_index=True)

In [121]:
def afrr_function(up_df : pd.DataFrame, down_df : pd.DataFrame,  area : str, start_month : int, year : int, start_day : int, end_month : int, end_day : int, start_hour : int, end_hour: int):
    """Preprocess the raw aFRR frames and cut out one area and one time window.

    Both frames get canonical column names (assigned by position), their time
    column is parsed and localized to Europe/Oslo, and the rows are limited to
    the half-open window [start, end). The caller's frames are not modified.

    Args:
        up_df (pd.DataFrame): raw up-regulation data. The first column must hold
            text starting with 'dd.mm.YYYY HH:MM'; the remaining columns are
            assumed to be volume/price pairs for NO1..NO5 in that order.
        down_df (pd.DataFrame): raw down-regulation data, same layout.
        area (str): area to keep, e.g. "NO5". It is matched against the column
            names with `str.contains`, i.e. as a regular expression.
        start_month, start_day, start_hour (int): inclusive start of the window.
        year (int): year used for both the start and the end of the window.
        end_month, end_day, end_hour (int): exclusive end of the window.

    Returns:
        list[pd.DataFrame]: `[up, down]`, each with "Time(Local)" plus the
        columns of `area`, sorted by time with a fresh 0..n-1 index.
    """
    time_col = "Time(Local)"

    def _canonical_names(direction):
        # "Time(Local)" followed by a Volume/Price pair for each of NO1..NO5.
        names = [time_col]
        for zone in ("NO1", "NO2", "NO3", "NO4", "NO5"):
            names.append(f"aFRR Volume {direction} {zone}")
            names.append(f"aFRR Price {direction} {zone}")
        return names

    # Positional renaming; `rename` returns new frames, so the inputs stay untouched.
    down_named = down_df.rename(columns=dict(zip(down_df.columns, _canonical_names("Down"))))
    up_named = up_df.rename(columns=dict(zip(up_df.columns, _canonical_names("Up"))))

    window_start = pd.Timestamp(year=year, month=start_month, day=start_day, hour=start_hour, tz="Europe/Oslo")
    window_end = pd.Timestamp(year=year, month=end_month, day=end_day, hour=end_hour, tz="Europe/Oslo")

    def _prepare(frame):
        # The first sort runs on the raw text, so it is lexicographic
        # (day-of-month first), not chronological; the chronological sort
        # happens at the end, after parsing.
        ordered = frame.sort_values(by=time_col, ignore_index=True)

        # Only the leading 'dd.mm.YYYY HH:MM' part of the text is parsed.
        naive_times = pd.to_datetime(ordered[time_col].str.slice(0, 16), format='%d.%m.%Y %H:%M')
        # 'infer' resolves the repeated autumn DST hour from the row order.
        ordered[time_col] = naive_times.dt.tz_localize('Europe/Oslo', ambiguous='infer')

        in_window = (ordered[time_col] >= window_start) & (ordered[time_col] < window_end)

        # Columns not mentioning `area`; the first of them (the time column) is kept.
        other_area_cols = ordered.columns[~ordered.columns.str.contains(area)][1:]

        selected = ordered[in_window].drop(columns=other_area_cols)
        return selected.sort_values(by=time_col).reset_index(drop=True)

    return [_prepare(up_named), _prepare(down_named)]

In [122]:
# Up and down aFRR data for NO5 over the window 2023-06-25 00:00 (inclusive)
# to 2023-06-27 00:00 (exclusive), Europe/Oslo time.
updated_afrr_dfs = afrr_function(
    up_df,
    down_df,
    "NO5",
    year=2023,
    start_month=6,
    start_day=25,
    start_hour=0,
    end_month=6,
    end_day=27,
    end_hour=0,
)

In [123]:
# Show the first returned frame; afrr_function returns [up, down], so this is the up-regulation data.
updated_afrr_dfs[0]

Unnamed: 0,Time(Local),aFRR Volume Up NO5,aFRR Price Up NO5
0,2023-06-25 00:00:00+02:00,68,40.0
1,2023-06-25 01:00:00+02:00,0,0.0
2,2023-06-25 02:00:00+02:00,0,0.0
3,2023-06-25 03:00:00+02:00,0,0.0
4,2023-06-25 04:00:00+02:00,0,0.0
5,2023-06-25 05:00:00+02:00,68,40.0
6,2023-06-25 06:00:00+02:00,68,40.0
7,2023-06-25 07:00:00+02:00,68,40.0
8,2023-06-25 08:00:00+02:00,68,25.0
9,2023-06-25 09:00:00+02:00,68,25.0


# The two functions below have been combined into the single `afrr_function` above

In [76]:
def preprocess_aFRR(up_df : pd.DataFrame, down_df : pd.DataFrame):
    """Give the raw aFRR frames canonical column names and a tz-aware time column.

    Columns are renamed by position to "Time(Local)" followed by a
    Volume/Price pair for each of NO1..NO5. The time text is then parsed and
    localized to Europe/Oslo. The caller's frames are not modified.

    Args:
        up_df (pd.DataFrame): raw up-regulation data. The first column must hold
            text starting with 'dd.mm.YYYY HH:MM'.
        down_df (pd.DataFrame): raw down-regulation data, same layout.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: `(up, down)` with a fresh 0..n-1 index.
        Rows are ordered by the raw time text (lexicographic, day-of-month
        first), not chronologically.
    """
    time_col = "Time(Local)"
    zones = ("NO1", "NO2", "NO3", "NO4", "NO5")

    def _labels(direction):
        return [time_col] + [
            f"aFRR {kind} {direction} {zone}"
            for zone in zones
            for kind in ("Volume", "Price")
        ]

    down_df = down_df.rename(columns=dict(zip(down_df.columns, _labels("Down"))))
    up_df = up_df.rename(columns=dict(zip(up_df.columns, _labels("Up"))))

    # Echo the new column names so the positional renaming can be checked by eye.
    print(up_df.columns)
    print(down_df.columns)

    def _localize(frame):
        ordered = frame.sort_values(by=time_col, ignore_index=True)
        # Only the leading 'dd.mm.YYYY HH:MM' part of the text is parsed.
        naive_times = pd.to_datetime(ordered[time_col].str.slice(0, 16), format='%d.%m.%Y %H:%M')
        # 'infer' resolves the repeated autumn DST hour from the row order.
        ordered[time_col] = naive_times.dt.tz_localize('Europe/Oslo', ambiguous='infer')
        return ordered

    return _localize(up_df), _localize(down_df)

In [77]:
# Rename the columns and localize the timestamps of both raw frames.
aFRR_up_df, aFRR_down_df = preprocess_aFRR(up_df=up_df, down_df=down_df)

Index(['Time(Local)', 'aFRR Volume Up NO1', 'aFRR Price Up NO1',
       'aFRR Volume Up NO2', 'aFRR Price Up NO2', 'aFRR Volume Up NO3',
       'aFRR Price Up NO3', 'aFRR Volume Up NO4', 'aFRR Price Up NO4',
       'aFRR Volume Up NO5', 'aFRR Price Up NO5'],
      dtype='object')
Index(['Time(Local)', 'aFRR Volume Down NO1', 'aFRR Price Down NO1',
       'aFRR Volume Down NO2', 'aFRR Price Down NO2', 'aFRR Volume Down NO3',
       'aFRR Price Down NO3', 'aFRR Volume Down NO4', 'aFRR Price Down NO4',
       'aFRR Volume Down NO5', 'aFRR Price Down NO5'],
      dtype='object')


In [79]:
# Market name -> preprocessed frame, as expected by get_date_related_afrr_df.
afrr_dict = dict(aFRR_up=aFRR_up_df, aFRR_down=aFRR_down_df)

In [110]:
def get_date_related_afrr_df(dict : dict, market : str, area : str, start_month : int, year : int, start_day : int, end_month : int, end_day : int):
    """Return one market's aFRR data restricted to one area and a date window.

    Args:
        dict (dict): mapping from market name to its preprocessed dataframe.
            Each dataframe needs a timezone-aware "Time(Local)" column.
            (The parameter name shadows the builtin; it is kept so existing
            keyword callers keep working.)
        market (str): key of the wanted dataframe in `dict`,
            e.g. 'aFRR_up' or 'aFRR_down'.
        area (str): wanted area - chosen from [NO1, NO2, NO3, NO4, NO5].
            It is matched against the column names with `str.contains`.
        start_month (int): month of the inclusive window start.
        year (int): year used for both the start and the end of the window.
        start_day (int): day of the inclusive window start (at 00:00).
        end_month (int): month of the exclusive window end.
        end_day (int): day of the exclusive window end (at 00:00).

    Returns:
        pd.DataFrame: "Time(Local)" plus the columns of `area`, limited to
        start <= time < end, sorted by time with a fresh 0..n-1 index.

    Raises:
        KeyError: if `market` is not a key of `dict`.
    """
    df = dict[market]

    start_datetime = pd.Timestamp(year = year, month= start_month, day = start_day, hour = 0, tz = "Europe/Oslo")
    end_datetime = pd.Timestamp(year = year, month=end_month, day = end_day, hour = 0, tz = "Europe/Oslo")

    # Filter based on date range (half-open: the end instant is excluded)
    filtered_df = df[(df["Time(Local)"] >= start_datetime) & (df["Time(Local)"] < end_datetime)]

    # Build the drop list from the selected frame itself. It was previously
    # built from the notebook globals aFRR_up_df / aFRR_down_df, which failed
    # for any market whose column names differ from the up frame's.
    # The time column is excluded by name rather than by position.
    other_area_cols = df.columns[~df.columns.str.contains(area)]
    removed_cols = [col for col in other_area_cols if col != "Time(Local)"]

    # Filter by area
    area_df = filtered_df.drop(columns = removed_cols)

    # Sort by "Time(Local)" column
    area_df = area_df.sort_values(by="Time(Local)").reset_index(drop=True)

    return area_df

In [112]:
# Up-regulation data for NO5 on 2023-06-26 (window end 2023-06-27 00:00 is excluded).
get_date_related_afrr_df(
    afrr_dict,
    'aFRR_up',
    'NO5',
    year=2023,
    start_month=6,
    start_day=26,
    end_month=6,
    end_day=27,
)

Unnamed: 0,Time(Local),aFRR Volume Up NO5,aFRR Price Up NO5
0,2023-06-26 00:00:00+02:00,68,18.0
1,2023-06-26 01:00:00+02:00,0,0.0
2,2023-06-26 02:00:00+02:00,0,0.0
3,2023-06-26 03:00:00+02:00,0,0.0
4,2023-06-26 04:00:00+02:00,0,0.0
5,2023-06-26 05:00:00+02:00,78,18.0
6,2023-06-26 06:00:00+02:00,78,18.0
7,2023-06-26 07:00:00+02:00,78,18.0
8,2023-06-26 08:00:00+02:00,78,18.0
9,2023-06-26 09:00:00+02:00,68,18.0
