In [None]:
# Clear every user-defined name so the notebook starts from a clean namespace.
# This is the explicit form of the `%reset -f` line magic; unlike the bare
# `reset -f` spelling it does not depend on IPython's automagic setting.
get_ipython().run_line_magic("reset", "-f")


In [None]:
import pandas as pd
import numpy as np
import os
import json


In [None]:
# Reporting window as ISO dates (YYYY-MM-DD). The filter further down keeps
# rows from start_date 00:00 up to, but not including, the day after end_date.
start_date = "2022-06-15"  # first day included
end_date = "2022-12-31"    # last day included


In [None]:
# Files that are read from `input_folder` (both are JSON Lines files).
input_folder = 'InputData'
meter_values_2022_10_01_file_name = 'Mittarilukemat.json'
chargeing_point_name_file_name = "Chargeing_point_name.json"

# Files that live in `preprocessed_input_folder`. The two `.gzip` tables are
# read with pd.read_parquet; the CSV is the file this notebook writes.
preprocessed_input_folder = 'PreprocessedData'
defa_file_name_gzip = 'Defa_Table.gzip'
spot_price_file_name_gzip = 'Electricity_Price_Table.gzip'
mittalukemat_2023_01_01_file_name = "Mittarilukemat_2023_01_01.csv"


In [None]:
# Meter readings (JSON Lines: one record per line). The `with` block closes
# the handle even when pd.read_json raises.
file_path: str = os.path.join(input_folder, meter_values_2022_10_01_file_name)
with open(file_path, "r") as file:
    meter_values_2022_10_01: pd.DataFrame = pd.read_json(file, lines=True)

# Chargeing-point name table (JSON Lines as well).
file_path = os.path.join(input_folder, chargeing_point_name_file_name)
with open(file_path, "r") as file:
    df_chargeing_point_name: pd.DataFrame = pd.read_json(file, lines=True)

# Spot price table, stored as Parquet despite the `.gzip` suffix.
file_path = os.path.join(preprocessed_input_folder, spot_price_file_name_gzip)
df_spot_price: pd.DataFrame = pd.read_parquet(file_path)


In [None]:
# Preview the first 30 rows of the meter readings table.
meter_values_2022_10_01.head(30)

In [None]:
# Preview the first rows of the chargeing-point name table.
df_chargeing_point_name.head()


In [None]:
# Load the preprocessed Defa table (a Parquet file despite the `.gzip` suffix).
file_path = os.path.join(preprocessed_input_folder, defa_file_name_gzip)
df_defa = pd.read_parquet(path=file_path)


In [None]:
# Attach the chargeing-point name columns to every Defa row. The Defa side is
# keyed by (id, connector_id), the name table by (id, connectorId). This is the
# default inner join, so rows without a partner on the other side are dropped.
# NOTE: the cell rebinds `df_defa`; re-run the load cell before re-running this
# one, otherwise the merge is applied twice.
df_defa = df_defa.merge(
    df_chargeing_point_name,
    left_on=["id", "connector_id"],
    right_on=["id", "connectorId"],
)
df_defa.head()


In [None]:
def add_chargeing_time_column(df: pd.DataFrame, row: pd.Series, start_date_and_hour, end_date_and_hour):
    """Write the seconds actually spent chargeing into df['chargeing_time'], in place.

    Args:
        df: hourly table with a 'DateTime' column; modified in place.
        row: session record providing 'start_time' and 'end_time'.
        start_date_and_hour: timestamp of the clock hour in which the session starts.
        end_date_and_hour: timestamp of the clock hour in which the session ends.

    Returns:
        None. Only the rows for the first and the last hour are changed when
        the session spans several hours; when it starts and ends within the
        same hour the whole column is set to the session length.
    """
    session_start, session_end = row['start_time'], row['end_time']

    if start_date_and_hour != end_date_and_hour:
        in_first_hour = df['DateTime'] == start_date_and_hour
        in_last_hour = df['DateTime'] == end_date_and_hour
        # First hour: (hour start - session start) is <= 0, so adding 3600
        # leaves the seconds from the session start to the end of that hour.
        df.loc[in_first_hour, 'chargeing_time'] = \
            (start_date_and_hour - session_start).total_seconds() + 3600
        # Last hour: seconds from the beginning of the hour to the session end.
        df.loc[in_last_hour, 'chargeing_time'] = \
            (session_end - end_date_and_hour).total_seconds()
        return

    # Session starts and ends inside the same clock hour.
    df['chargeing_time'] = (session_end - session_start).total_seconds()


In [None]:
def create_list_of_chargeing_times(row: pd.Series, df_spot_price: pd.DataFrame) -> pd.DataFrame:
    """Split one chargeing session into hourly rows joined with the spot price.

    Args:
        row: one session record; reads 'start_time', 'end_time', 'energy',
            'id', 'connectorId' and 'Autopaikka'.
        df_spot_price: price table with a 'DateTime' column (matched against
            whole-hour timestamps) and a 'Summa+ALV' column. It is not modified.

    Returns:
        A copy of the df_spot_price rows whose 'DateTime' is one of the whole
        hours from the session's start hour to its end hour (inclusive), with
        these columns added: 'id', 'connector_id', 'Autopaikka',
        'chargeing_time' (seconds charged within that hour), 'start_time',
        'end_time', 'kulutettu energia' (the session energy spread evenly over
        the charged seconds), 'Lataus aika (min.)' and 'Kokonais summa + ALV'
        ('kulutettu energia' times 'Summa+ALV').
    """
    # Truncate both ends of the session to the beginning of their clock hour.
    start_date_and_hour: pd.Timestamp = pd.Timestamp(row['start_time'].strftime('%Y-%m-%d %H:00:00'))
    end_date_and_hour: pd.Timestamp = pd.Timestamp(row['end_time'].strftime('%Y-%m-%d %H:00:00'))

    chargeing_time: float = (row['end_time'] - row['start_time']).total_seconds()
    # Sessions with no (or negative) duration get zero energy instead of a
    # division by zero.
    if chargeing_time > 0:
        energy_per_second = row['energy'] / chargeing_time
    else:
        energy_per_second = 0

    # NOTE(review): pandas >= 2.2 deprecates the 'H' alias in favour of 'h';
    # kept as is for older installations - TODO confirm the minimum version.
    chargeing_time_list = pd.date_range(start_date_and_hour, end_date_and_hour, freq='H')
    mask: pd.Series = df_spot_price['DateTime'].isin(chargeing_time_list)
    # Work on an explicit copy: assigning columns to a boolean-mask slice
    # triggers SettingWithCopyWarning and leaves it unclear whether the shared
    # price table is touched.
    df_price_report: pd.DataFrame = df_spot_price[mask].copy()

    df_price_report['id'] = row['id']
    df_price_report['connector_id'] = row['connectorId']
    df_price_report['Autopaikka'] = row['Autopaikka']
    # Every hour counts as fully charged (3600 s) until the first/last hour
    # are corrected below. Stored as float because those corrections are
    # fractional seconds, which do not fit an integer column.
    df_price_report['chargeing_time'] = 3600.0
    df_price_report['start_time'] = row['start_time']
    df_price_report['end_time'] = row['end_time']
    add_chargeing_time_column(df_price_report, row, start_date_and_hour, end_date_and_hour)

    df_price_report['kulutettu energia'] = df_price_report['chargeing_time'] * energy_per_second
    df_price_report['Lataus aika (min.)'] = np.round(df_price_report['chargeing_time'] / 60, 2)
    df_price_report['Kokonais summa + ALV'] = df_price_report['kulutettu energia'] * df_price_report['Summa+ALV']
    return df_price_report


In [None]:
# One hourly cost table per Defa row. A plain list is built instead of relying
# on DataFrame.apply to return a Series of DataFrames that happens to carry a
# label 0.
df_list = [create_list_of_chargeing_times(row, df_spot_price) for _, row in df_defa.iterrows()]

# Concatenate once: calling pd.concat inside a loop re-copies all rows gathered
# so far on every pass. ignore_index=True gives the same fresh 0..n-1 index as
# the former reset_index(drop=True).
# pd.concat raises ValueError if df_defa has no rows (there is nothing to price).
df_total_cost = pd.concat(df_list, ignore_index=True)


In [None]:
# Restrict the hourly rows to the reporting window: from start_date 00:00
# (inclusive) up to the midnight that follows end_date (exclusive).
period_start = pd.to_datetime(start_date)
period_end_exclusive = pd.to_datetime(end_date) + np.timedelta64(1, "D")
in_period = (df_total_cost["DateTime"] >= period_start) & \
            (df_total_cost["DateTime"] < period_end_exclusive)
df_cost = df_total_cost[in_period]


In [None]:
# Preview the first rows of the period-filtered hourly cost table.
df_cost.head()


In [None]:
# Group the hourly rows by chargeing point. The key is passed as a plain
# string, not a one-element list: with a list, iterating the groups on
# pandas >= 2.0 yields 1-tuple keys such as ('name',) instead of 'name'.
df_cost_groupby = df_cost.groupby("Autopaikka")


In [None]:
# Sum 'kulutettu energia' per chargeing point. Aggregating (instead of
# iterating over the groups) gives scalar 'Autopaikka' labels on every pandas
# version, whether the grouper was built from a string or a one-element list.
consumption_per_point = df_cost_groupby['kulutettu energia'].sum()

print('{:>12} {:>18}'.format('Autopaikka', 'Kulutus (kWh)'))
list_of_consumed_electricity: list = []
for chargeing_point_name, the_consumed_electricity in consumption_per_point.items():
    list_of_consumed_electricity.append({"Autopaikka": chargeing_point_name,
                                         "Kulutus": the_consumed_electricity})
    print('{:>12} {:>18.2f}'.format(chargeing_point_name, the_consumed_electricity))

# Explicit columns keep 'Autopaikka' and 'Kulutus' present even when no rows
# fall inside the reporting window.
df_consumed_electricity: pd.DataFrame = pd.DataFrame(
    list_of_consumed_electricity, columns=["Autopaikka", "Kulutus"])



In [None]:
# df_consumed_electricity.head()


In [None]:
# Add the consumption of the reporting period to the previous meter readings.
# The join key is spelled out so the merge cannot silently switch to another
# set of shared column names. how="left" keeps every previous reading.
meter_values_2023_01_01: pd.DataFrame = pd.merge(
    meter_values_2022_10_01,
    df_consumed_electricity,
    how="left",
    on="Autopaikka",
)
# Chargeing points with no consumption in the period have no match and get
# NaN in 'Kulutus'; count that as zero.
# NOTE(review): this replaces NaN in every column, not only 'Kulutus' -
# confirm that is intended for the other columns.
meter_values_2023_01_01 = meter_values_2023_01_01.replace(np.nan, 0)
meter_values_2023_01_01["Mittarilukema"] = (
    meter_values_2023_01_01["Mittarilukema"] + meter_values_2023_01_01["Kulutus"]
)
# Stamp the new readings with a fixed date.
meter_values_2023_01_01["DateTime"] = pd.to_datetime("2023-01-01")
# 'Kulutus' was only a helper column for the addition above.
meter_values_2023_01_01 = meter_values_2023_01_01.drop(columns=["Kulutus"])



In [None]:
# Show the updated meter readings as plain text.
print(meter_values_2023_01_01)


In [None]:
# Write the updated meter readings as CSV into the preprocessed-data folder.
# to_csv keeps its defaults, so the row index is written as the first column.
file_path = os.path.join(preprocessed_input_folder, mittalukemat_2023_01_01_file_name)
meter_values_2023_01_01.to_csv(path_or_buf=file_path)
