In [None]:
# IPython `reset` magic (invoked via automagic); per the IPython docs,
# `-f` clears the interactive namespace without asking for confirmation.
reset -f


In [None]:
# https://prod.cloudcharge.se/adminpages/login.html


In [None]:
import os
import re
import pandas as pd


In [None]:
# Notebook configuration: folders and file names used by the load/save helpers.
input_folder: str = "InputData/Defa_Files"      # folder scanned for input files
preprocessed_folder: str = "PreprocessedData"   # folder the preprocessed tables are written to
# NOTE(review): the loader functions define their own local `input_file_list`,
# so this module-level list looks unused -- confirm before removing.
input_file_list: list = []
defa_file_name_gzip: str = "Defa_Table.gzip"    # name of the gzip-compressed parquet output
defa_file_name_csv: str = "Defa_Table.csv"      # name of the CSV output


In [None]:
def create_defa_dataframe(folder: "str | None" = None) -> pd.DataFrame:
    """Load every ``*.csv`` file of a folder into one de-duplicated DataFrame.

    Parameters
    ----------
    folder
        Directory to scan for CSV files. When omitted, the notebook-level
        ``input_folder`` is used.

    Returns
    -------
    pd.DataFrame
        All CSV files concatenated (each file keeps its own row index), with
        rows repeating the same ``id`` / ``transaction_id`` / ``connector_id``
        / ``start_time`` combination dropped; the first occurrence is kept.

    Raises
    ------
    ValueError
        If the folder contains no ``.csv`` file.
    """
    source_dir: str = input_folder if folder is None else folder

    # Raw string: '\.' in a normal string literal is an invalid escape sequence.
    # Sorted: os.listdir returns names in arbitrary order, and the order decides
    # which copy of a duplicated row survives drop_duplicates.
    csv_names: list = sorted(
        name for name in os.listdir(source_dir) if re.match(r'^.+\.csv$', name)
    )
    if not csv_names:
        # pd.concat([]) would raise a ValueError with a far less helpful message.
        raise ValueError('No .csv files found in {!r}'.format(source_dir))

    df_defa = pd.concat(
        [pd.read_csv(os.path.join(source_dir, name)) for name in csv_names]
    )
    print('Charge events before removing duplicates: {}'.format(df_defa.shape[0]))
    df_defa = df_defa.drop_duplicates(
        subset=['id', 'transaction_id', 'connector_id', 'start_time']
    )
    print('Charge events (duplicates are removed)  : {}'.format(df_defa.shape[0]))
    return df_defa


In [None]:
def create_defa_dataframe_json(folder: "str | None" = None,
                               site_key: str = 'defaoy-heikunantie9esp') -> pd.DataFrame:
    """Load every ``*.json`` file of a folder into one de-duplicated DataFrame.

    Each file is read with ``pd.read_json``; the records stored under the
    ``site_key`` column are expanded into one row per record.

    Parameters
    ----------
    folder
        Directory to scan for JSON files. When omitted, the notebook-level
        ``input_folder`` is used.
    site_key
        Top-level key of the JSON document that holds the list of records.
        Defaults to the key that was previously hard-coded.

    Returns
    -------
    pd.DataFrame
        All files concatenated (each file keeps its own row index), with rows
        repeating the same ``id`` / ``transactionId`` / ``connectorId`` /
        ``startTime`` combination dropped; the first occurrence is kept.

    Raises
    ------
    ValueError
        If the folder contains no ``.json`` file.
    KeyError
        If a file does not contain ``site_key``.
    """
    source_dir: str = input_folder if folder is None else folder

    # Raw string: '\.' in a normal string literal is an invalid escape sequence.
    # Sorted: os.listdir returns names in arbitrary order, and the order decides
    # which copy of a duplicated row survives drop_duplicates.
    json_names: list = sorted(
        name for name in os.listdir(source_dir) if re.match(r'^.+\.json$', name)
    )
    if not json_names:
        # pd.concat([]) would raise a ValueError with a far less helpful message.
        raise ValueError('No .json files found in {!r}'.format(source_dir))

    df_list: list = list()
    for json_name in json_names:
        raw: pd.DataFrame = pd.read_json(os.path.join(source_dir, json_name), lines=False)
        # The site column holds one record (dict) per row; expand it to columns.
        records: pd.DataFrame = pd.DataFrame(raw[site_key].to_list())
        print(records.columns)
        df_list.append(records)

    df_defa = pd.concat(df_list)
    print('Charge events before removing duplicates: {}'.format(df_defa.shape[0]))
    df_defa = df_defa.drop_duplicates(
        subset=['id', 'transactionId', 'connectorId', 'startTime']
    )
    print('Charge events (duplicates are removed)  : {}'.format(df_defa.shape[0]))
    return df_defa


In [None]:
def convert_column_types_json(df: pd.DataFrame):
    """Add localized ``start_time`` / ``end_time`` columns and sort by start.

    ``startTime`` and ``endTime`` are read as millisecond epoch values in UTC
    and converted to the ``Europe/Helsinki`` time zone. The frame is modified
    in place (two new columns, rows sorted ascending by ``start_time``);
    nothing is returned.
    """
    # NOTE(review): dayfirst presumably has no effect when `unit` is given
    # (it concerns string parsing) -- kept unchanged; confirm before removing.
    for epoch_col, local_col in (('startTime', 'start_time'), ('endTime', 'end_time')):
        utc_stamps = pd.to_datetime(df[epoch_col], dayfirst=True, unit="ms", utc=True)
        df[local_col] = utc_stamps.dt.tz_convert('Europe/Helsinki')
    df.sort_values(['start_time'], ascending=True, inplace=True)


In [None]:
def save_preprocessed_defa_df(df):
    """Write ``df`` to the preprocessed folder as gzip-compressed parquet and as CSV.

    The output folder is ``preprocessed_folder``; the file names come from
    ``defa_file_name_gzip`` and ``defa_file_name_csv``. The folder is created
    if it does not exist yet. Nothing is returned.
    """
    # Without this, both writers fail on a fresh checkout where the output
    # folder has not been created yet.
    os.makedirs(preprocessed_folder, exist_ok=True)

    parquet_path: str = os.path.join(preprocessed_folder, defa_file_name_gzip)
    df.to_parquet(parquet_path, compression='gzip')

    csv_path: str = os.path.join(preprocessed_folder, defa_file_name_csv)
    df.to_csv(csv_path)


In [None]:
# JSON pipeline: load and de-duplicate the input files, add the localized
# start_time / end_time columns, then write the result to disk.
df_defa: pd.DataFrame = create_defa_dataframe_json()
convert_column_types_json(df_defa)  # modifies df_defa in place, returns nothing
save_preprocessed_defa_df(df_defa)


In [None]:
def convert_column_types(df: pd.DataFrame):
    """Parse ``start_time`` / ``end_time`` as day-first datetimes and sort by start.

    Both columns are overwritten with their parsed values and the rows are
    sorted ascending by ``start_time``. The frame is modified in place;
    nothing is returned.
    """
    for time_col in ('start_time', 'end_time'):
        df[time_col] = pd.to_datetime(df[time_col], dayfirst=True)
    df.sort_values(['start_time'], ascending=True, inplace=True)


In [None]:
# df_defa: pd.DataFrame = create_defa_dataframe()
# convert_column_types(df_defa)
# save_preprocessed_defa_df(df_defa)


In [None]:
# Preview the first rows of the preprocessed frame.
df_defa.head()


In [None]:
# Preview the last rows of the preprocessed frame.
df_defa.tail()


In [None]:
# Show the dtype of every column.
df_defa.dtypes


In [None]:
def create_list_of_cahargeing_times(row) -> pd.DatetimeIndex:
    """Return the hourly timestamps spanned by one charge event.

    Parameters
    ----------
    row
        Mapping-like object (e.g. a DataFrame row) whose ``start_time`` and
        ``end_time`` entries support ``strftime``.

    Returns
    -------
    pd.DatetimeIndex
        One entry per hour, from the hour containing ``start_time`` up to and
        including the hour containing ``end_time``. The values are built from
        formatted wall-clock strings, so they carry no time zone even when the
        inputs are tz-aware.
    """
    # Truncate both ends to the full hour by formatting with fixed ":00:00".
    start_date_and_hour = row['start_time'].strftime('%Y-%m-%d %H:00:00')
    end_date_and_hour = row['end_time'].strftime('%Y-%m-%d %H:00:00')
    # An explicit offset object instead of the 'H' string alias: recent pandas
    # releases deprecate 'H' while older ones do not know 'h'; the offset
    # object works on both.
    chargeing_time_list = pd.date_range(
        start_date_and_hour, end_date_and_hour, freq=pd.offsets.Hour()
    )
    return chargeing_time_list


In [None]:
# For every charge event (row), store the hourly timestamps it spans.
df_defa['chargeing_time_list'] = df_defa.apply(create_list_of_cahargeing_times, axis=1)


In [None]:
# df_defa['chargeing_time_list']
