## Data collection for analysis of electricity data

In [None]:
import pandas as pd
import numpy as np
from quatt_aws_utils.s3 import create_s3_client
from datetime import datetime
from sklearn.model_selection import train_test_split

In [None]:
def preProcessSplitDataFrame(data_df: pd.DataFrame, y_offset: float = 0):
    """Clean a CIC extract and split it into unshuffled train/test halves.

    Steps:
      1. Drop every row that contains a NaN in any column.
      2. Multiply ``hp1.circulatingPumpDutyCycle`` by
         ``hp1.getCirculatingPumpRelay`` (the pump correction).
      3. Split the rows 50/50 with ``train_test_split(..., shuffle=False)``,
         so the original row order is kept.
      4. Build the feature frame, the target series and an "energy"
         bookkeeping frame for each half.

    The external electricity meter columns are read from the ``system.``
    namespace when both its power and energy columns are present, and from
    the ``qc.`` namespace otherwise.

    Args:
        data_df: Extract holding the feature columns, the pump columns, the
            external electricity meter columns, ``time.ts``,
            ``system.quattId`` and ``qc.supervisoryControlMode``. The frame
            passed in is not modified.
        y_offset: Constant added to the meter power when building the target.

    Returns:
        ``(X_train, y_train, X_test, y_test, energy_train, energy_test)``.
        ``X_*`` hold the feature columns, ``y_*`` hold
        ``meter power + y_offset - corrected pump duty cycle`` and
        ``energy_*`` hold timestamp, id, meter energy/power, corrected pump
        duty cycle and control mode.

    Raises:
        KeyError: If a required column is missing from ``data_df``.
    """
    # dropna() returns a new frame; the result has to be kept, otherwise the
    # NaN rows silently stay in the data. The copy guarantees that the column
    # assignment below never writes into the caller's frame.
    # NOTE(review): every column of the extract is considered, including ones
    # that are not used as feature or target - confirm that is the intent.
    data_df = data_df.dropna(axis=0).copy()

    # Correct circulating pump power values
    # (presumably the relay is 0/1, which zeroes the duty cycle while it is off).
    pump_col = 'hp1.circulatingPumpDutyCycle'
    data_df[pump_col] = data_df[pump_col] * data_df['hp1.getCirculatingPumpRelay']

    # Input features (X)
    feature_cols = ['qc.hp1PowerInput', 'hp1.getFanSpeed', 'hp1.temperatureOutside', 'hp1.bottomPlateHeaterEnable', 'hp1.compressorCrankcaseHeaterEnable']

    # The meter is reported under 'system.' for some extracts and under 'qc.'
    # for others; fall back to 'qc.' unless both 'system.' columns exist.
    system_meter_cols = ['system.externalElectricityMeterPower', 'system.externalElectricityMeterEnergy']
    if all(col in data_df.columns for col in system_meter_cols):
        meter_ns = 'system'
    else:
        meter_ns = 'qc'
    power_col = f'{meter_ns}.externalElectricityMeterPower'
    energy_col = f'{meter_ns}.externalElectricityMeterEnergy'
    energy_cols = ['time.ts', 'system.quattId', energy_col, power_col, pump_col, 'qc.supervisoryControlMode']

    # Split data into train and test without shuffling
    train, test = train_test_split(data_df, test_size=0.5, shuffle=False)

    X_train = train[feature_cols]
    X_test = test[feature_cols]

    # Target variable (y): meter power, offset-compensated, without the pump share
    y_train = train[power_col] + y_offset - train[pump_col]
    y_test = test[power_col] + y_offset - test[pump_col]

    energy_train = train[energy_cols]
    energy_test = test[energy_cols]

    return X_train, y_train, X_test, y_test, energy_train, energy_test

In [None]:
# Download electricity data per CIC from S3, split it into train/test sets
# with preProcessSplitDataFrame and store every split as a CSV file in data/.
quatt_s3_client = create_s3_client(aws_profile="nout_prod")

# CICs to load training data for
cics = [
    'CIC-9368bfef-7eca-5bda-9a90-8d5a4be375c6',
    'CIC-0f293b7a-4524-5fc7-84b9-66f80a5a6d7c',
    'CIC-7eede49c-42c2-5b41-94aa-481dad189abf',  # 8kW
    'CIC-e265a6ef-8365-5bab-a661-c23935c3c6ea',  # 8kW
]
# Per-CIC offset compensating for W of energy consumption by the external
# electricity meter (same order as cics)
offsets = [-11.74, -10.07, 0, 0]


def _single_hp_props():
    """Return a fresh property filter for a CIC that reports hp1 only."""
    return {
        'time': ['ts'],
        'system': ['externalElectricityMeterPower', 'quattId', 'externalElectricityMeterEnergy'],
        'qc': ['hp1PowerInput', 'supervisoryControlMode'],
        'hp1': ['acInputVoltage', 'acInputCurrent', 'getFanSpeed', 'bottomPlateHeaterEnable', 'compressorCrankcaseHeaterEnable',
                'circulatingPumpDutyCycle', 'getCirculatingPumpRelay', 'temperatureOutside', 'power'],
    }


def _dual_hp_props():
    """Return a fresh property filter for a CIC that reports hp1 and hp2."""
    return {
        'time': ['ts'],
        'system': ['quattId'],
        'qc': ['hp1PowerInput', 'externalElectricityMeterPower', 'externalElectricityMeter2Power',
               'externalElectricityMeterEnergy', 'externalElectricityMeter2Energy', 'supervisoryControlMode'],
        'hp1': ['acInputVoltage', 'acInputCurrent', 'getFanSpeed', 'bottomPlateHeaterEnable', 'compressorCrankcaseHeaterEnable',
                'circulatingPumpDutyCycle', 'getCirculatingPumpRelay', 'temperatureOutside', 'evaporatorCoilTemperature', 'power'],
        'hp2': ['acInputVoltage', 'acInputCurrent', 'getFanSpeed', 'bottomPlateHeaterEnable', 'compressorCrankcaseHeaterEnable',
                'circulatingPumpDutyCycle', 'getCirculatingPumpRelay', 'powerInput', 'temperatureOutside', 'power'],
    }


# One property filter per CIC (same order as cics); each entry is its own dict
props = [_single_hp_props(), _single_hp_props(), _dual_hp_props(), _dual_hp_props()]

start_date = datetime(2023, 4, 1)
# NOTE(review): identical to start_date although the output files are labelled
# "april" - confirm how get_cic_data treats the end date.
end_date = datetime(2023, 4, 1)

# zip() would silently skip CICs if the three lists got out of sync
if not len(cics) == len(offsets) == len(props):
    raise ValueError('cics, offsets and props must have the same length')

# Empty old variables. Each name is removed independently, so one missing
# name no longer leaves the remaining stale variables behind.
for stale_name in ('X_train', 'X_test', 'y_train', 'y_test', 'energy_test', 'energy_train'):
    globals().pop(stale_name, None)

# Column layout for 8kW installations: hp2_columns lists, position by position,
# the hp2 counterpart of every entry in hp1_columns, so that the hp2 data can
# be renamed to the hp1 names and pushed through the same preprocessing.
hp1_columns = ['qc.hp1PowerInput', 'qc.externalElectricityMeterPower', 'hp1.acInputVoltage', 'qc.externalElectricityMeterEnergy',
               'hp1.acInputCurrent', 'hp1.getFanSpeed', 'hp1.bottomPlateHeaterEnable', 'hp1.compressorCrankcaseHeaterEnable',
               'hp1.circulatingPumpDutyCycle', 'hp1.getCirculatingPumpRelay', 'hp1.temperatureOutside', 'hp1.power',
               'system.quattId', 'time.ts', 'qc.supervisoryControlMode']
hp2_columns = ['hp2.powerInput', 'qc.externalElectricityMeter2Power', 'hp2.acInputVoltage', 'qc.externalElectricityMeter2Energy',
               'hp2.acInputCurrent', 'hp2.getFanSpeed', 'hp2.bottomPlateHeaterEnable', 'hp2.compressorCrankcaseHeaterEnable',
               'hp2.circulatingPumpDutyCycle', 'hp2.getCirculatingPumpRelay', 'hp2.temperatureOutside', 'hp2.power',
               'system.quattId', 'time.ts', 'qc.supervisoryControlMode']

for cic, offset, prop in zip(cics, offsets, props):
    extract_df = quatt_s3_client.get_cic_data(cic_ids=cic,
                                              start_date=start_date, end_date=end_date,
                                              filter_objects=['time'],
                                              filter_properties=prop,
                                              cloud_type='production'
                                              )

    if 'hp2' in prop:
        # 8kW installation: treat each heat pump as its own data set, with the
        # hp2 columns renamed to the hp1 names and the id suffixed per pump.
        extract_df_hp1 = extract_df[hp1_columns].copy()
        extract_df_hp2 = extract_df[hp2_columns].copy()
        extract_df_hp2.columns = hp1_columns
        extract_df_hp1['system.quattId'] = extract_df_hp1['system.quattId'] + '_hp1'
        extract_df_hp2['system.quattId'] = extract_df_hp2['system.quattId'] + '_hp2'

        splits_hp1 = preProcessSplitDataFrame(extract_df_hp1, offset)
        splits_hp2 = preProcessSplitDataFrame(extract_df_hp2, offset)

        # Stack the hp1 and hp2 result of every split (X/y/energy, train/test)
        (X_extract_train, y_extract_train, X_extract_test, y_extract_test,
         energy_extract_train, energy_extract_test) = (
            pd.concat([part_hp1, part_hp2], axis=0, ignore_index=True)
            for part_hp1, part_hp2 in zip(splits_hp1, splits_hp2)
        )
    else:
        (X_extract_train, y_extract_train, X_extract_test, y_extract_test,
         energy_extract_train, energy_extract_test) = preProcessSplitDataFrame(extract_df, offset)

    # The splits are not accumulated in memory across CICs; every CIC is
    # saved to its own csv files for later use in case we break downloading
    # of data in multiple pieces.
    csv_outputs = {
        'X_train': X_extract_train,
        'y_train': y_extract_train,
        'X_test': X_extract_test,
        'y_test': y_extract_test,
        'energy_train': energy_extract_train,
        'energy_test': energy_extract_test,
    }
    for split_label, split_data in csv_outputs.items():
        split_data.to_csv(f'data/{cic}-{split_label}-april.csv', index=False)


## Data collection for analysis of heat generation by heat pump and CV boiler

In [None]:
# Getting data from S3 to verify heat power measured: download the heat-related
# properties of every CIC, stack them into heat_df and save that as csv.
quatt_s3_client = create_s3_client(aws_profile="nout_prod")  # profile for production S3

# CICs to load data for
cics = [
    'CIC-9368bfef-7eca-5bda-9a90-8d5a4be375c6',
    'CIC-0f293b7a-4524-5fc7-84b9-66f80a5a6d7c',
]


def _heat_props():
    """Return a fresh property filter with the heat-related properties."""
    return {
        'time': ['ts'],
        'system': ['quattId', 'externalEnergyMeter1Energy', 'externalEnergyMeter1Power', 'externalEnergyMeter1Flow',
                   'externalEnergyMeter1Supply', 'externalEnergyMeter1Return', 'externalEnergyMeter1Cooling',
                   'externalEnergyMeter2Energy', 'externalEnergyMeter2Power', 'externalEnergyMeter2Flow',
                   'externalEnergyMeter2Supply', 'externalEnergyMeter2Return', 'externalEnergyMeter2Cooling'],
        'qc': ['hp1PowerInput', 'hp1PowerOutput', 'externalElectricityMeterPower', 'cvPowerOutput',
               'supervisoryControlMode', 'hp1ThermalEnergyCounter', 'cvEnergyCounter'],
        'hp1': ['temperatureWaterIn', 'temperatureWaterOut', 'inletTemperatureFiltered', 'outletTemperatureFiltered'],
        'flowMeter': ['flowRate', 'waterSupplyTemperature'],
    }


# Properties to request, one (identical) filter per CIC
props = [_heat_props() for _ in cics]

start_date = datetime(2023, 4, 1)
end_date = datetime(2023, 4, 20)

# Empty the old variable so a failed download cannot leave stale data behind
globals().pop('heat_df', None)

# Collect the extracts first and concatenate once instead of growing heat_df
# with a new concat on every iteration.
heat_frames = []
for cic, prop in zip(cics, props):
    extract_df = quatt_s3_client.get_cic_data(cic_ids=cic,
                                              start_date=start_date, end_date=end_date,
                                              filter_properties=prop,
                                              cloud_type='production'
                                              )
    heat_frames.append(extract_df)

heat_df = pd.concat(heat_frames, ignore_index=True)

# save frame to csv for later use
heat_df.to_csv('data/heat_df.csv', index=False)