### Jupyter notebook version of calculation of historical data

In [2]:
from quatt_aws_utils.s3 import create_s3_client
import boto3
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import pickle
import os
import json
import gzip
import mysql.connector
from mysql.connector import Error
from urllib.parse import urlparse
from dotenv import load_dotenv
import os
from pathlib import Path

# load .env file
env_path = '/' + os.path.join(*os.getcwd().split('/')[:-2], '.env')
load_dotenv(env_path)

True

### Set parameters of script

In [2]:
# INPUTS FOR CALCULATION
AGGREGATIONS =[
    # directly available from cic stats
    {'sql':'cic_id', 'src':'system.quattId', 'func':'last'},
    {'sql':'OLD_hp1_electrical_energy_counter',
     'src':'qc.hp1ElectricalEnergyCounter',
     'func':'last'},
    {'sql':'OLD_hp2_electrical_energy_counter',
     'src':'qc.hp2ElectricalEnergyCounter',
     'func':'last'},
    {'sql':'OLD_hp1_thermal_energy_counter',
     'src':'qc.hp1ThermalEnergyCounter',
     'func':'last'},
    {'sql':'OLD_hp2_thermal_energy_counter',
     'src':'qc.hp2ThermalEnergyCounter',
     'func':'last'},
    {'sql':'OLD_cv_energy_counter',
     'src':'qc.cvEnergyCounter',
     'func':'last'},
    {'sql':'hp1_defrost', 'src':'hp1.defrostFlag', 'func':'mean'},

    # preprocessed from cic stats
    {'sql':'hp1_energy_consumed', 'src':'hp1.energyConsumption', 'func':'sum'},
    {'sql':'hp2_energy_consumed', 'src':'hp2.energyConsumption', 'func':'sum'},
    {'sql':'hp1_data_availability', 'src':'hp1_data_availability', 'func':'max'},
    {'sql':'hp2_data_availability', 'src':'hp2_data_availability', 'func':'max'},
    {'sql':'hp1_heat_generated', 'src':'hp1.heatGenerated', 'func':'sum'},
    {'sql':'hp2_heat_generated', 'src':'hp2.heatGenerated', 'func':'sum'},
    {'sql':'boiler_heat_generated', 'src':'cvHeatGenerated', 'func':'sum'},
    {'sql':'hp1_active', 'src':'hp1.active', 'func':'mean'},
    {'sql':'hp2_active', 'src':'hp2.active', 'func':'mean'},
    {'sql':'boiler_active', 'src':'cvActive', 'func':'mean'}
]

# properties to download from S3
S3_PROPERTIES = {
         'time': ['ts'],
         'system': ['quattId'],
         'qc': ['hp1PowerInput',
                'hp1PowerOutput',
                'hp1ElectricalEnergyCounter',
                'hp2ElectricalEnergyCounter',
                'hp1ThermalEnergyCounter',
                'hp2ThermalEnergyCounter',
                'cvEnergyCounter',
                'cvPowerOutput',
                'supervisoryControlMode'],
         'hp1': ['acInputVoltage',
                 'acInputCurrent',
                 'getFanSpeed',
                 'bottomPlateHeaterEnable',
                 'compressorCrankcaseHeaterEnable',
                 'circulatingPumpDutyCycle',
                 'getCirculatingPumpRelay',
                 'powerInput',
                 'powerOutput',
                 'temperatureOutside',
                 'power',
                 'defrostFlag'],
         'hp2': ['acInputVoltage',
                 'acInputCurrent',
                 'getFanSpeed',
                 'bottomPlateHeaterEnable',
                 'compressorCrankcaseHeaterEnable',
                 'circulatingPumpDutyCycle',
                 'getCirculatingPumpRelay',
                 'powerInput',
                 'powerOuput',
                 'temperatureOutside',
                 'power']}

# Integration keys
INTEGRATION_KEYS = {'hp1.energyConsumption':'hp1.powerConsumption',
                    'hp2.energyConsumption':'hp2.powerConsumption',
                    'hp1.heatGenerated':'hp1.powerOutput',
                    'hp2.heatGenerated':'hp2.powerOutput',
                    'cvHeatGenerated':'cv_power_output'}

# load linear model with pickle
filename = os.path.join(os.path.split(os.getcwd())[0], 'models/energy-power-standard-model.pkl')
with open(filename, 'rb') as f:
    LINEAR_MODEL = pickle.load(f)

### Functions

In [3]:
# function to estimate bphprobability
def bottom_plate_heater_probability(temperature_outside):
    A = 0.31530397864623305
    B = 4.416233732189494
    C = 3.796331812113364

    ans = np.max(
        # if temperature is below 4 degrees always on
        [(temperature_outside<=4).astype(float),
        # between 4 and 7 use probability
        np.all([temperature_outside>4, 
                temperature_outside<=7],
                axis=0)
                .astype(float) \
            *(1 / (A + B * (temperature_outside-C)))
        ], 
        axis=0
    ).tolist()
    return pd.Series(ans)

# replace hp1.powerInput
def prepare_data_for_calculation(df, hp):

    # replace power input
    if hp=='hp1':
        df['hp1.powerInput'] = (
            df['hp1.powerInput'].fillna(df['qc.hp1PowerInput']))

    # set data availability
    df[f'{hp}_data_availability_2'] = (
        df[[f'{hp}.powerInput',
            f'{hp}.getFanSpeed',
            f'{hp}.bottomPlateHeaterEnable',
            f'{hp}.compressorCrankcaseHeaterEnable',
            f'{hp}.circulatingPumpDutyCycle',
            f'{hp}.getCirculatingPumpRelay']]
            .notna()
            .all(axis=1)
            .astype(int) * 2)
    df[f'{hp}_data_availability_1'] = (
        df[[f'{hp}.powerInput',
        f'{hp}.getFanSpeed',
        f'{hp}.temperatureOutside',
        f'{hp}.circulatingPumpDutyCycle',
        f'{hp}.getCirculatingPumpRelay']]
        .notna()
        .all(axis=1)
        .astype(int))

    df[f'{hp}_data_availability'] = (
        np.max(df[[f'{hp}_data_availability_1',
                   f'{hp}_data_availability_2']], axis=1)
    )

    # set bottomplateheaterenable
    df[f'{hp}.bottomPlateHeaterEnable'] = (
        df[f'{hp}.bottomPlateHeaterEnable'].fillna(
            bottom_plate_heater_probability(
                df[f'{hp}.temperatureOutside'].values)
        )
    )

    # set compressorCrankcaseHeaterEnable
    df[f'{hp}.compressorCrankcaseHeaterEnable'] = (
        df[f'{hp}.compressorCrankcaseHeaterEnable'].fillna(
            (df[f'{hp}.temperatureOutside'] > -4).astype(float)
        )
    )
    return df

def estimate_energy_consumption(modelInput, 
                                circulatingPumpDutyCycle, 
                                circulatingPumpRelay, 
                                crankcaseHeater):
    return (LINEAR_MODEL.predict(modelInput)
            + (circulatingPumpDutyCycle * circulatingPumpRelay)
            + (crankcaseHeater * 40))

def integrate_data(df, keys):
    df['timediff[S]'] = df.groupby('system.quattId',
                                   sort='time.ts')['time.ts'].diff()/1000

    for key, value in zip(keys.keys(), keys.values()):
        try:
            df[key] = df[value] * df['timediff[S]'] / 3600
        except KeyError:
            df[key] = np.nan
    
    return df


In [4]:
# aggregate data per hour
def aggregate_data_hourly(df, aggregations):
    '''Aggregate dataframe cic and hour creating a new dataframe.'''

    # check input frame
    if ('time.ts' not in df.columns) | ('system.quattId' not in df.columns):
        raise ValueError('time.ts or system.quattId not in dataframe')
    
    # create time stamps
    df['timestamp_of_data'] = pd.to_datetime(df['time.ts'], unit='ms').dt.ceil('H')
    time_stamps = df.groupby('timestamp_of_data')['timestamp_of_data'].unique().index.values

    df_aggregated = pd.DataFrame(index=time_stamps)
    # add number of rows for each timestamp to df_aggregated
    df_aggregated['number_of_rows'] = (
        df.groupby('timestamp_of_data', sort='time.ts')['time.ts'].count())

    # aggregate data
    for agg in aggregations:
        try:
            if agg['func']=='last':
                df_aggregated[agg['sql']] = (
                    df.groupby('timestamp_of_data',
                               sort='time.ts')[agg['src']]
                               .agg(lambda x: x.iloc[-1])
                )
            elif agg['func']=='sum':
                df_aggregated[agg['sql']] = (
                    df.groupby('timestamp_of_data',
                               sort='time.ts')[agg['src']]
                               .agg(agg['func'], min_count=1)
                )
            else:
                df_aggregated[agg['sql']] = (
                    df.groupby('timestamp_of_data',
                               sort='time.ts')[agg['src']]
                               .agg(agg['func'])
                )
        except KeyError:
            # logger.info(f'Could not aggregate {agg["src"]} with {agg["func"]}')
            df_aggregated[agg['sql']] = np.nan
        except ValueError:
            raise ValueError(f'Could not aggregate {agg["src"]} with {agg["func"]}')
    
    return df_aggregated

In [12]:
def calculate_and_aggregate(df):
    # check for second heat pump
    if (df['hp2.powerInput'].notna().any()):
        heat_pumps = ['hp1', 'hp2'] 
    else:
        heat_pumps = ['hp1']

    for hp in heat_pumps:
        # get data availability and parameters for power calculation
        df = prepare_data_for_calculation(df, hp)

        # calculate powerConsumption for all rows
        df[f'{hp}.powerConsumption'] = (
            estimate_energy_consumption(
            df[[f'{hp}.powerInput',
                    f'{hp}.getFanSpeed',
                    f'{hp}.bottomPlateHeaterEnable']].fillna(0).values,
            df[f'{hp}.circulatingPumpDutyCycle'].values,
            df[f'{hp}.getCirculatingPumpRelay'].values,
            df[f'{hp}.compressorCrankcaseHeaterEnable'].values)
        )

        # set all values of powerconsumption to zero where supervisoryControlMode is NaN
        df.loc[df['qc.supervisoryControlMode'].isna(), 
                    f'{hp}.powerConsumption'] = 0
        
        # get activity of heatpump
        df[f'{hp}.active'] = df['qc.supervisoryControlMode'].isin([2,3]).astype(float)

    # get hp1 power output
    df['hp1.powerOutput'] = (
        df['hp1.powerOutput'].fillna(df['qc.hp1PowerOutput'])
    )

    # get cv power output
    df['cv_power_output'] = df['qc.cvPowerOutput']
    df.loc[df['cv_power_output'] < 0, 'cv_power_output'] = 0
    df['cvActive'] = df['qc.supervisoryControlMode'].isin([3,4]).astype(float)

    # integrate data
    df = integrate_data(df, INTEGRATION_KEYS)

    aggregated_data = aggregate_data_hourly(df, AGGREGATIONS)

    return aggregated_data, df


In [6]:
def make_insert_row_query(index, row):
    query_start = "INSERT INTO cic_data (`time`,"
    query_end = f") VALUES ('{index}',"
    
    # drop nan values
    row = row.dropna()

    for column in row.index:
        query_start += f"{column},"
        query_end += f"'{row[column]}',"
    query = query_start[:-1] + query_end[:-1] + ")"
    return query

# create connection to mysql
def push_data_to_mysql(agg_df: pd.DataFrame):
    mysql_url = os.getenv('MYSQLDEV')
    parsed_mysql_url = urlparse(mysql_url)

    connection = mysql.connector.connect(host=parsed_mysql_url.hostname,
                                            user=parsed_mysql_url.username,
                                            password=parsed_mysql_url.password,
                                            database=parsed_mysql_url.path[1:],
                                            port=parsed_mysql_url.port)

    # create cursor
    cursor = connection.cursor()

    for index, row in agg_df.iterrows():
        print(index)
        query = make_insert_row_query(index, row)
        cursor.execute(query)
        connection.commit()

    # close connection
    cursor.close()
    connection.close()

In [8]:
def extract_data_from_s3(cic_id, start_date, end_date):
    # create s3 clients
    quatt_s3_client_pull = create_s3_client(aws_profile="nout_prod")
    
    # add 20 min to end_date
    extract_df = quatt_s3_client_pull.get_cic_data(cic_ids=cic_id, 
                                                    start_date=start_date,
                                                    end_date=end_date, 
                                                    filter_properties=S3_PROPERTIES,
                                                    cloud_type='production' #production is hardcoded for now
                                                    )
    return extract_df
# load linear model
# # load linear model with pickle
# filename = os.path.join(os.path.split(os.getcwd())[0], 'models/energy-power-standard-model.pkl')
# with open(filename, 'rb') as f:
#     linear_model_standard = pickle.load(f)

# # loop over days
# extract_date = start_date

    

In [3]:
extract_df = pd.read_csv('CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03_2023-01-10_2023-01-25.csv')

# agg_df, extract_df = calculate_and_aggregate(extract_df)

  extract_df = pd.read_csv('CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03_2023-01-10_2023-01-25.csv')


In [13]:
extract_df['timestamp_of_data'] = pd.to_datetime(extract_df['time.ts'], unit='ms')
extract_df['timediff'] = extract_df['time.ts'].sort_values().diff()/1000
extract_df[['timestamp_of_data','qc.supervisoryControlMode','timediff']]

Unnamed: 0,timestamp_of_data,qc.supervisoryControlMode,timediff
0,2023-01-10 14:44:15.924,,
1,2023-01-10 14:44:29.278,,13.354
2,2023-01-10 14:44:43.471,,14.193
3,2023-01-10 14:44:58.419,,14.948
4,2023-01-10 14:45:13.504,,15.085
...,...,...,...
1329131,2023-01-25 00:26:25.987,3.0,1.001
1329132,2023-01-25 00:26:42.428,3.0,16.441
1329133,2023-01-25 00:26:56.532,3.0,14.104
1329134,2023-01-25 00:27:24.989,3.0,14.385


In [17]:
extract_df[~extract_df['qc.supervisoryControlMode'].isna()]['timediff'].value_counts()

1.001     796941
1.000     277569
0.000      92372
1.002      82498
1.003      19765
           ...  
3.425          1
1.301          1
7.422          1
15.013         1
14.072         1
Name: timediff, Length: 906, dtype: int64

In [14]:
agg_df

Unnamed: 0,number_of_rows,cic_id,OLD_hp1_electrical_energy_counter,OLD_hp2_electrical_energy_counter,OLD_hp1_thermal_energy_counter,OLD_hp2_thermal_energy_counter,OLD_cv_energy_counter,hp1_defrost,hp1_energy_consumed,hp2_energy_consumed,hp1_data_availability,hp2_data_availability,hp1_heat_generated,hp2_heat_generated,boiler_heat_generated,hp1_active,hp2_active,boiler_active
2023-01-10 15:00:00,396,CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03,0.000000,,0.000000,,0.000000,0.000000,4.336902,,1,,0.000000,,0.000000,0.000000,,0.000000
2023-01-10 16:00:00,3195,CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03,89.220889,,429.037011,,117.883327,0.000000,306.571777,,1,,938.010317,,287.257777,0.326761,,0.034116
2023-01-10 17:00:00,3659,CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03,418.028806,,2127.783046,,117.883327,0.000000,873.732127,,1,,3689.109088,,0.000000,1.000000,,0.000000
2023-01-10 18:00:00,3896,CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03,734.259639,,3787.157982,,117.883327,0.000000,841.347416,,1,,3596.981071,,0.000000,1.000000,,0.000000
2023-01-10 19:00:00,3595,CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03,874.230056,,4576.380831,,117.883327,0.000000,412.882562,,1,,1709.734753,,0.000000,0.567177,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-24 21:00:00,3627,CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03,111316.115307,,402838.521893,,-125091.180745,0.000000,205.227595,,1,,0.000000,,0.000000,0.000000,,0.000000
2023-01-24 22:00:00,3626,CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03,111316.115307,,402838.521893,,-125091.180745,0.000000,205.202456,,1,,0.000000,,0.000000,0.000000,,0.000000
2023-01-24 23:00:00,3596,CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03,111674.045751,,404118.379661,,-125573.195900,0.121802,1107.018792,,1,,2899.923083,,86.206758,0.731368,,0.730812
2023-01-25 00:00:00,3766,CIC-2395976e-d43e-5e28-b6ce-b26674ad9a03,112212.426140,,406049.547957,,-126242.707854,0.159055,1565.926655,,1,,4381.041262,,166.631478,1.000000,,1.000000


In [11]:
extract_df.columns

Index(['Unnamed: 0', 'time.ts', 'system.quattId', 'qc.hp1PowerInput',
       'qc.hp1PowerOutput', 'qc.hp1ElectricalEnergyCounter',
       'qc.hp2ElectricalEnergyCounter', 'qc.hp1ThermalEnergyCounter',
       'qc.hp2ThermalEnergyCounter', 'qc.cvEnergyCounter', 'qc.cvPowerOutput',
       'qc.supervisoryControlMode', 'hp1.acInputVoltage', 'hp1.acInputCurrent',
       'hp1.getFanSpeed', 'hp1.bottomPlateHeaterEnable',
       'hp1.compressorCrankcaseHeaterEnable', 'hp1.circulatingPumpDutyCycle',
       'hp1.getCirculatingPumpRelay', 'hp1.powerInput', 'hp1.powerOutput',
       'hp1.temperatureOutside', 'hp1.power', 'hp1.defrostFlag',
       'hp2.acInputVoltage', 'hp2.acInputCurrent', 'hp2.getFanSpeed',
       'hp2.bottomPlateHeaterEnable', 'hp2.compressorCrankcaseHeaterEnable',
       'hp2.circulatingPumpDutyCycle', 'hp2.getCirculatingPumpRelay',
       'hp2.powerInput', 'hp2.powerOuput', 'hp2.temperatureOutside',
       'hp2.power', 'cic_id', 'hp1_data_availability_2',
       'hp1_data_avail

In [8]:
### testing mysql upload

In [9]:
def create_table(cursor):
    fd = open('create_table_cic_data.sql', 'r')
    create_table_query = fd.read()
    fd.close()
    cursor.execute(create_table_query)


# write query to post df_agg to mysql
query = """
    INSERT INTO `cic_data` (
        `time`,
        `hp1_energy_consumed`,
        `hp1_heat_generated`,
        `hp1_active`,
        `hp2_energy_consumed`,
        `hp2_heat_generated`,
        `hp2_active`,
        `boiler_heat_generated`,
        `boiler_active`,
        `hp1_data_availability`,
        `hp1_defrost`,
        `OLD_hp1_electrical_energy_counter`,
        `OLD_hp1_thermal_energy_counter`,
        `OLD_hp2_electrical_energy_counter`,
        `OLD_hp2_thermal_energy_counter`,
        `OLD_cv_energy_counter`
    )
    VALUES
    (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

In [41]:
mysql_url = os.getenv('MYSQLDEV')
parsed_mysql_url = urlparse(mysql_url)

try:
    connection = mysql.connector.connect(host=parsed_mysql_url.hostname,
                                         user=parsed_mysql_url.username,
                                         password=parsed_mysql_url.password,
                                         database=parsed_mysql_url.path[1:],
                                         port=parsed_mysql_url.port)
    if connection.is_connected():
        db_Info = connection.get_server_info()
        print("Connected to MySQL Server version ", db_Info)
        cursor = connection.cursor()
        # cursor.execute("DROP TABLE IF EXISTS cic_data")
        # connection.commit()
        # create_table(cursor)
        # connection.commit()
        cursor.execute("SELECT * FROM cic_data")
        # insert data into table
        # for index, row in aggregate_data.iterrows():
        #     cursor.execute(query, (index, 
        #                             row['hp1_energy_consumed'],
        #                             row['hp1_heat_generated'],
        #                             row['hp1_active'],
        #                             row['hp2_energy_consumed'],
        #                             row['hp2_heat_generated'],
        #                             row['hp2_active'],
        #                             row['boiler_heat_generated'],
        #                             row['boiler_active'],
        #                             row['hp1_data_availability'],
        #                             row['hp1_defrost'],
        #                             row['OLD_hp1_electrical_energy_counter'],
        #                             row['OLD_hp1_thermal_energy_counter'],
        #                             row['OLD_hp2_electrical_energy_counter'],
        #                             row['OLD_hp2_thermal_energy_counter'],
        #                             row['OLD_cv_energy_counter']))
        #     connection.commit()
        #     break
        # put data in table
        record = cursor.fetchall()
        print(record)

except Error as e:
    print("Error while connecting to MySQL", e)
finally:
    if connection.is_connected():
        cursor.close()
        connection.close()
        print("MySQL connection is closed")

Connected to MySQL Server version  8.0.28
[]
MySQL connection is closed


In [42]:
connection = mysql.connector.connect(host=parsed_mysql_url.hostname,
                                        user=parsed_mysql_url.username,
                                        password=parsed_mysql_url.password,
                                        database=parsed_mysql_url.path[1:],
                                        port=parsed_mysql_url.port)

# create cursor
cursor = connection.cursor()

In [49]:
for index, row in aggregate_data.iterrows():
    for column in row.columns:
        
    cursor.execute(query, (index, 
                            row['hp1_energy_consumed'],
                            row['hp1_heat_generated'],
                            row['hp1_active'],
                            row['hp2_energy_consumed'],
                            row['hp2_heat_generated'],
                            row['hp2_active'],
                            row['boiler_heat_generated'],
                            row['boiler_active'],
                            row['hp1_data_availability'],
                            row['hp1_defrost'],
                            row['OLD_hp1_electrical_energy_counter'],
                            row['OLD_hp1_thermal_energy_counter'],
                            row['OLD_hp2_electrical_energy_counter'],
                            row['OLD_hp2_thermal_energy_counter'],
                            row['OLD_cv_energy_counter']))
    connection.commit()
    break

ProgrammingError: 1054 (42S22): Unknown column 'nan' in 'field list'

In [50]:
aggregate_data

Unnamed: 0,number_of_rows,cic_id,OLD_hp1_electrical_energy_counter,OLD_hp2_electrical_energy_counter,OLD_hp1_thermal_energy_counter,OLD_hp2_thermal_energy_counter,OLD_cv_energy_counter,hp1_defrost,hp1_energy_consumed,hp2_energy_consumed,hp1_data_availability,hp2_data_availability,hp1_heat_generated,hp2_heat_generated,boiler_heat_generated,hp1_active,hp2_active,boiler_active
2023-04-01 00:00:00,13,CIC-7eede49c-42c2-5b41-94aa-481dad189abf,35267.135222,,161798.280829,,54868.497657,0.0,0.017186,0.017186,2,2,,,0.0,0.000000,0.000000,0.0
2023-04-01 01:00:00,3589,CIC-7eede49c-42c2-5b41-94aa-481dad189abf,35267.135222,,161798.280829,,54868.497657,0.0,5.149900,5.149900,2,2,,,0.0,0.000000,0.000000,0.0
2023-04-01 02:00:00,3590,CIC-7eede49c-42c2-5b41-94aa-481dad189abf,35267.135222,,161798.280829,,54868.497657,0.0,5.150647,5.150647,2,2,,,0.0,0.000000,0.000000,0.0
2023-04-01 03:00:00,3619,CIC-7eede49c-42c2-5b41-94aa-481dad189abf,35267.135222,,161798.280829,,54868.497657,0.0,5.149320,5.149320,2,2,,,0.0,0.000000,0.000000,0.0
2023-04-01 04:00:00,3590,CIC-7eede49c-42c2-5b41-94aa-481dad189abf,35333.988610,,162258.334396,,54868.497657,0.0,292.186334,13.472858,2,2,,,0.0,0.463788,0.463788,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-04-10 20:00:00,3590,CIC-7eede49c-42c2-5b41-94aa-481dad189abf,70801.842725,,357102.915235,,58438.939901,0.0,5.151046,5.151046,2,2,,,0.0,0.000000,0.000000,0.0
2023-04-10 21:00:00,3589,CIC-7eede49c-42c2-5b41-94aa-481dad189abf,70801.842725,,357102.915235,,58438.939901,0.0,5.149786,5.149786,2,2,,,0.0,0.000000,0.000000,0.0
2023-04-10 22:00:00,3590,CIC-7eede49c-42c2-5b41-94aa-481dad189abf,70801.842725,,357102.915235,,58438.939901,0.0,5.150564,5.150564,2,2,,,0.0,0.000000,0.000000,0.0
2023-04-10 23:00:00,3590,CIC-7eede49c-42c2-5b41-94aa-481dad189abf,70801.842725,,357102.915235,,58438.939901,0.0,5.149909,5.149909,2,2,,,0.0,0.000000,0.000000,0.0


In [11]:
for index, row in aggregate_data.iterrows():
    insert_query = make_insert_row_query(index, row)
    break

In [11]:
# create connection to mysql
mysql_url = os.getenv('MYSQLDEV')
parsed_mysql_url = urlparse(mysql_url)

connection = mysql.connector.connect(host=parsed_mysql_url.hostname,
                                        user=parsed_mysql_url.username,
                                        password=parsed_mysql_url.password,
                                        database=parsed_mysql_url.path[1:],
                                        port=parsed_mysql_url.port)

# create cursor
cursor = connection.cursor()
cursor.execute("DROP TABLE IF EXISTS cic_data")
connection.commit()
create_table(cursor)
connection.commit()

for index, row in agg_df.iterrows():
    print(index)
    query = make_insert_row_query(index, row)
    cursor.execute(query)
    connection.commit()

# close connection
cursor.close()
connection.close()