## Load Libraries

In [1]:
#! conda install paramiko -y
#! conda install bs4 -y
#! conda install requests -y
#! conda install numpy -y
#! conda install pandas -y
#! conda install selenium -y
#! conda install sqlalchemy -y
#! pip3 install mysql-connector-python

In [2]:
import numpy as np
from datetime import datetime, timedelta
import pandas as pd
#import matplotlib.pyplot as plt
import os
import sys
import time
from path import path as root_path

from sp import SunnyPortal as sp
from ae import AlsoEnergy as ae
from ws import WeatherStation as ws
from de import DominionEnergySFTP as de_sftp
from mysql_lib import mySQLConnect as mysql_lib

  "class": algorithms.Blowfish,


## Path

In [3]:
# path
# Per-source data directories, built by appending to root_path (imported from
# the project's `path` module). Each value keeps its trailing "/" because later
# cells concatenate file names directly onto it (e.g. ae_path + "ae_" + ...).
sp_path = root_path + "sp/sp_data/"
ae_path = root_path + "ae/ae_data/"
de_path = root_path + "de/de_data/"
ws_path = root_path + "ws/ws_data/"
tmy_path = root_path + "tmy/"

# Locations handed to the AlsoEnergy downloader in ae_download().
driverPath = root_path + "chromedriver"
chromePath = root_path + "chrome/chrome"

# NOTE(review): this adds the chromedriver location to Python's module search
# path (sys.path), not to the OS PATH — confirm that this is what was intended.
sys.path.append(driverPath)

## Date Range 

In [4]:
# First day of the processing window (midnight, naive datetime).
start_date = datetime(2023,9,20)
# Yesterday relative to the moment this cell runs. datetime.today() carries the
# current time of day, so end_date is not midnight-aligned; the day count in the
# index cell only uses (end_date-start_date).days.
end_date =  datetime.today() - timedelta(days=1) #datetime(2024,4,15)

## Other Parameters

In [5]:
# General parameters
interval, thr = 5, 0.4
num_display = 8
# num_device scales the SunnyPortal per-day row count in the index cell
num_device = 3

# Sampling periods of each source (assumed to be minutes — TODO confirm)
sp_interval, ae_interval, tmy_interval = 5, 1, 60

# Number of source samples per TMY sample
sp_itv = tmy_interval // sp_interval
ae_itv = tmy_interval // ae_interval

## Acquire Data

In [6]:
def ae_download():
    """Download Also Energy data for start_date..end_date into ae_path.

    The account credentials are read from the ``AE_USERNAME`` and
    ``AE_PASSWORD`` environment variables so that they are never stored in
    the notebook. (Credentials that were previously committed here should be
    rotated.)

    Raises:
        RuntimeError: if either environment variable is missing or empty.
    """
    ae_username = os.environ.get("AE_USERNAME")
    ae_password = os.environ.get("AE_PASSWORD")
    # Fail before the downloader is created, with a message that names the fix.
    if not ae_username or not ae_password:
        raise RuntimeError(
            "Set the AE_USERNAME and AE_PASSWORD environment variables "
            "before calling ae_download()."
        )

    ae_object = ae.AlsoEnergy(ae_path, driverPath, chromePath)
    ae_object.setUserName(ae_username)
    ae_object.setPassword(ae_password)
    ae_object.setStartDate(start_date)
    ae_object.setEndDate(end_date)
    # Runs the download itself (method shares the class name).
    ae_object.AlsoEnergy()

In [7]:
def sp_download():
    """Download Sunny Portal data for start_date..end_date into sp_path.

    The account credentials are read from the ``SP_USERNAME`` and
    ``SP_PASSWORD`` environment variables so that they are never stored in
    the notebook. (Credentials that were previously committed here should be
    rotated.)

    Raises:
        RuntimeError: if either environment variable is missing or empty.
    """
    sp_username = os.environ.get("SP_USERNAME")
    sp_password = os.environ.get("SP_PASSWORD")
    # Fail before the downloader is created, with a message that names the fix.
    if not sp_username or not sp_password:
        raise RuntimeError(
            "Set the SP_USERNAME and SP_PASSWORD environment variables "
            "before calling sp_download()."
        )

    sp_object = sp.SunnyPortal(sp_path)
    sp_object.setUserName(sp_username)
    sp_object.setPassword(sp_password)
    sp_object.setStartDate(start_date)
    sp_object.setEndDate(end_date)
    # Runs the download itself (method shares the class name).
    sp_object.SunnyPortal()

In [8]:
def ws_download():
    """Download weather-station data for start_date..end_date into ws_path.

    The API key is read from the ``WS_API_KEY`` environment variable so that
    it is never stored in the notebook. (A key that was previously committed
    here should be revoked and reissued.)

    Raises:
        RuntimeError: if the environment variable is missing or empty.
    """
    apiKey = os.environ.get("WS_API_KEY")
    # Fail before the downloader is created, with a message that names the fix.
    if not apiKey:
        raise RuntimeError(
            "Set the WS_API_KEY environment variable before calling ws_download()."
        )

    ws_object = ws.WeatherStation(ws_path, apiKey)
    ws_object.setStartDate(start_date)
    ws_object.setEndDate(end_date)
    # Runs the download itself (method shares the class name).
    ws_object.WeatherStation()

In [9]:
def de_download():
    """Fetch Dominion Energy SFTP files that are not yet stored locally.

    Connects with the private key, lists the remote ``./outbound/`` folder and
    downloads every file whose name is not already present in the local
    download folder. Files that already exist are reported and skipped.

    The key path and the download folder are relative to the current working
    directory (the module-level ``de_path`` is not used here).
    """
    sftp_host = 'secureftp.dominionenergy.com'  # Hostname from dominion energy
    sftp_username = 'DESC_CLEMSON'  # Clemson Username
    sftp_private_key_path = "de/sftp/clemson_privatekey.pem"
    out_folder_name = "./outbound/"
    download_folder = "de/de_data/"

    # The connection class only receives host, user and key path; the unused
    # local port variable was dropped.
    dm_sftp_obj = de_sftp.DominionEnergySFTP(sftp_host, sftp_username, sftp_private_key_path)
    dm_sftp_obj.connect()
    # NOTE(review): the connection is never closed here; the class's API for
    # closing it is not visible from this notebook — confirm and add if available.

    out_file_list = dm_sftp_obj.read_file_list(out_folder_name)

    # Make sure the target exists, then list it once instead of once per file.
    os.makedirs(download_folder, exist_ok=True)
    already_downloaded = set(os.listdir(download_folder))

    for file in out_file_list:
        if file in already_downloaded:
            print(file, "existed!")
        else:
            dm_sftp_obj.download(file, download_folder)
            # Assumes download() stores the file under the same name, so a
            # repeated entry in the remote listing is skipped as before.
            already_downloaded.add(file)

In [10]:
# Data Collection
# Only the Also Energy download is enabled; the other three sources are
# commented out and must be re-enabled by hand when their data is needed.
# NOTE(review): the saved output of this cell is a WebDriverException about the
# chromedriver executable's permissions, so this call did not complete.
ae_download()
#ws_download()
#sp_download()
#de_download()

/home/liweiw/TRACE_PV/data_process/data_management_platform/


WebDriverException: Message: '' executable may have wrong permissions. Please see https://sites.google.com/a/chromium.org/chromedriver/home


## Load Data

### Load TMY as Reference

#### Load TMY Data

In [None]:
# Read every TMY csv file in tmy_path and stack them into one frame.
# sorted() makes the concatenation order independent of the filesystem's
# listing order, so the result is reproducible between runs.
tmy_list = sorted(os.listdir(tmy_path))
tmy_dfs = []
for file in tmy_list:
    if not file.endswith("csv"):
        continue
    try:
        # The first two rows of each file are skipped before the header row.
        tmy_dfs.append(pd.read_csv(tmy_path + file, delimiter=',', skiprows=2))
    except (OSError, ValueError) as exc:
        # pandas' parser / empty-data / decode errors are all ValueError
        # subclasses. Report the reason instead of hiding it; unexpected
        # errors (and KeyboardInterrupt) are no longer swallowed.
        print(file, "data missed", exc)

if not tmy_dfs:
    # Same exception type pd.concat would raise, but with an actionable message.
    raise ValueError("No readable TMY csv files found in " + str(tmy_path))

tmy_df = pd.concat(tmy_dfs)

#### Calculate History High and Low

In [None]:
# History High and Low
# Series.max()/.min() skip NaN by default; the builtin max()/min() compare
# element by element, so a NaN in the column makes their result depend on
# where the NaN sits.
max_ghi = tmy_df['GHI'].max()
min_ghi = tmy_df['GHI'].min()
max_temp = tmy_df['Temperature'].max()
min_temp = tmy_df['Temperature'].min()
max_rh = tmy_df['Relative Humidity'].max()
min_rh = tmy_df['Relative Humidity'].min()
# Last expression: shown as the cell output
max_ghi, min_ghi, max_temp, min_temp, max_rh, min_rh

### Load Field Data (Sunny Portal, Also Energy, Weather Station)

#### Calculate the Index

In [None]:
# Number of calendar days in the window, counting both endpoints
total_days = (end_date - start_date).days + 1

# Expected number of timestamps per day for each source
sp_num_record = np.zeros(total_days, dtype=np.int64)
ae_num_record = np.zeros(total_days, dtype=np.int64)
ws_num_record = np.zeros(total_days, dtype=np.int64)

for i in range(total_days):
    current_date = start_date + timedelta(days=i)
    # Timezone-aware ranges: the length of each one follows US/Eastern local time
    sp_time_list = pd.date_range(start=current_date.replace(hour=0, minute=0), end=current_date.replace(hour=23, minute=55), freq='5min', tz='US/Eastern')
    ae_time_list = pd.date_range(start=current_date.replace(hour=0, minute=0), end=current_date.replace(hour=23, minute=59), freq='1min', tz='US/Eastern')
    ws_time_list = pd.date_range(start=current_date.replace(hour=0, minute=30), end=current_date.replace(hour=23, minute=59), freq='60min', tz='US/Eastern')

    sp_num_record[i] = len(sp_time_list)
    ae_num_record[i] = len(ae_time_list)
    ws_num_record[i] = len(ws_time_list)

# Row slices per day: each day's end is the running total of rows so far and
# its start is the previous day's end (0 for the first day).
# SunnyPortal holds num_device rows per timestamp.
sp_rows_per_day = num_device * sp_num_record
sp_end_index = np.cumsum(sp_rows_per_day, dtype=np.int64)
sp_start_index = sp_end_index - sp_rows_per_day

ae_end_index = np.cumsum(ae_num_record, dtype=np.int64)
ae_start_index = ae_end_index - ae_num_record

ws_end_index = np.cumsum(ws_num_record, dtype=np.int64)
ws_start_index = ws_end_index - ws_num_record

#### Load Data File: Also Energy(ae_df), SunnyPortal(sp_env_df, sp_op_df), Weather Station(ws_df)

In [None]:
# One list of per-day frames for every source
ae_dfs, sp_env_dfs, sp_op_dfs, ws_dfs, ws_tmy_dfs = [], [], [], [], []

current_date = start_date
for i in range(total_days):
    current_date = start_date + timedelta(days=i)
    # All daily files share the "<prefix>_YYYY-MM-DD.csv" naming scheme
    day_suffix = current_date.strftime("%Y-%m-%d") + ".csv"
    ae_filename = ae_path + "ae_" + day_suffix
    sp_env_filename = sp_path + "environmental/" + "sp_" + day_suffix
    sp_op_filename = sp_path + "operating/" + "sp_" + day_suffix
    ws_filename = ws_path + "ws_" + day_suffix

    # Read in the same order as before: AE, SP environmental, SP operating, WS
    ae_dfs.append(pd.read_csv(ae_filename, sep=','))
    sp_env_dfs.append(pd.read_csv(sp_env_filename, sep=','))
    sp_op_dfs.append(pd.read_csv(sp_op_filename, sep=','))
    ws_dfs.append(pd.read_csv(ws_filename, sep=','))

    # Row counts of the two SunnyPortal files and their difference
    op_rows = len(sp_op_dfs[-1])
    env_rows = len(sp_env_dfs[-1])
    print(current_date, op_rows, env_rows, op_rows - env_rows)

# Stack the daily frames into one frame per source with a fresh 0..n-1 index
ae_df = pd.concat(ae_dfs, ignore_index=True)
# The first AE column is the index column written with the csv; remove it
ae_df = ae_df.drop(columns=ae_df.columns[0])

sp_env_df = pd.concat(sp_env_dfs, ignore_index=True)
sp_op_df = pd.concat(sp_op_dfs, ignore_index=True)
ws_df = pd.concat(ws_dfs, ignore_index=True)

#### Clean Data: Replace -1 Readings with 0 (`ir` is 0 outside the operating time)

In [None]:
# Rows whose reading equals -1, captured before anything is overwritten
mask_op = sp_op_df['ac_power'].eq(-1)
mask_env = sp_env_df['ir'].eq(-1)

# Environmental frame: zero the irradiance on the flagged rows
sp_env_df.loc[mask_env.to_numpy(), 'ir'] = 0.0

# Operating frame: the ac_power mask drives all three power columns
for power_column in ('ac_power', 'dc_power_a', 'dc_power_b'):
    sp_op_df.loc[mask_op.to_numpy(), power_column] = 0.0


#### Clean Data: Readings above the historical (TMY) high or below the historical low are replaced with the previous valid value (forward fill)

In [None]:
# Flag readings that fall outside the range observed in the TMY reference data.
mask_sp_max_ghi = sp_env_df['ir'] > max_ghi
mask_sp_min_ghi = sp_env_df['ir'] < min_ghi
mask_sp_max_temp = sp_env_df['ambient_temp2'] > max_temp
mask_sp_min_temp = sp_env_df['ambient_temp2'] < min_temp
mask_sp_max_rh = sp_env_df['ambient_rh'] > max_rh
mask_sp_min_rh = sp_env_df['ambient_rh'] < min_rh
mask_ae_max_ghi = ae_df['GHI'] > max_ghi
mask_ae_min_ghi = ae_df['GHI'] < min_ghi

# Blank out the flagged readings (too high or too low) in one step per column.
sp_env_df.loc[mask_sp_max_ghi | mask_sp_min_ghi, 'ir'] = np.nan
sp_env_df.loc[mask_sp_max_temp | mask_sp_min_temp, 'ambient_temp2'] = np.nan
sp_env_df.loc[mask_sp_max_rh | mask_sp_min_rh, 'ambient_rh'] = np.nan
ae_df.loc[mask_ae_max_ghi | mask_ae_min_ghi, 'GHI'] = np.nan

# Forward-fill the gaps with the previous valid reading. Series.ffill() replaces
# the deprecated fillna(method='ffill') spelling and behaves the same way: a gap
# at the very start of a column has no previous value and stays NaN.
sp_env_df['ir'] = sp_env_df['ir'].ffill()
sp_env_df['ambient_temp2'] = sp_env_df['ambient_temp2'].ffill()
sp_env_df['ambient_rh'] = sp_env_df['ambient_rh'].ffill()
ae_df['GHI'] = ae_df['GHI'].ffill()


### Load Dominion Energy (PQDIF) Data 

## Insert to MySQL Database

### Make Connection to MySQL : mysql_obj

In [None]:
def mysql_connect():
    """Open a connection to the project's MySQL database.

    The password is read from the ``MYSQL_PASSWORD`` environment variable so
    that it is never stored in the notebook. (A password that was previously
    committed here should be changed.)

    Returns:
        The connected ``mySQLConnect`` object.

    Raises:
        RuntimeError: if the environment variable is missing or empty.
    """
    mysql_host = 'hpcese1.computing.clemson.edu'
    mysql_username = 'tracepv'
    mysql_database = 'tracepv'

    mysql_password = os.environ.get("MYSQL_PASSWORD")
    # Fail before any connection attempt, with a message that names the fix.
    if not mysql_password:
        raise RuntimeError(
            "Set the MYSQL_PASSWORD environment variable before calling mysql_connect()."
        )

    mysql_connect_object = mysql_lib.mySQLConnect(mysql_username, mysql_password, mysql_database, mysql_host)
    mysql_connect_object.connect()

    return mysql_connect_object

In [None]:
# Open the connection that the insert cells further down reuse.
mysql_obj = mysql_connect()
#mysql_obj.query("SELECT SunnyPortal.deviceID FROM SunnyPortal WHERE SunnyPortal.time BETWEEN '2022-07-22 23:47:00' AND '2022-07-22 23:48:00' ")
# Spot-check query against the SunnyPortal table for one timestamp and one
# device; as the last expression of the cell its return value is displayed.
mysql_obj.query("SELECT * FROM SunnyPortal WHERE SunnyPortal.time = '2022-07-22 23:47:00' AND SunnyPortal.deviceID=29")

### For Also Energy Data

#### Adjust Data Format

In [None]:
# The string below is only a note listing the Also Energy column names; it is
# a bare expression and has no effect at runtime.
'''
AE Description:

    AE = {'Time','GHI','POA','ambient_temp','module_temp'}

'''
# Last expression of the cell: displays the Also Energy frame.
ae_df

#### Insert the dataframe Day by Day

In [None]:
# Walk through the window one day at a time and slice that day's rows out of
# ae_df using the precomputed ae_start_index / ae_end_index boundaries.
# The insert itself is commented out, so this loop currently writes nothing.
for i in range(total_days):
    current_date = start_date + timedelta(days=i)
    current_df = ae_df.iloc[ae_start_index[i]:ae_end_index[i]]
    # Insert Operation
    #print(current_date)
    #print(mysql_obj.insert(current_df, "AlsoEnergy"))

### For Weather Station Data

#### Calculate Weather Points

In [None]:
severe_weather = ['Haze', 'Thunder', 'Storm', 'Heavy', 'Drizzle', 'T-Storm'] # 10 points
mild_weather = ['Cloudy', 'Rain', 'Fog', 'Smoke', 'Mist'] # 5 point
index = 0  # not used in this cell; kept in case another cell relies on it

# Number of rows per distinct condition string. The previous hand-written
# counter started every key at 0, so each count was one too low.
weather_condition = ws_df['weather_condition'].value_counts(dropna=False).to_dict()


def _weather_score(condition):
    """Return the weather points for one condition string.

    The string is split on "/" and then on whitespace; every word found in
    ``severe_weather`` adds 10 points and every word found in ``mild_weather``
    adds 5. Missing values are converted with ``str()`` and therefore score 0.
    """
    points = 0
    for part in str(condition).split("/"):
        # Compare whole words. Iterating the string itself yields single
        # characters, which can never equal a keyword, so every score was 0.
        for word in part.split():
            if word in severe_weather:
                points = points + 10
            elif word in mild_weather:
                points = points + 5
    return points


# One score per row, computed without relying on the frame's index labels.
ws_df['weather_score'] = [_weather_score(value) for value in ws_df['weather_condition']]

#### Adjust Data Format

In [None]:
# Note listing weather-station column names. It is a bare tuple expression with
# no effect at runtime. NOTE(review): some names carry stray spaces ('time ',
# ' weather_score') — confirm against the real ws_df columns.
'time ', 'ambient_temperature',  'relative_humidity',  'weather_condition ',' weather_score'

# Last expression of the cell: displays the weather-station frame.
ws_df

#### Insert the dataframe Day by Day

In [None]:
# Walk through the window one day at a time and slice that day's rows out of
# ws_df. The slice must use the weather-station boundaries (ws_start_index /
# ws_end_index); the Also Energy boundaries used before are sized for
# 1-minute data and select the wrong rows from the hourly weather frame.
# The insert itself is commented out, so this loop currently writes nothing.
for i in range(total_days):
    current_date = start_date + timedelta(days=i)
    current_df = ws_df.iloc[ws_start_index[i]:ws_end_index[i]]
    # Insert Operation
    #print(current_date)
    #print(mysql_obj.insert(current_df, "WeatherStation"))

### For SunnyPortal

#### Adjust Data Format

In [None]:
# Put the operating and environmental frames side by side (column-wise concat).
# Rows are matched on the index, so both frames need the same row index; the
# loading cell prints the per-day row-count difference between the two files.
sp_df_origin = pd.concat([sp_op_df, sp_env_df],axis=1)
# Reorder purely by column position: 0-26, then 32, then 30-31, then 33-35 and
# finally 27, which gives 34 columns. Positions 28-29 are left out.
# NOTE(review): presumably 28-29 are the environmental file's own time/device
# columns duplicating the operating ones — confirm against the csv headers.
sp_df = pd.concat([sp_df_origin.iloc[:,:27], sp_df_origin.iloc[:,32:33], sp_df_origin.iloc[:,30:32], sp_df_origin.iloc[:,33:36], sp_df_origin.iloc[:,27:28]], axis=1)
# Assign the 34 final column names positionally; the order must match the
# selection above exactly, otherwise values end up under the wrong name.
sp_df.columns = ['time', 'ac_power', 'ac_power_l1', 'ac_power_l2', 'ac_power_l3',
       'ac_reactive_power', 'ac_reactive_power_l1', 'ac_reactive_power_l2',
       'ac_reactive_power_l3', 'ac_apparent_power', 'ac_apparent_power_l1',
       'ac_apparent_power_l2', 'ac_apparent_power_l3', 'ac_voltage_l1',
       'ac_voltage_l2', 'ac_voltage_l3', 'ac_current_l1', 'ac_current_l2',
       'ac_current_l3', 'grid_frequency', 'dc_power_a', 'dc_power_b',
       'dc_voltage_a', 'dc_voltage_b', 'dc_current_a', 'dc_current_b', 'iso', 'ir',
       'ambient_temp', 'ambient_rh', 'cap_temp', 'relay_temp', 'rh', 'deviceID']

#### Insert the Dataframe Day by Day

In [None]:
# Insert the SunnyPortal rows one day at a time. Each slice uses the
# precomputed sp_start_index / sp_end_index boundaries, which allow
# num_device rows per timestamp.
# Unlike the Also Energy and weather-station loops, this insert is live: running
# the cell writes to the "SunnyPortal" table.
for i in range(total_days):
    current_date = start_date + timedelta(days=i)
    current_df = sp_df.iloc[sp_start_index[i]:sp_end_index[i]]
    # Insert Operation
    print(current_date)
    # Prints whatever mysql_obj.insert returns for this day's slice.
    print(mysql_obj.insert(current_df, "SunnyPortal"))