<span style="color:Blue; font-family:Times New Roman; font-size:1.2em;">Import Required Packages</span>

In [1]:
### Data Processing Packages
import requests # web api 
import platform, os, sys # system, file,and directory 
import json, pprint # json 
import logging, timeit # run time 
import pandas as pd 
import numpy as np

### Datetime Processing Packages
from datetime import datetime # datetime 
import time, dateutil, pytz, calendar # UNIX Time
from tzlocal import get_localzone # timezone 

### Dashboard Packages
import plotly
import plotly.plotly as py
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

plotly.tools.set_credentials_file(username='jasonleetoronto2018', api_key='nj442HUT4BlVXOwawT1l')
init_notebook_mode(connected=True)

### Parallel Processing Packages
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed

<span style="color:Blue; font-family:Times New Roman; font-size:1.2em;">Check Current Working Directory</span>

In [2]:
print ("\033[2;37;40mCurrent file directory is in {} computer and file folder {}\n" .format(platform.uname()[1], os.getcwd()))
os.chdir("C:\\Users\\leejas\\Desktop\\Summer Project")
print (os.getcwd())

[2;37;40mCurrent file directory is in YRK59206 computer and file folder C:\Users\leejas\Desktop\Summer Project

C:\Users\leejas\Desktop\Summer Project


<span style="color:Blue; font-family:Times New Roman; font-size:1.2em;">Read Smart Meter Information</span>

In [69]:
def trimAllColumns(df):
    """
    Trim whitespace from ends of each value across all series in dataframe
    """
    trimStrings = lambda x: x.strip() if type(x) is str else x
    return df.applymap(trimStrings)

def read_meter_info():
    ### Create function for reading in smart meter ip address
    
    df_meter_info = pd.read_excel("meter_ip_list.xlsx")
    df_meter_info.head(5)

    for column in df_meter_info.columns:
        df_meter_info[column]=df_meter_info[column].astype(str)
        
    trimAllColumns(df_meter_info)    
    
    return df_meter_info

df_meter_info = read_meter_info()
df_meter_info.rename(columns={'IP Address':'IP_Address'}, inplace=True) # Remove space in column name
df_meter_info["Real Time"]=df_meter_info['IP_Address'].apply(lambda x: '<a href="http://'+x+'/istat.html">'+x+'</a>')

list_meter_name = list(df_meter_info["Meter"]) 
list_meter_ip = list(df_meter_info["IP_Address"]) 

table = ff.create_table(df_meter_info[["Meter","Real Time"]])
table.layout.width=350
py.iplot(table)


<span style="color:Blue; font-family:Times New Roman; font-size:1.2em;">Define Required Time Module</span>

In [4]:
def z3_to_posix(z3_date):
    
    ''' Changes epoch base z3_date(int)from 2010/01/01(Z3 Epoch time)\
    to 1970/01/01 (posix/UNIX Epoch time) 
    '''
    
    z3_epoch_base = "2010-01-01 00:00:00"    
    posix_epoch_base = "1970-01-01 00:00:00"
    
    time_format = "%Y-%m-%d %H:%M:%S"
    
    # Calculate time delta between z3_epoch_base_date & posix_epoch_base_date
    start_date = datetime.strptime(posix_epoch_base, time_format) # convert string into datetime
    end_date = datetime.strptime(z3_epoch_base, time_format) # convert string into datetime
    
    diff = end_date - start_date
    
    '''
    diff.days 
    diff.seconds//3600 
    diff.seconds % 3600/60
    diff.seconds % 3600 % 3600
    
    '''
    return z3_date + diff.total_seconds()

def posix_to_z3(posix_date):
    
    # Changes epoch base posix_date(int)from to 2010/01/01(Z3 Epoch time)

    
    z3_epoch_base = "2010-01-01 00:00:00"    
    posix_epoch_base = "1970-01-01 00:00:00"
    
    time_format = "%Y-%m-%d %H:%M:%S"
    
    # Calculate time delta between z3_epoch_base_date & posix_epoch_base_date
    start_date = datetime.strptime(posix_epoch_base, time_format) # convert string into datetime
    end_date = datetime.strptime(z3_epoch_base, time_format) # convert string into datetime
    
    diff = end_date - start_date
    
    return posix_date - diff.total_seconds()

def posix_to_local(utc_date):
    # Create a function for converting POSIX time to UTC time to local timezone time
        
    local_tz = get_localzone() # get local timezone
    utc_time = datetime.utcfromtimestamp(utc_date) # UTC Time
    local_time = utc_time.replace(tzinfo=pytz.utc).astimezone(local_tz) # Local Timezone Time
    
    return local_time

def local_to_posix(local_date):
    # Create a function for converting local time zone time to posix time
    
    local_tz = pytz.timezone(str(get_localzone())) # get local time zone & create pytz tz timezone object
    time_format = "%Y-%m-%d %H:%M:%S"
    
    local_date = datetime.strptime(local_date, time_format)
    local_date = local_tz.localize(local_date)
    local_date = local_date.astimezone(dateutil.tz.tzutc()).timetuple()
    
    return calendar.timegm(local_date)

def z3_to_local(z3_date):
    return posix_to_local(z3_to_posix(z3_date))

def local_to_z3(local_date):
    return posix_to_z3(local_to_posix(local_date))

def settlement_day_of_month(mydate, end=1):
    ### Create a function for calculating the last day of the month
    
    time_format = "%Y-%m-%d"
    mydate=datetime.strptime(mydate, time_format)
    
    if end==0:
        mydate=datetime(mydate.year,mydate.month,1)
    
    else:
        mydate=datetime(mydate.year,mydate.month,1)+dateutil.relativedelta.relativedelta(months=1,days=-1)
    
    return mydate

<span style="color:Blue; font-family:Times New Roman; font-size:1.2em;">Create ETL Module</span>

In [72]:
def fetch_datalog(meter_name, url):
    # Import API processing packages
    import requests # http package
    import json # json package
    
    # Fetch datalog smart meter data 
    try:
        url = requests.get("http://"+url+"/datalog.json?hdr=1")
        dict_datalog = json.loads(url.text)
        power_mult = dict_datalog["scale"][4]
        energy_mult = dict_datalog["scale"][6]
        column_names = dict_datalog["names"]
        list_power = dict_datalog["logdata"]
        df_power = pd.DataFrame(list_power,columns=column_names)
        
        df_power["Local Time"] = df_power["time"].apply(lambda x: datetime.strftime(z3_to_local(x),"%Y-%m-%d %H:%M:%S"))
        df_power["P(A+B+C)"] = df_power["P(A+B+C)"]*power_mult
        df_power["kWh(A+B+C)"] = df_power["WHr(A+B+C)"]*(energy_mult/1000)
        
        df_power.rename(columns={"time":"Z3 Datetime"}, inplace=True)
        df_datalog[meter_name]=df_power.sort_values(["Local Time"], ascending=[False])
        print "\033[2;34;40mSuccessfully fetched {} datalog from {} to {}\n" .format(meter_name,df_power["Local Time"].iloc[0], df_power["Local Time"].iloc[-1])
    
    except Exception as e:
        print "\033[2;31;43m{}\n".format(e) 
    

In [79]:
def fetch_1min(meter_name, url):
    
    # Import API processing packages
    import requests # http package
    import json # json package
    
    # Fetch 1min smart meter data from start_date to end_date
    try:
        url = requests.get("http://"+url+"/sdata.json?m=f1t")
        dict_1min = json.loads(url.text)
        power_mult = dict_1min["pmul"]
        list_power = dict_1min["power"]
        df_power = pd.DataFrame(list_power)        
        
        list_dt = []
        list_W = []
        list_kWh = [] 
    
        # Flatten nested list with z3_datetime and power_watt columns
        for _, row in df_power.iterrows():
            list_dt += range(row[0],row[0] + 60*(len(row[1])),60)
            list_W += row[1]

        list_W = pd.Series(list_W)*power_mult     
        list_kWh = list_W.cumsum()/(1000*60)

        data = {"date_time":list_dt,"W": list_W, "kWh":list_kWh}
        df_meter = pd.DataFrame(data)

        # Create meter_loaded and local_time columns
        df_meter["local_time"] = df_meter["date_time"].apply(lambda x: datetime.strftime(z3_to_local(x),"%Y-%m-%d %H:%M:%S"))

        # Filter out dates before beginning date of the currne datalog
        df_meter = df_meter.loc[df_meter["date_time"] >= start_date].sort_values(["local_time"], ascending=[False])
        df_meter.rename(columns={"date_time":"Z3 Datetime", "local_time":"Local Time", "W": "P(A+B+C)", "kWh":"kWh(A+B+C)"}, inplace = True)
        df_1min[meter_name] = df_meter
        
        print "\033[2;36;40mSuccessfully fetched {} 1MIN data from {} to {}\n".format(meter_name, df_meter["Local Time"].iloc[0], df_meter["Local Time"].iloc[-1])

    except Exception as e:
        print "\033[2;31;43m{}\n".format(e) 


In [12]:
def fetch_1hour(meter_name, url):
    
    # Import API processing packages
    import requests # http package
    import json # json package
    
    # Create date subsetting api paramter
    t_str = "&t="+str(start_date) if len(str(start_date))!=0 else ""

    date_str = t_str

    # Fetch 1min smart meter data from start_date to end_date
    try:
        url = requests.get("http://"+url+"/sdata.json?m=f1h"+date_str)
        dict_1hour = json.loads(url.text)

        energy_mult = dict_1hour["emul"]
        
        list_power = dict_1hour["energy"]
        df_power = pd.DataFrame(list_power, columns=["date_time","Whr(A+B+C)"])
       
        df_power["Local Time"] = df_power["date_time"].apply(lambda x: datetime.strftime(z3_to_local(x),"%Y-%m-%d %H:%M:%S"))
        df_power["kWh(A+B+C)"] = df_power["Whr(A+B+C)"]*(energy_mult/1000)
        df_power["Loaded Meter"] = meter_name
        
        
        df_power.rename(columns={"date_time":"Z3 Datetime"}, inplace=True)
        
        df_power = df_power[["Z3 Datetime", "Local Time","kWh(A+B+C)"]]
        
        df_1hour[meter_name] = df_power.loc[df_power["Z3 Datetime"] >= start_date].sort_values(["Local Time"], ascending=[False])

        print "\033[2;36;40mSuccessfully stored {} 1HOUR data from {} to {}\n".format(meter_name, df_power["Local Time"].iloc[0], df_power["Local Time"].iloc[-1])    
        
    except Exception as e:
        print "\033[2;31;43m{}\n".format(e) 


<span style="color:Blue; font-family:Times New Roman; font-size:1.2em;">Data Extraction and Transform using Parallel Processing</span>

In [11]:
### Datalog Data Extraction
df_datalog = {} # create temporary 1min dataframe dictionary

### Execute Parallel Processing Data Extraction using Cuncurrent.futures
with ThreadPoolExecutor(max_workers=3) as executor:
    args = ((meter_name, url)for meter_name, url in zip(list_meter_name, list_meter_ip))
    executor.map(lambda p: fetch_datalog(*p), args)
    
### Designate Starting and Ending Date for Data Extraction in %Y-%m-%d %H:%M:%S 
  
list_start=[]
list_end=[]

for key, value in df_datalog.items():
    list_start.append(value["Z3 Datetime"].iloc[-1])
    list_end.append(value["Z3 Datetime"].iloc[0])
    
start_date = pd.Series(list_start).min()
end_date = pd.Series(list_start).max()

print "\033[2;34;40mSmart Meter Comparison is for the period of from {} to {}\n" .format(z3_to_local(start_date), z3_to_local(end_date))


[2;34;40mSuccessfully fetched TX1 Panel A datalog from 2018-05-04 05:02:00 to 2018-07-16 07:39:00

[2;34;40mSuccessfully fetched TX3 Panel B datalog from 2018-05-03 11:33:00 to 2018-07-16 07:39:00

[2;34;40mSuccessfully fetched Incoming Electrical Service datalog from 2018-05-16 23:38:00 to 2018-07-16 07:39:00

[2;34;40mSuccessfully fetched Exterior Lighting datalog from 2018-05-03 06:09:00 to 2018-07-16 07:40:00

[2;34;40mSuccessfully fetched Vehicle Charger 1 datalog from 2018-05-03 10:30:00 to 2018-07-16 07:40:00

[2;34;40mSuccessfully fetched Vehicle Charger 2 datalog from 2018-05-03 04:07:00 to 2018-07-16 07:40:00

[2;34;40mSuccessfully fetched Panel C1 Lighting datalog from 2018-04-30 19:27:00 to 2018-07-16 07:41:00

[2;34;40mSuccessfully fetched Panel C3 DHW datalog from 2018-05-03 05:13:00 to 2018-07-16 07:41:00

[2;34;40mSuccessfully fetched Panel C4 Pumps datalog from 2018-07-05 09:11:00 to 2018-07-16 07:41:00

[2;34;40mSuccessfully fetched Panel C2 Lighting datalog

In [81]:
### 1MIN Data Extraction
df_1min = {} # create temporary 1min dataframe dictionary

### Execute Parallel Processing Data Extraction using 
with ThreadPoolExecutor(max_workers=3) as executor:
    args = ((meter_name, url)for meter_name, url in zip(list_meter_name, list_meter_ip))
    executor.map(lambda p: fetch_1min(*p), args)
    
### Designate Starting and Ending Date for Data Extraction in %Y-%m-%d %H:%M:%S 
  
list_start=[]
list_end=[]

for key, value in df_1min.items():
    list_start.append(value["Z3 Datetime"].iloc[-1])
    list_end.append(value["Z3 Datetime"].iloc[0])
    
beg_date = pd.Series(list_start).min()
end_date = pd.Series(list_start).max()

print "\033[2;34;40mSmart Meter Comparison is for the period of from {} to {}\n" .format(z3_to_local(beg_date), z3_to_local(end_date))


[2;36;40mSuccessfully fetched Panel C3 DHW 1MIN data from 2018-07-16 09:43:00 to 2018-04-17 09:21:00

[2;36;40mSuccessfully fetched Panel C1 Lighting 1MIN data from 2018-07-16 09:43:00 to 2018-04-17 09:21:00

[2;36;40mSuccessfully fetched Panel C2 Lighting 1MIN data from 2018-07-16 09:43:00 to 2018-04-17 09:21:00

[2;36;40mSuccessfully fetched TX1 Panel A 1MIN data from 2018-07-16 09:45:00 to 2018-04-17 09:21:00

[2;36;40mSuccessfully fetched TX3 Panel B 1MIN data from 2018-07-16 09:45:00 to 2018-04-17 09:21:00

[2;36;40mSuccessfully fetched Exterior Lighting 1MIN data from 2018-07-16 09:45:00 to 2018-04-17 09:21:00

[2;36;40mSuccessfully fetched Vehicle Charger 1 1MIN data from 2018-07-16 09:45:00 to 2018-04-17 09:21:00

[2;36;40mSuccessfully fetched Incoming Electrical Service 1MIN data from 2018-07-16 09:45:00 to 2018-04-17 09:21:00

[2;36;40mSuccessfully fetched Vehicle Charger 2 1MIN data from 2018-07-16 09:45:00 to 2018-04-17 09:21:00

[2;36;40mSuccessfully fetched TX1 

KeyboardInterrupt: 

In [40]:
### 1HOUR Data Extraction
df_1hour = {} # create temporary 1hour dataframe dictionary

### Execute Parallel Processing Data Extraction using 
with ThreadPoolExecutor(max_workers=3) as executor:
    args = ((meter_name, url)for meter_name, url in zip(list_meter_name, list_meter_ip))
    executor.map(lambda p: fetch_1hour(*p), args)
    
### Designate Starting and Ending Date for Data Extraction in %Y-%m-%d %H:%M:%S 
  
list_start=[]
list_end=[]

for key, value in df_1hour.items():
    list_start.append(value["Z3 Datetime"].iloc[-1])
    list_end.append(value["Z3 Datetime"].iloc[0])
    
beg_date = pd.Series(list_start).min()
end_date = pd.Series(list_start).max()

print "\033[2;34;40mSmart Meter Comparison is for the period of from {} to {}\n" .format(z3_to_local(beg_date), z3_to_local(end_date))


[2;36;40mSuccessfully stored Incoming Electrical Service 1HOUR data from 2018-07-16 08:44:43 to 2018-04-17 10:00:00

[2;36;40mSuccessfully stored TX1 Panel A 1HOUR data from 2018-07-16 08:44:43 to 2018-04-17 10:00:00

[2;36;40mSuccessfully stored TX3 Panel B 1HOUR data from 2018-07-16 08:44:42 to 2018-04-17 10:00:00

[2;36;40mSuccessfully stored Vehicle Charger 1 1HOUR data from 2018-07-16 08:44:43 to 2018-04-17 10:00:00

[2;36;40mSuccessfully stored Vehicle Charger 2 1HOUR data from 2018-07-16 08:44:43 to 2018-04-17 10:00:00

[2;36;40mSuccessfully stored Exterior Lighting 1HOUR data from 2018-07-16 08:44:43 to 2018-04-17 10:00:00

[2;36;40mSuccessfully stored Panel C3 DHW 1HOUR data from 2018-07-16 08:44:44 to 2018-04-17 10:00:00

[2;36;40mSuccessfully stored Panel C1 Lighting 1HOUR data from 2018-07-16 08:44:43 to 2018-04-17 10:00:00

[2;36;40mSuccessfully stored Panel C2 Lighting 1HOUR data from 2018-07-16 08:44:44 to 2018-04-17 10:00:00

[2;36;40mSuccessfully stored Panel

<span style="color:Blue; font-family:Times New Roman; font-size:1.2em;">Datalog vs 1MIN</span>

In [84]:
### Create Available Data Records, Dat

df_temp = {}

for meter_name in list_meter_name:
    
    df_dl = df_datalog[meter_name]
    df_dm = df_1min[meter_name]
    
    beg_date1 = max(df_dl["Z3 Datetime"].iloc[-1], df_dm["Z3 Datetime"].iloc[-1]) 
    end_date1 = min(df_dl["Z3 Datetime"].iloc[0], df_dm["Z3 Datetime"].iloc[0])
    
    obs_dl = len(df_dl[(df_dl["Z3 Datetime"]>=beg_date1) & (df_dl["Z3 Datetime"]<=end_date1)])
    obs_mt = len(df_dm[(df_dm["Z3 Datetime"]>=beg_date1) & (df_dm["Z3 Datetime"]<=end_date1)])
    
    df_temp[meter_name]=[meter_name, datetime.strftime(z3_to_local(beg_date1),"%Y-%m-%d %H:%M:%S"), datetime.strftime(z3_to_local(end_date1),"%Y-%m-%d %H:%M:%S"),(end_date1 - beg_date1)/60 + 1, obs_dl, obs_mt]
 
    df_datalog_1hour = pd.DataFrame.from_dict(df_temp, orient='index').rename(columns={0:"Loaded Meter" ,1:"Begin Date", 2:"End Date",3:"Available Hours", 4:"Datalog Hours", 5:"1HOUR Hours"})    

table = ff.create_table(df_datalog_1hour)
table.layout.width=800
py.iplot(table)

KeyError: 'ERV'

<span style="color:Blue; font-family:Times New Roman; font-size:1.2em;">Datalog vs 1HOUR</span>

In [85]:
df_temp = {}

for meter_name in list_meter_name:
    ### Filter only hourly data from datalog

    df_dl = df_datalog[meter_name]
    Filter=pd.to_datetime(df_dl["Local Time"], format="%Y-%m-%d %H:%M:%S").dt.minute==0 ### series quantities access through .dt accessor
    df_dl = df_dl[Filter]
    
    ### hourly
    df_hr = df_1hour[meter_name]
    beg_date1 = max(df_dl["Z3 Datetime"].iloc[-1], df_hr["Z3 Datetime"].iloc[-1]) 
    end_date1 = min(df_dl["Z3 Datetime"].iloc[0], df_hr["Z3 Datetime"].iloc[0])
    
    obs_dl = len(df_dl[(df_dl["Z3 Datetime"]>=beg_date1) & (df_dl["Z3 Datetime"]<=end_date1)])
    obs_hr = len(df_hr[(df_hr["Z3 Datetime"]>=beg_date1) & (df_hr["Z3 Datetime"]<=end_date1)])
    
    df_temp[meter_name]=[meter_name, datetime.strftime(z3_to_local(beg_date1),"%Y-%m-%d %H:%M:%S"), datetime.strftime(z3_to_local(end_date1),"%Y-%m-%d %H:%M:%S"),(end_date1 - beg_date1)/(60*60)+1, obs_dl, obs_hr]

    
df_datalog_1hour = pd.DataFrame.from_dict(df_temp, orient='index').rename(columns={0:"Loaded Meter" ,1:"Begin Date", 2:"End Date",3:"Available Hours", 4:"Datalog Hours", 5:"1HOUR Hours"})    

table = ff.create_table(df_datalog_1hour)
table.layout.width=800
py.iplot(table)    


[2;36;40mSuccessfully fetched ERV 1MIN data from 2018-04-17 09:21:00 to 2018-07-16 10:39:00

[2;36;40mSuccessfully fetched FC1 1MIN data from 2018-04-17 09:21:00 to 2018-07-16 10:41:00

[2;36;40mSuccessfully fetched Panel C1 Lighting 1MIN data from 2018-04-17 09:21:00 to 2018-07-16 10:40:00

[2;36;40mSuccessfully fetched Panel C3 DHW 1MIN data from 2018-04-17 09:21:00 to 2018-07-16 10:40:00

[2;36;40mSuccessfully fetched Panel C2 Lighting 1MIN data from 2018-04-17 09:21:00 to 2018-07-16 10:40:00

[2;36;40mSuccessfully fetched Cistern System CT 1MIN data from 2018-04-17 09:21:00 to 2018-07-16 10:42:00

[2;36;40mSuccessfully fetched Panel C4 Pumps 1MIN data from 2018-04-17 09:21:00 to 2018-07-16 10:47:00

[2;36;40mSuccessfully fetched Ceiling Fans 1MIN data from 2018-04-17 09:21:00 to 2018-07-16 10:44:00

[2;36;40mSuccessfully fetched Panel C4 Pumps 1MIN data from 2018-04-17 09:21:00 to 2018-07-16 10:49:00

[2;36;40mSuccessfully fetched Exhaust Fans 1MIN data from 2018-04-17 09