# EWX Forecasting Rules

In [368]:
%matplotlib notebook
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import json
import pprint
import os
pp = pprint.PrettyPrinter(1)


## Functions to Use

In [436]:
def sortdir(filepath, num):
    """List the most recently modified entries of a directory.

    Parameters
    ----------
    filepath : str
        Directory to list. As a side effect the process working directory is
        changed to this directory; this is kept from the original
        implementation because callers open the returned bare file names
        relative to the working directory.
    num : int
        Number of most-recent entries to print and return.

    Returns
    -------
    pandas.DataFrame
        Columns ``files`` (entry name) and ``time`` (modification time,
        rounded to the nearest second), newest first, at most ``num`` rows.
    """
    # Resolve to an absolute path *before* chdir so a relative ``filepath``
    # is not re-interpreted against the new working directory.
    directory = os.path.abspath(filepath)
    os.chdir(directory)
    names = os.listdir(directory)

    # os.path.join picks the right separator for the platform, so there is no
    # need to try a backslash path and swallow the failure with a bare except.
    mtimes = [
        dt.datetime.fromtimestamp(round(os.path.getmtime(os.path.join(directory, name))))
        for name in names
    ]

    filedf = pd.DataFrame({'files': names, 'time': mtimes})
    filedf = filedf.sort_values(by='time', ascending=False).reset_index(drop=True)

    newest = filedf.head(num)
    print("files found in dir: ", filepath)
    print(newest)
    return newest

def _coerce_period_columns(frame):
    """Return a copy of ``frame`` with ``start``/``stop`` as datetimes and ``v`` as float."""
    # Copy first: ``frame`` is an ``.iloc`` slice, and writing columns into a
    # slice is what triggers pandas' SettingWithCopyWarning.
    frame = frame.copy()
    frame['start'] = pd.to_datetime(frame['start'])
    frame['stop'] = pd.to_datetime(frame['stop'])
    frame['v'] = frame['v'].astype(float)
    return frame


def parse_engie(payload):
    """Parse an Engie account JSON file into interval, scalar and cap-tag frames.

    Parameters
    ----------
    payload : str
        Path to a JSON file with an ``account`` object containing
        ``timeseriesdataidr``, ``timeseriesdatascalar``, ``captag``,
        ``market``, ``discocode`` and ``accountnumber``.

    Returns
    -------
    tuple
        ``(idr_payload, heartbeat, sca_payload, caps_df, meterid)`` where

        * ``idr_payload`` is the first ``timeseriesdataidr`` series' reads,
          indexed by timestamp ``t`` with a float column ``v``;
        * ``heartbeat`` is that series' ``heartbeat`` converted to ``int``;
        * ``sca_payload`` is the scalar series without its first column, with
          ``start``/``stop`` as datetimes and ``v`` as float;
        * ``caps_df`` is the first ``captag`` entry as a one-row frame without
          its first two columns, converted the same way;
        * ``meterid`` is ``market_discocode_accountnumber``.

    Raises
    ------
    KeyError
        If one of the expected keys or columns is missing.
    """
    with open(payload) as raw:
        idr_engie = json.load(raw)

    acct = idr_engie['account']

    print('saving data files')
    ts_sca_data = acct['timeseriesdatascalar']
    # Only the first interval series is read; any further entries are ignored.
    ts_idr_data = acct['timeseriesdataidr'][0]

    hb = ts_idr_data['heartbeat']
    print('found', hb, 'heartbeats')

    idr_payload = pd.DataFrame.from_dict(ts_idr_data['reads'])
    idr_payload['v'] = idr_payload['v'].astype(float)
    idr_payload['t'] = pd.to_datetime(idr_payload['t'])
    idr_payload = idr_payload.set_index('t')

    # The first column is dropped positionally -- presumably an identifier
    # column; verify against the payload schema.
    sca_payload = _coerce_period_columns(pd.DataFrame.from_dict(ts_sca_data).iloc[:, 1:])

    print('saving meterid and cap tags')
    meterid = '_'.join([acct['market'], acct['discocode'], acct['accountnumber']])

    # Only the first cap tag is used. from_records orders a dict's keys
    # alphabetically, so the two columns dropped here are the two that sort
    # first -- NOTE(review): confirm these are never start/stop/v.
    caps = acct['captag'][0]
    caps_df = _coerce_period_columns(pd.DataFrame.from_records(caps, index=[0]).iloc[:, 2:])

    return (idr_payload, int(hb), sca_payload, caps_df, meterid)


def parse_ewx(file):
    """Load every interval series of an EWX account JSON file into one frame.

    Parameters
    ----------
    file : str
        Path to a JSON file whose ``account['timeseriesdataidr']`` is a list
        of objects, each with a ``reads`` list of records containing ``t``.

    Returns
    -------
    pandas.DataFrame
        All reads from all series stacked in file order, indexed by the
        parsed timestamp ``t``. Other columns are left as loaded.

    Raises
    ------
    IndexError
        If the file contains no ``timeseriesdataidr`` series.
    """
    with open(file) as raw:
        print("loading json...")
        data = json.load(raw)

    series_list = data['account']['timeseriesdataidr']
    n = len(series_list)
    if n == 0:
        # Same exception type the original raised on ``ch3[0]``, with a
        # message that says what is actually missing.
        raise IndexError("no 'timeseriesdataidr' series found in " + str(file))

    # Note: ``n`` counts series, not individual reads; message text kept as-is.
    print('found ', n, 'reads, creating dataset.')

    # Build each series' frame once and concatenate a single time; calling
    # pd.concat inside the loop re-copies the accumulated frame on every pass.
    frames = []
    for series in series_list:
        frame = pd.DataFrame.from_dict(series['reads'])
        frame['t'] = pd.to_datetime(frame['t'])
        frames.append(frame)

    print("saving to dataframe...")
    master_df = pd.concat(frames, ignore_index=True).set_index('t')

    return master_df


def periodic_zero(idr, margin, threshold):
    """Flag weekday/hour slots whose reads are only sometimes at or below ``margin``.

    Parameters
    ----------
    idr : pandas.DataFrame
        Interval reads with a ``DatetimeIndex`` and a numeric column ``v``.
        The frame is copied, not modified.
    margin : float
        A read counts as a "zero read" when ``v <= margin``.
    threshold : float
        Upper bound (exclusive) on the zero-read proportion for a slot to be
        flagged.

    Returns
    -------
    pandas.DataFrame
        Copy of ``idr`` with added columns:

        * ``d`` -- day of week of the timestamp;
        * ``h`` -- hour of the timestamp;
        * ``pz`` -- proportion of reads in the same ``(d, h)`` slot that are
          zero reads;
        * ``lr`` -- True when ``0 < pz < threshold``. The flag applies to every
          row of such a slot, not only to the rows that are zero reads.
    """
    zeros = idr.copy()
    zeros['d'] = zeros.index.dayofweek
    zeros['h'] = zeros.index.hour

    # Mean of a 0/1 indicator within each (weekday, hour) slot is the
    # proportion of zero reads; transform broadcasts it back onto every row.
    is_zero = (zeros['v'] <= margin).astype(float)
    zeros['pz'] = is_zero.groupby([zeros['d'], zeros['h']]).transform('mean')

    zeros['lr'] = (zeros['pz'] > 0) & (zeros['pz'] < threshold)

    # Previously read from an undefined name (NameError on a fresh kernel).
    low_read_index = zeros.index[zeros['lr'].to_numpy()]
    if len(low_read_index) > 0:
        print('low usage reads found at:', low_read_index)

    return zeros

def interval_gap_check(tmp2, expected_hours=1):
    """Add read-to-read differences and flag irregular time steps.

    Parameters
    ----------
    tmp2 : pandas.DataFrame
        Reads with a ``DatetimeIndex`` and a numeric column ``v``. The frame
        is modified in place (columns are added) and also returned, as in the
        original implementation.
    expected_hours : float, default 1
        Expected spacing between consecutive reads, in hours.

    Returns
    -------
    pandas.DataFrame
        ``tmp2`` with added columns:

        * ``vd`` -- difference of ``v`` from the previous row (0 for the first row);
        * ``td`` -- hours elapsed since the previous row; rows with no
          computable difference (the first row) get ``expected_hours``;
        * ``gap`` -- True when ``td`` differs from ``expected_hours``.
    """
    val_diff = tmp2['v'].diff().fillna(0)

    # total_seconds() keeps the day component. ``.dt.seconds`` returns only the
    # within-day remainder, so a 25-hour gap used to read as 1 hour and pass.
    hours = tmp2.index.to_series().diff().dt.total_seconds() / 3600
    hours = hours.fillna(expected_hours)

    tmp2['vd'] = val_diff
    tmp2['td'] = hours

    # isclose rather than != so a fractional expected_hours (e.g. 1/12) is not
    # tripped by floating-point representation.
    tmp2['gap'] = ~np.isclose(hours.to_numpy(), expected_hours)

    return tmp2


def variance_validation(tmp2, time_window, centered, n_sd):
    """Flag spikes and dips relative to rolling statistics of ``v``.

    Parameters
    ----------
    tmp2 : pandas.DataFrame
        Reads with a numeric column ``v``. Modified in place (columns are
        added) and also returned, as in the original implementation.
    time_window : int
        Rolling window size passed to ``Series.rolling``.
    centered : bool
        Passed to ``Series.rolling`` as ``center``.
    n_sd : float
        Number of rolling standard deviations used for the dip bound; the
        spike bound uses ``n_sd + 1``.

    Returns
    -------
    pandas.DataFrame
        ``tmp2`` with added columns:

        * ``rm`` -- rolling mean of ``v`` (``min_periods=1``);
        * ``mc`` -- ``v - rm``;
        * ``crm`` / ``crsd`` -- rolling mean / standard deviation of ``mc``;
        * ``var`` -- ``(mc - crm) / crm``;
        * ``spike`` -- ``mc > crm + (n_sd + 1) * crsd``;
        * ``dip`` -- ``mc < crm - n_sd * crsd``.
    """
    # rolling() rejects min_periods larger than an integer window, so cap it.
    if isinstance(time_window, (int, np.integer)):
        min_obs = min(10, int(time_window))
    else:
        min_obs = 10

    # Use the frame that was passed in. The original read a notebook-global
    # ``idr`` here, which fails (or silently uses other data) outside the
    # cell that happened to define it.
    tmp2['rm'] = tmp2['v'].rolling(window=time_window, min_periods=1, center=centered).mean()
    tmp2['mc'] = tmp2['v'] - tmp2['rm']

    mc_roll = tmp2['mc'].rolling(window=time_window, min_periods=min_obs, center=centered)
    tmp2['crm'] = mc_roll.mean()
    tmp2['crsd'] = mc_roll.std()

    tmp2['var'] = (tmp2['mc'] - tmp2['crm']) / tmp2['crm']

    # NOTE(review): the spike bound is one standard deviation wider than the
    # dip bound; kept as written -- confirm the asymmetry is intentional.
    tmp2['spike'] = tmp2['mc'] > (tmp2['crm'] + (n_sd + 1) * tmp2['crsd'])
    tmp2['dip'] = tmp2['mc'] < (tmp2['crm'] - n_sd * tmp2['crsd'])

    return tmp2

In [411]:
def forecast_main(json_file):
    """Run the validation pipeline on one Engie JSON payload.

    Steps: parse the payload, drop reads whose timestamp is null, flag
    low-usage weekday/hour slots, compute interval gaps, then flag spikes
    and dips.

    Parameters
    ----------
    json_file : str
        Path passed to ``parse_engie``.

    Returns
    -------
    pandas.DataFrame
        The interval reads with the columns added by ``periodic_zero``,
        ``interval_gap_check`` and ``variance_validation``.
    """
    #parse json file
    idr, hb, sca, caps, meter = parse_engie(json_file)
    idr = idr.loc[pd.notnull(idr.index), :]

    #check for nonperiodic zeros (zero read: v <= .01; flag slots with 0 < pz < 1)
    tmp2 = periodic_zero(idr, .01, 1)

    #get value & time differences
    tmp2 = interval_gap_check(tmp2)

    #check spikes & dips
    # Number of heartbeat-length intervals in 60 days (hb is in seconds).
    time_window = int(60*24*3600/hb)
    centered = True
    n_sd = 2

    tmp2 = variance_validation(tmp2, time_window, centered, n_sd)

    # The original fell off the end and returned None, discarding the result.
    return tmp2

## Run Main Forecasting 

### To Do

1. Insert the missing timestamps wherever an interval gap is detected (`gap` is True).
2. Interpolate usage values for the inserted timestamps and for low-usage reads (`gap` or `lr` is True).

In [404]:
# Show the ten most recently modified files in the working directory.
filedf = sortdir(os.getcwd(), 10)

# Select the payload by name. A position in the mtime-sorted listing changes
# whenever any file is touched, and several files share the same mtime, so
# ``filedf.files[3]`` does not reliably refer to the same file.
engie_json = 'engie_NEPOOL_WMECO_54024602039_024741000.json'
if not os.path.isfile(engie_json):
    raise FileNotFoundError(engie_json + ' not found in ' + os.getcwd())

print(' ')
print('using file:', engie_json)

files found in dir:  /Users/stevenhurwitt/Downloads/LA-tools/EWX_Forecast
                                           files                time
0                     EWX Forecasting Copy.ipynb 2019-06-01 15:06:13
1  engie_NEPOOL_WMECO_54311202097_965441008.json 2019-06-01 11:14:10
2    ewx_NEPOOL_WMECO_54024602039_024741000.json 2019-06-01 11:14:10
3  engie_NEPOOL_WMECO_54024602039_024741000.json 2019-06-01 11:14:10
4                                        init.py 2019-06-01 11:14:10
5    ewx_NEPOOL_WMECO_54311202097_965441008.json 2019-06-01 11:14:10
6                             .ipynb_checkpoints 2019-05-31 18:26:00
 
using file: engie_NEPOOL_WMECO_54024602039_024741000.json


In [None]:
# The pipeline function is defined as ``forecast_main``; ``main_forecast`` does not exist.
forecast_main(engie_json)

## Test forecast_main

In [437]:
# Step through the pipeline at cell level so the intermediates (idr, tmp2, ...)
# remain available to the plotting cells further down.

# 1. Parse the payload and keep only reads that have a timestamp.
idr, hb, sca, caps, meter = parse_engie(engie_json)
idr = idr[idr.index.notnull()]

# 2. Flag non-periodic zeros, then add value & time differences.
tmp2 = interval_gap_check(periodic_zero(idr, .01, 1))

# 3. Flag spikes & dips.
n_sd = 2
centered = True
time_window = int(60*24*3600/hb)

tmp2 = variance_validation(tmp2, time_window, centered, n_sd)

tmp2.head()

saving data files
found 3600 heartbeats
saving meterid and cap tags


Unnamed: 0_level_0,v,d,h,pz,lr,vd,td,gap,rm,mc,crm,crsd,var,spike,dip
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-11-01 00:00:00,779.22,3,0,0.0,False,0.0,1.0,False,665.01045,114.20955,-33.396478,135.736495,-4.419808,False,False
2018-11-01 01:00:00,739.26,3,1,0.0,False,-39.96,1.0,False,665.096205,74.163795,-33.315187,135.659762,-3.226126,False,False
2018-11-01 02:00:00,772.416,3,2,0.0,False,33.156,1.0,False,665.181125,107.234875,-33.234601,135.582944,-4.226603,False,False
2018-11-01 03:00:00,767.34,3,3,0.0,False,-5.076,1.0,False,665.262373,102.077627,-33.157595,135.504838,-4.078559,False,False
2018-11-01 04:00:00,765.504,3,4,0.0,False,-1.836,1.0,False,665.358315,100.145685,-33.065779,135.433631,-4.028681,False,False


## Graph

In [369]:
# Title so the figure stands alone; trailing ';' suppresses the Axes repr.
idr.plot(y='v', title='IDR interval reads (v)');

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11ba27748>

In [386]:
# Title so the figure stands alone; trailing ';' suppresses the Axes repr.
tmp2.plot(y='vd', color='green', title='Read-to-read change in v (vd)');

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x113a1ffd0>

In [384]:
# Select the EWX file by name rather than by position in the mtime-sorted
# listing, which is not stable between runs.
# NOTE(review): in the recorded listing position 3 is the *engie* file for this
# account; the matching ewx file is assumed to be the intended input -- confirm.
ewx = parse_ewx('ewx_NEPOOL_WMECO_54024602039_024741000.json')
ewx.loc[ewx.index.year <= 2020, :].plot(y='v', color='orange', title='EWX reads (v) through 2020');

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1144060f0>