In [1]:
%matplotlib inline

# generic
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# custom
from icap.database.icapdatabase import ICapDatabase
from icap.results.results import Results

In [2]:
fp = 'icap/database/icapdatabase.json'
conn = ICapDatabase(fp).connect()

if conn.__class__.__name__ != 'Connection':
    raise Exception('No Connection')

## Temp Variant Testing

In [66]:
# Pull the raw weather observations (temperature and wet-bulb temperature per
# station) from CONED_NYWeatherData, ordered by observation date.
# NOTE(review): `Hour` is commented out of the select list below, yet the
# "CONVERSIONS" cell further down reads ts['Hour'] -- presumably that cell was
# run against an earlier version of this query; confirm before a Run All.
temp_station_query = """select
                RTrim(StationCode) as StationCode,
                ObservedDate, --Hour,
                Temperature, WetBulbTemperature
            from CONED_NYWeatherData
            order by
                ObservedDate"""

# Load the result set into a DataFrame over the open connection.
ts = pd.read_sql(temp_station_query, conn)

In [75]:
"""
Hourly Average:
    WetBulbTemperature = WBT; Temperature = T;
    
    for hour in ObservedDate:
        hourly_avg[i] = 0.25 * (KNYC_WBT + KNYC_T + KLGA_WBT + KLGA_T)

"""
# One row per ObservedDate, one column per (measure, station) pair; the
# row-wise mean of those columns is the hourly average described above.
hourly_avg = (
    ts.pivot_table(
        index='ObservedDate',
        columns='StationCode',
        values=['Temperature', 'WetBulbTemperature'],
    )
    .mean(axis=1)
)

In [80]:
"""
Rolling Average:
    1. Group hourly average into days
    2. Rolling mean over 3 hour window
    3. Take maximum average per day
"""
# pd.TimeGrouper and the `level=` argument of Series.max have been removed
# from pandas; pd.Grouper(freq='D') and groupby(level=0) are the equivalents
# and are also available on older releases.
daily_max_avg = (
    hourly_avg
    .groupby(pd.Grouper(freq='D'))  # 1. one group per calendar day
    .rolling(window=3)              # 2. 3-observation window inside each day
    .mean()
    .groupby(level=0)               # 3. level 0 is the daily group key
    .max()
)

In [None]:
"""
Rolling Weighted Sum
    weights = [0.1, 0.2, 0.7]
    day[i-2], day[i-1], day[i] = weights 

"""
# helper function to compute weighted sum
def f(w):
    """Build a weighted-sum function for a fixed weight vector.

    The returned callable multiplies its argument element-wise by ``w`` and
    returns the sum of the products.
    """
    return lambda x: (w * x).sum()

# Weights
wts = np.array([.1, .2, .7])

In [40]:
# CONVERSIONS
# convert hour into timedelta
# increment `ObservedDate` by corresponding Timedelta
# drop the `Hour` columns
# NOTE(review): the weather query cell above has `Hour` commented out of its
# select list, so ts['Hour'] only exists if that query is restored -- confirm.
# pd.to_timedelta converts the whole column in one call instead of building a
# pd.Timedelta per row through apply().
ts['Hour'] = pd.to_timedelta(ts['Hour'], unit='h')
ts['ObservedDate'] = ts['ObservedDate'] + ts['Hour']
td = ts.drop('Hour', axis=1)

In [55]:
# update index
#td.set_index('ObservedDate', inplace=True)
ts.set_index('ObservedDate', inplace=True)
ts.drop('Hour', axis=1, inplace=True)

In [57]:
ts.head()

Unnamed: 0_level_0,StationCode,Temperature,WetBulbTemperature
ObservedDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-04-30 20:00:00,KNYC,48,46
2014-04-30 20:00:00,KLGA,46,45
2014-04-30 21:00:00,KLGA,48,47
2014-04-30 21:00:00,KNYC,48,47
2014-04-30 22:00:00,KNYC,48,47


In [58]:
# AGGREGATION
# Station_i : sum = temperature + wetbulbtemperature

#td['RowSum'] = td.sum(axis=1)
# Sum only the two measurement columns. A bare ts.sum(axis=1) sweeps in every
# column of the frame -- including `RowSum` itself when this cell is re-run,
# and the string `StationCode` column.
ts['RowSum'] = ts[['Temperature', 'WetBulbTemperature']].sum(axis=1)

In [43]:
knyc = td[td['StationCode'] == 'KNYC']
klga = td[td['StationCode'] == 'KLGA']

In [60]:
knyc.head()

Unnamed: 0_level_0,StationCode,Temperature,WetBulbTemperature,RowSum
ObservedDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-05-01 16:00:00,KNYC,48,46,94
2014-05-01 18:00:00,KNYC,48,47,95
2014-05-01 20:00:00,KNYC,48,47,95
2014-05-01 22:00:00,KNYC,50,48,98
2014-05-01 00:00:00,KNYC,50,48,98


In [61]:
klga.head()

Unnamed: 0_level_0,StationCode,Temperature,WetBulbTemperature,RowSum
ObservedDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-05-01 16:00:00,KLGA,46,45,91
2014-05-01 18:00:00,KLGA,48,47,95
2014-05-01 20:00:00,KLGA,46,45,91
2014-05-01 22:00:00,KLGA,48,46,94
2014-05-01 00:00:00,KLGA,48,47,95


In [59]:
ts.drop(['Temperature', 'WetBulbTemperature'], axis=1
       ).pivot(columns='StationCode', values='RowSum')

ValueError: Index contains duplicate entries, cannot reshape

In [10]:
# drop columns aggregated in `RowSum`
# pivot stations into columns
# avg: ((Station_i:RowSum) + (Station_j:RowSum)) * (1/4)
# NOTE(review): as recorded in the output below, this cell raised
# "ValueError: Index contains duplicate entries, cannot reshape" -- pivot()
# cannot reshape when an (index, StationCode) pair occurs more than once.
# The `hourly_avg` cell earlier in the notebook uses pd.pivot_table instead
# and appears to compute the equivalent average; confirm this cell is obsolete.
hr_avg = td.drop(['Temperature', 'WetBulbTemperature'], axis=1
                 ).pivot(columns='StationCode', values='RowSum'
                         ).apply(lambda row: row.sum() * .25, axis=1)

ValueError: Index contains duplicate entries, cannot reshape

--

In [3]:
from icap.coned.coned import CONEDRecipe
c = CONEDRecipe(conn=conn, results=Results).run_all()

In [6]:
c.write_comparison_to_csv()
c.analyze_comparison(write_to_excel=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ICap,CapacityTagValue
MeterType,Year,RateClass,Strata,Valid,Unnamed: 5_level_1,Unnamed: 6_level_1
CON,2015,2,2,0,1,1
CON,2016,2,2,0,1,1
CON,2016,2,2,1,1,1
CON,2017,2,2,,4,0
CON,2017,8,8,,2,0
CON,2017,9,9,,6,0
DMD,2015,1,1,0,2449,2449
DMD,2015,1,1,1,96,96
DMD,2015,1,1,,1,11
DMD,2015,116,116,0,1,1


## CONED Interval

In [4]:
%%time
from icap.coned.coned import CONEDInterval
c = CONEDInterval(conn)

CPU times: user 1.68 s, sys: 44 ms, total: 1.72 s
Wall time: 18.8 s


In [7]:
c.hourly.head().columns

Index(['RateClass', 'Service Classification', 'ZoneCode', 'Stratum', 'TOD',
       'StartDate', 'EndDate', 'BilledUsage', 'BilledDemand', 'CPHourUsage',
       'VarTest', 'MCD', 'NormUsage', 'MeterLogic', 'MeterRegex', 'Usage'],
      dtype='object')

In [6]:
from icap.results.results import Results
r = Results(conn,c.compute_icap())


In [7]:
r.analyze_comparison()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ICap,CapacityTagValue
MeterType,Year,RateClass,Strata,Valid,Unnamed: 5_level_1,Unnamed: 6_level_1
INT,2015,12,12,0,2,2
INT,2015,39,39,0,3,3
INT,2015,51,51,0,1,1
INT,2015,8,8,0,2,2
INT,2015,9,9,0,27,27
INT,2016,12,12,1,2,2
INT,2016,39,39,0,2,2
INT,2016,8,8,1,1,1
INT,2016,9,9,0,1,1
INT,2016,9,9,1,11,11


In [6]:
hourly_cp = c.get_hourly_cp()

In [10]:
??c.meter_logic

In [7]:
hourly_rec = c.get_hourly()

ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

In [16]:
hourly_query = """
    select
        h.PremiseId,
        p.RateClass, ce.[Service Classification],
        ce.[Zone Code] as ZoneCode,
        ce.[Stratum Variable] as Stratum,
        ce.[Time of Day Code] as TOD,
        Year(m.EndDate) as Year,
        DateAdd(day, 0,  m.StartDate) as StartDate,
        m.EndDate,
        m.Usage as BilledUsage,
        m.Demand as BilledDemand,
        Round(Sum(h.Usage), 0) as CPHourUsage,
        'INT' as MeterType,
        iif(Abs((m.Usage-Sum(h.Usage))/m.Usage)<=0.04, 1, 0) as VarTest
    from [HourlyUsage]  h
    inner join [MonthlyUsage]  m
        on m.PremiseId = h.PremiseID
        and m.UtilityID = h.UtilityId
    inner join CoincidentPeak as cp
        on cp.UtilityId = h.UtilityId
        and Year(cp.CPDate) = Year(m.EndDate)
        and (cp.CPDate between m.StartDate and m.EndDate)
        and (h.UsageDate between m.StartDate and m.EndDate)
    inner join Premise as p
        on p.PremiseId = h.PremiseId
    inner join ConED as ce
        on CAST(ce.[Account Number] as varchar) = h.PremiseId
    where
        h.UtilityId = 'CONED'
        and h.HourEnding between 1 and 24
    group by
        h.PremiseId, MeterType,
        p.RateClass, ce.[Service Classification],
        ce.[Zone Code], ce.[Stratum Variable], ce.[Time of Day Code],
        Year(m.EndDate),
        m.StartDate, m.EndDate,
        m.Usage,
        m.Demand
    having
        Count(h.Usage) = (DateDiff(hour, m.StartDate, m.EndDate) + 24)
        """
# obtain data; set defaults; conversions
df = pd.read_sql(hourly_query,conn)

In [17]:
df.head()

Unnamed: 0,PremiseId,RateClass,Service Classification,ZoneCode,Stratum,TOD,Year,StartDate,EndDate,BilledUsage,BilledDemand,CPHourUsage,MeterType,VarTest


In [13]:
# Start MCD and NormUsage as missing values.
# `np.NaN` and `np.int` are aliases that were removed from NumPy; np.nan and
# the builtin int behave the same and exist on every release.
df['MCD'] = np.nan
df['NormUsage'] = np.nan
# Cast each time-of-day code to a plain integer.
df['TOD'] = df['TOD'].apply(int)

In [14]:
# create multi-index; sort
df.set_index(['PremiseId', 'Year'], inplace=True)
df.sort_index(inplace=True)

In [15]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,RateClass,Service Classification,ZoneCode,Stratum,TOD,StartDate,EndDate,BilledUsage,BilledDemand,CPHourUsage,MeterType,VarTest,MCD,NormUsage
PremiseId,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1


In [None]:
# determine meter type
# `self` does not exist at notebook scope; the recipe instance here is `c`
# (its meter_logic method is inspected with `??c.meter_logic` earlier on).
df['MeterLogic'] = df.apply(c.meter_logic, axis=1)
# NOTE(review): `tod_regex` is not defined anywhere in the visible notebook --
# presumably it is the `is_tod` helper defined further down, or a function
# from the icap package; confirm before running.
df['MeterRegex'] = df['MeterLogic'].apply(tod_regex)

In [None]:
r = Results(conn, c.compute_icap())

In [None]:
r.analyze_comparison()

### Interval Variance <= 4%

In [4]:
# select MCD; Usage[CP_date, CP_hour]
tmp = pd.merge(c.varTrue.reset_index(), c.hourly_cp, 
        how='left',
        on=['PremiseId', 'Year'])

In [5]:
tmp2 = pd.merge(tmp, c.util,
        how='left',
        left_on=['Year', 'ZoneCode'],
        right_on=['Year', 'Zone'])

In [6]:
# Keep the utility-parameter rows whose meter type equals the premise's meter
# logic, plus the rows flagged for 'ALL' meter types. Both comparisons already
# yield boolean Series, so they can be OR-ed directly.
match_mask = tmp2['MeterLogic'] == tmp2['MeterType_y']
all_mask = tmp2['MeterType_y'] == 'ALL'
mask = match_mask | all_mask

# Shift every factor by one (factor -> factor + 1.0).
# NOTE(review): not idempotent -- each execution adds another 1.0, and a later
# cell repeats this exact statement.
tmp2['Factor'] = tmp2['Factor'] + 1.0

# In this branch the MCD is taken directly from the usage column.
tmp2['MCD'] = tmp2['Usage']

# `.ix` was removed from pandas; `.loc` accepts the same boolean mask.
# NOTE(review): coned_icap is defined in a later cell and must be run first,
# otherwise this line raises the NameError recorded below.
tmp2.loc[mask].groupby(['PremiseId', 'Year', 'RateClass', 'MeterLogic']).apply(coned_icap)

NameError: name 'coned_icap' is not defined

In [23]:
# NOTE(review): this repeats the `Factor + 1.0` shift already applied in the
# masking cell above; running both in one session raises Factor by 2.0 --
# confirm which of the two is meant to stay.
tmp2['Factor'] = tmp2['Factor'].apply(lambda x: x + 1.0)

In [7]:
c.__dict__.keys()

dict_keys(['hourly', 'util', 'temp_var', 'varTrue', 'cp', 'conn', 'tod_map', 'varFalse', 'params', 'hourly_cp', 'rc_map', 'lst'])

In [18]:
def coned_icap(g):
    """Compute the ICap for one group of merged usage / utility-parameter rows.

    The result is the first `Usage` value multiplied by the first
    SubzoneTrueupFactor and the first ForecastTrueupFactor found in the group.
    NaN is returned when the group has no usage row or lacks either factor.
    """
    usage = g['Usage'].values
    param_ids = g['ParameterId']
    subzone = g.loc[param_ids == 'SubzoneTrueupFactor', 'Factor'].values
    forecast = g.loc[param_ids == 'ForecastTrueupFactor', 'Factor'].values

    # Any empty component means the product cannot be formed.
    if len(usage) == 0 or len(subzone) == 0 or len(forecast) == 0:
        return np.nan
    return usage[0] * subzone[0] * forecast[0]

In [26]:
tmp2.groupby(['PremiseId', 'Year', 'RateClass']).apply(coned_icap)

PremiseId        Year  RateClass
295118610000006  2014  9            1940.745655
                 2015  9            1926.716482
299011610500024  2014  9              98.770568
393011440000000  2014  9             550.117929
                 2015  9             558.978161
393021200700004  2014  51            656.248290
393031443500004  2014  9             217.686875
393161340500003  2014  9             801.577554
393181404000005  2014  9             517.585506
                 2015  9             628.092598
393181404500004  2014  9            2206.498213
                 2015  9            2111.021646
463129207600005  2014  9             617.049388
                 2015  9             672.956355
494011340000009  2014  9             761.312015
494013609000001  2014  9             583.716988
494013704000005  2014  9             711.580074
494022308200010  2014  9            1151.567753
494022308210001  2014  9             604.783065
494032427400037  2014  9            2655.125650
4940510

## CONED Monthly

In [None]:
%%time
from icap.coned.coned import CONEDMonthly
c = CONEDMonthly(conn=conn)

In [None]:
%%time
c.compute_mcd()

CPU times: user 7min 56s, sys: 716 ms, total: 7min 57s
Wall time: 7min 57s


In [None]:
icap = c.compute_icap()

In [6]:
icap.head()

Unnamed: 0,RunDate,ISO,Utility,PremiseId,Year,RateClass,Strata,MeterType,ICap
0,2017-01-11 13:47:25.689545,PJM,CONED,211304006900006,2017,9,9,DMD,86.849577
1,2017-01-11 13:47:25.689545,PJM,CONED,211306319000002,2015,9,9,DMD,67.566771
2,2017-01-11 13:47:25.689545,PJM,CONED,211306319000002,2016,9,9,DMD,82.011537
3,2017-01-11 13:47:25.689545,PJM,CONED,211306319000002,2017,9,9,DMD,76.797542
4,2017-01-11 13:47:25.689545,PJM,CONED,211306350500001,2015,9,9,DMD,3.117239


In [None]:
r = Results(conn, icap)

In [None]:
r.analyze_comparison(write_to_excel=True)

In [10]:
results = r.compare_.copy()

In [11]:
null_idx = results[pd.isnull(results['HistVar'])].index
valid_idx = results[results['HistVar'] <= 2.0].index
invalid_idx = results[results['HistVar'] > 2.0].index

In [12]:
# assign values to outcomes on their index
# DataFrame.set_value was removed from pandas; label-based `.loc` assignment
# writes the same cells.
results['Valid'] = ''
results.loc[null_idx, 'Valid'] = 'NULL'
results.loc[invalid_idx, 'Valid'] = 0
results.loc[valid_idx, 'Valid'] = 1

# set_value returned the frame, which made it the cell's displayed output;
# show the first rows explicitly instead.
results.head(10)

Unnamed: 0,RunDate,ISO,Utility,PremiseId,Year,RateClass,Strata,MeterType,ICap,CapacityTagValue,HistVar,Valid
0,2017-01-11 12:54:48.692538,PJM,CONED,211304006900006,2017,9,9,,86.849577,,,
1,2017-01-11 12:54:48.692538,PJM,CONED,211306319000002,2015,9,9,,67.566771,75.2000,10.150570,0
2,2017-01-11 12:54:48.692538,PJM,CONED,211306319000002,2016,9,9,,82.011537,101.0000,18.800458,0
3,2017-01-11 12:54:48.692538,PJM,CONED,211306319000002,2017,9,9,,76.797542,,,
4,2017-01-11 12:54:48.692538,PJM,CONED,211306350500001,2015,9,9,,3.117239,3.3100,5.823586,0
5,2017-01-11 12:54:48.692538,PJM,CONED,211306350500001,2016,9,9,,3.914402,3.9231,0.221717,1
6,2017-01-11 12:54:48.692538,PJM,CONED,211306350500001,2017,9,9,,3.761557,,,
7,2017-01-11 12:54:48.692538,PJM,CONED,211306373500053,2015,9,9,,22.489403,24.3600,7.678968,0
8,2017-01-11 12:54:48.692538,PJM,CONED,211306373500053,2016,9,9,,16.346038,12.2915,32.986517,0
9,2017-01-11 12:54:48.692538,PJM,CONED,211306373500053,2017,9,9,,12.246620,,,


In [13]:
# aggregate and count
details = results.groupby(['MeterType', 'Year', 'RateClass',
                           'Strata', 'Valid']
                          )[['ICap', 'CapacityTagValue']].count()

In [14]:
details

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ICap,CapacityTagValue
MeterType,Year,RateClass,Strata,Valid,Unnamed: 5_level_1,Unnamed: 6_level_1


In [12]:
comp = r.compare_.copy()
null_idx = comp[pd.isnull(comp['HistVar'])].index
valid_idx = comp[comp['HistVar'] <= 2.0].index
invalid_idx = comp[comp['HistVar'] > 2.0].index

In [14]:
comp.ix[valid_idx].shape

(1856, 11)

In [15]:
comp.ix[invalid_idx].shape

(11057, 11)

In [17]:
r.analyze_comparison()

TypeError: unorderable types: str() <= float()

In [1]:
def is_tod(rate_class):
    """Return the regex fragment that selects time-of-day strata for a meter.

    Despite the parameter's name, the value is compared against the
    meter-logic code:

        'VTOU'          -> 'T'     (TIME OF DAY meter)
        anything else   -> '[^T]'  (NOT TIME OF DAY meter)

    The body used to read an undefined global ``meter_logic`` and ignore its
    argument; it now tests the argument itself.

    An earlier variant looked the flag up by rate class instead:
        'T' if c.rc_map.ix[rate_class]['TODQ'] else '[^T]'
    """
    if rate_class == 'VTOU':
        return 'T'
    return '[^T]'
        

In [None]:
# Query the Load Shape Adjustment Table
lst_qry = "select * from CONED_LoadShapeTempAdj where Strata != ''"
lst = pd.read_sql(lst_qry, conn)

## OPTIMIZING THE LOAD PROFILE PROCESS
# 1) Merge the  Load Shape Adjustment Table with Temperature Variants on
#    the DAY[TYPE, OfWeek] columns. 
# 2) Then filter the table where [TEMP L BOUND] <= Max <= [TEMP U Bound]
tmp = pd.merge(lst, c.temp_var.reset_index(), left_on='DAYTYPE', right_on='DayOfWeek')
tmp = tmp[(tmp['TEMP L BOUND'] <= tmp['Max']) & (tmp['Max'] <= tmp['TEMP U BOUND'])]

In [None]:
# sort_values returns a new frame rather than sorting in place, so its result
# must be kept: order by observation date, then promote the date to the index.
tmp2 = tmp.sort_values(by=['ObservedDate']).set_index('ObservedDate')

In [None]:
from datetime import datetime
from itertools import islice

"""When BilledDemand == 0, the MCD calculation chooses the zero value instead of
NormalizedUsage. Any location where a zero occurs is replaced with np.inf. This
ensures that when BilledDemand has a zero value, the NormalizedValue is selected
"""
# Swap every 0 in c.monthly for np.inf, modifying the frame in place.
# NOTE(review): replace() is applied to the whole frame, so a 0 in *any*
# column becomes inf, not only in the billed-demand column the note above
# talks about -- confirm that is intended.
c.monthly.replace(to_replace=0, value=np.inf, inplace=True)
# Preview the first rows after the replacement.
c.monthly.head()

In [None]:
"""
Metered Coincident Demand (MCD)

MCD is computed differently for all three meter types. 
    METERTYPE:
        CON: MCD = Normalized Usage
        DMD: MCD = min(normalized usage, billed demand)
        INT: if variance < 4% then MCD = Usage on CPDayHour
             else MCD = min(normalized usage, billed demand)
"""
from datetime import datetime
from itertools import islice

# Walk every monthly record, derive its normalized usage from the load-shape
# table, and write NormUsage / MCD back onto c.monthly.
# NOTE(review): the `.ix` indexer used below has been removed from current
# pandas. It is kept here because the index types of c.rc_map, c.cp and
# c.temp_var are not visible in this notebook, so a switch to `.loc` cannot
# be verified from here.
error_counts = 0  # records skipped by the row-count sanity check
rec_count = 0     # records whose NormUsage / MCD were updated
start_time = datetime.now()


for rec in c.monthly.itertuples():

    # Parse the record index
    prem, year = rec[0]

    # Parse record
    rate_class, strata, zone, stratum, tod, \
    bill_start, bill_end, usage, demand, \
    meter_type, mcd, normalized_usage, \
    meter_logic, meter_regex = rec[1:]

    rate_class = int(rate_class)

    # Service class mapping (looked up once and reused for the SC filter)
    service_class = c.rc_map.ix[rate_class]['Map']

    # Slice billcycle from temperature variants
    billcycle = c.temp_var.ix[bill_start:bill_end]

    # Join bill cycle with modified LoadShapeAdjustmentTable (LST)
    # NOTE(review): recent pandas releases reject `on=` combined with
    # left_index/right_index -- confirm against the installed version.
    local_lst = pd.merge(billcycle, c.lst,
                        left_index=True, right_index=True, # index = ObservedDate
                        on=['Max', 'DayOfWeek'])

    # Filter for Stratum condition
    local_lst = local_lst[(local_lst['STRAT L BOUND'] <= float(stratum)) &
                          (float(stratum) <= local_lst['STRAT U BOUND'])]

    # Filter for TimeOfDay meter type and Service Class Mapping.
    # The `== 1` comparisons are kept as written; they also turn any
    # non-boolean entries of the masks into False.
    tod_mask = local_lst['STRATA'].str.contains(meter_regex)
    sc_mask = (local_lst['SC'] == service_class)
    mask = (tod_mask == 1) & (sc_mask == 1)
    local_lst = local_lst.ix[mask]

    # Check to ensure proper filtering has occurred: the filtered table must
    # have exactly as many rows as the bill cycle, otherwise skip the record.
    if local_lst.shape[0] != billcycle.shape[0]:
        error_counts += 1
        continue

    # Extract the KW columns
    kw_cols = [col for col in local_lst.columns if 'KW' in col]
    local_lst = local_lst[kw_cols]

    # Convert coincident peak information into usable keys
    # Compute the Customer Scaling Factor
    # Extract the Load Profile from the billing cycle
    # Compute the normalized usage
    cp_day, hr = c.cp.ix[str(year)]
    csf = usage / local_lst.values.sum()
    load_profile = local_lst.ix[cp_day]['KW' + str(hr)]
    normalized_usage = load_profile * csf

    ## MCD ##
    # if 'DMD' then MCD = min(normalized_usage, billed_demand)
    # otherwise MCD = normalized_usage
    if meter_type == 'DMD':
        mcd = np.minimum(normalized_usage, demand)
    else:
        mcd = normalized_usage

    # Update the monthly usage values
    c.monthly.loc[(prem, year), ['NormUsage', 'MCD']
                 ] = [normalized_usage, mcd]

    rec_count += 1
    if rec_count % 1000 == 0:
        print('Current: %d Percent: %.4f Errors: %d' %(
                rec_count, rec_count / c.monthly.shape[0], error_counts))


## Timing
elapsed_time = datetime.now() - start_time

print('Total Time: %s' % elapsed_time)
# rec_count stays 0 when the frame is empty or every record was skipped;
# guard the division so the timing summary cannot raise ZeroDivisionError.
if rec_count:
    print('Average Time: %f' % (elapsed_time.total_seconds() / rec_count) )
else:
    print('Average Time: n/a (no records processed)')

In [None]:
c.monthly

In [None]:
tmp = pd.merge(c.monthly.reset_index(), c.util,
        how='left',
        left_on=['Year', 'ZoneCode'],
        right_on=['Year', 'Zone'])

In [None]:
%%time 

# Shift every factor by one (factor -> 1.0 + factor), as a whole-column operation.
tmp['Factor'] = 1.0 + tmp['Factor']

# Keep rows whose utility parameter targets this premise's meter type, or
# that are flagged for 'ALL' meter types.
match_mask = tmp['MeterType_x'].eq(tmp['MeterType_y'])
all_mask = tmp['MeterType_y'].eq('ALL')
mask = match_mask | all_mask

def coned_icap(g):
    """Compute the ICap for one group of merged monthly / utility-parameter rows.

    ICap = MCD * SubzoneTrueupFactor * ForecastTrueupFactor, using the first
    matching value of each within the group.

    Returns NaN when the group has no MCD row or is missing either true-up
    factor. This matches the Usage-based coned_icap defined earlier in the
    notebook, and keeps one incomplete group from aborting the whole
    groupby().apply() with an IndexError.
    """
    mcd = g['MCD'].values
    stf = g.loc[g['ParameterId'] == 'SubzoneTrueupFactor', 'Factor'].values
    ftf = g.loc[g['ParameterId'] == 'ForecastTrueupFactor', 'Factor'].values
    try:
        return mcd[0] * stf[0] * ftf[0]
    except IndexError:
        # One of the three components is absent for this group.
        return np.nan
    

In [None]:
# Select the applicable factor rows and compute one ICap per group.
# `.ix` was removed from pandas; `.loc` accepts the same boolean mask.
icap = (
    tmp.loc[mask]
    .groupby(by=['PremiseId', 'Year', 'RateClass', 'MeterType_x', 'MeterLogic'])
    .apply(coned_icap)
    .reset_index()
)

In [None]:
icap[icap['PremiseId'] == '494141010200009']

In [None]:
icap[icap['PremiseId'] == '700536121000000']

In [None]:
historic = pd.read_sql("select * from CapacityTagHistorical where UtilityId = 'CONED'", conn)

In [None]:
icap['Year'] = icap['Year'].apply(lambda x: x + 1)

In [None]:
# Pair each computed ICap with the historical capacity tag of the same
# premise and year.
tmp = pd.merge(icap, historic,
               left_on=['PremiseId', 'Year'],
               right_on=['PremiseID', 'CPYearID'])
# The groupby().apply().reset_index() that built `icap` left the computed
# values in a column named 0.
tmp.rename(columns={0:'ICap'}, inplace=True)

# Percent deviation from the historical tag, as a magnitude. Without abs() an
# ICap far above the tag gives a large negative variance and passes the
# `<= 2.0` validity check; the HistVar values shown earlier in the notebook
# are unsigned as well.
tmp['Variance'] = ((tmp['CapacityTagValue'] - tmp['ICap'])
                   / tmp['CapacityTagValue'] * 100.).abs()

In [None]:
# Flag rows whose variance is at most 2.0 as valid (1); every other row,
# including rows with a missing variance, stays 0.
tmp['Valid'] = 0
valid_idx = tmp[tmp['Variance'] <= 2.0].index
# DataFrame.set_value was removed from pandas; `.loc` writes the same cells.
tmp.loc[valid_idx, 'Valid'] = 1

# Count computed ICaps and historical tags per meter type, year, rate class
# and validity flag.
tmp.groupby(['MeterType_x', 'Year', 'RateClass', 'Valid']
           )[['ICap', 'CapacityTagValue']].count()