# CORRELATION

## Connect to DB

In [1]:
%matplotlib inline 

In [2]:
import pandas as pd
from icap.database.icapdatabase import ICapDatabase
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [3]:
# JSON file handed to ICapDatabase (presumably connection settings — TODO confirm).
fp = 'icap/database/icapdatabase.json'
# Shared connection handle; later cells pass it to pd.read_sql and to the icap classes.
conn = ICapDatabase(fp).connect()

## Premise loading

In [None]:
# Fetch up to 100 distinct premise ids recorded in HourlyUsage for utility 'PPL'.
df = pd.read_sql("select distinct top(100) PremiseId from HourlyUsage where UtilityId = 'PPL'", conn)

In [None]:
# Premise ids as a plain array; the shape shows how many the query returned.
prem = df.PremiseId.values
prem.shape

In [None]:
# Disabled experiment: the whole cell is one bare string literal, so none of
# the code inside executes — running the cell only echoes the text.
'''
import warnings
warnings.simplefilter(action='ignore', category=RuntimeWarning)

from datetime import datetime
start = datetime.now()
count = 0
for p in prem[2:10]:
    params = {'year':2015, 'iso':'PJM-RTO', 'utility':'PPL', 'premise':p}
    corr = Correlation(conn, **params)
    r = corr.analyze()
    results = r.results()
    break
'''

## Testing the API

In [None]:
import requests
# Base address of the correlation service.
url = 'http://ec2-54-159-89-185.compute-1.amazonaws.com:3000'
# Route segments after /correlation: 2015 / pjm-rto / ppl / premise id.
endpoint =  '/correlation/2015/pjm-rto/ppl/9997049009'

# Full request URL.
req = url + endpoint

print(req)

In [None]:
# Issue the GET and show the HTTP status code as the cell output.
# NOTE(review): no timeout is passed, so an unresponsive host would block the kernel.
r = requests.get(req)
r.status_code


In [None]:
# Route template; the utility and premise id are substituted per request.
endpoint_temp = '/correlation/2015/pjm-rto/{utility}/{premise}'
# Concrete route for the PECO test premise.
endpoint_peco = endpoint_temp.format(utility='peco', premise='0013100707')

In [None]:
# Same request as before, now against the PECO route; the status code is the cell output.
# NOTE(review): no timeout is passed, so an unresponsive host would block the kernel.
req = url + endpoint_peco
r = requests.get(req)
r.status_code

In [None]:
# Decode and echo the JSON body of the last response.
r.json()

In [None]:
# Row counts per PeakDay value in the returned premise records.
# NOTE(review): this is not the last expression in the cell, so its result is
# computed but never displayed — only the raw record list below is echoed.
pd.DataFrame(r.json()['premise_record']).groupby('PeakDay').count()

# Raw premise records from the response (the body is decoded a second time here).
r.json()['premise_record']

## Expanding Correlation Analysis

In [7]:
# load a test case
import importlib
from icap.correlation.correlation import Correlation

#importlib.reload(icap.correlation.correlation)

c = Correlation(conn,  
                iso='pjm-rto', utility='ppl', 
                premise='9997049009')


c = Correlation(conn,  
                iso='pjm-rto', utility='peco', 
                premise='0013100707') 

In [8]:
# Run the correlation analysis; later cells read `cr.results()` and `cr.iso_record`.
cr = c.analyze()

In [9]:
# Echo the premise records from the analysis result.
# NOTE(review): this prints the entire list; consider slicing it for display.
cr.results()['premise_record']

[{'CoincidentPeak': 0,
  'PeakDay': False,
  'PremNormalizedUsage': 0.10344827586206895,
  'PremUsage': 0.72,
  'UsageDate': 'Wed May 21 2014 01:00:00',
  'Year': 2014},
 {'CoincidentPeak': 0,
  'PeakDay': False,
  'PremNormalizedUsage': 0.09770114942528736,
  'PremUsage': 0.68,
  'UsageDate': 'Wed May 21 2014 02:00:00',
  'Year': 2014},
 {'CoincidentPeak': 0,
  'PeakDay': False,
  'PremNormalizedUsage': 0.09195402298850575,
  'PremUsage': 0.64,
  'UsageDate': 'Wed May 21 2014 03:00:00',
  'Year': 2014},
 {'CoincidentPeak': 0,
  'PeakDay': False,
  'PremNormalizedUsage': 0.10919540229885057,
  'PremUsage': 0.76,
  'UsageDate': 'Wed May 21 2014 04:00:00',
  'Year': 2014},
 {'CoincidentPeak': 0,
  'PeakDay': False,
  'PremNormalizedUsage': 0.10919540229885057,
  'PremUsage': 0.76,
  'UsageDate': 'Wed May 21 2014 05:00:00',
  'Year': 2014},
 {'CoincidentPeak': 0,
  'PeakDay': False,
  'PremNormalizedUsage': 0.10344827586206895,
  'PremUsage': 0.72,
  'UsageDate': 'Wed May 21 2014 06:00:00

***FLAG PEAK DAY***<br>
The UI requires _Peak Days_ to be flagged for the filtering mechanism. To find those peak days, the selection is made based on the greatest usage value PER day. Simply returning the top hours would result in multiple hours for the same day.

In [None]:
# Extract the ISO record from the correlation result.
iso = pd.DataFrame(cr.iso_record)

# Boolean mask: rows whose ZoneUsage equals the maximum of their UsageDate
# group, i.e. the peak row(s) of each date (ties within a date are all kept).
# The string 'max' selects pandas' own reduction; passing the Python builtin
# `max` is deprecated in recent pandas.
idx = iso.groupby('UsageDate')['ZoneUsage'].transform('max') == iso['ZoneUsage']

# Index labels of the 20 highest per-date peaks.
peak_values = iso[idx].sort_values(by='ZoneUsage', ascending=False)[:20].index

# Add the PeakDay column and flag the selected rows in one vectorized
# assignment. DataFrame.set_value was deprecated in pandas 0.21 and removed
# in 1.0, so label-based .loc is used instead of a per-row loop.
iso['PeakDay'] = False
iso.loc[peak_values, 'PeakDay'] = True

### Test the API for 100 premises

In [None]:
# time tracking
from datetime import datetime

# Benchmark the correlation endpoint: one GET per premise, recording the
# wall-clock seconds each request takes.
url = 'http://ec2-54-159-89-185.compute-1.amazonaws.com:3000'
sample = prem[:100]

# One slot per premise actually requested, so a `prem` shorter than 100 does
# not leave trailing zeros that would skew the statistics. The builtin `float`
# replaces the `np.float` alias, which was removed in NumPy 1.24.
deltas = np.zeros(len(sample), dtype=float)
for i, p in enumerate(sample):
    endpoint = '/correlation/2015/pjm-rto/ppl/%s' % p
    req = url + endpoint

    # Time only the request itself.
    start = datetime.now()
    r = requests.get(req)
    timing = (datetime.now() - start).total_seconds()
    deltas[i] = timing

In [None]:
from scipy import stats
# Summary statistics of the per-request timings collected in `deltas`.
stats.describe(deltas)

In [None]:
import seaborn as sns
# Distribution of the per-request timings.
# NOTE(review): distplot is deprecated since seaborn 0.11 in favour of
# histplot/displot — migrate once the environment's seaborn version is confirmed.
sns.distplot(deltas)

In [None]:
# NOTE(review): `hist`, `res` and `s` are not defined in any visible cell —
# confirm where they come from before relying on this cell.
# Join the two frames on premise and year.
compare = pd.merge(hist, res.results(), on=['Premise', 'Year'])

# Absolute percentage deviation of ICap from CapacityTagValue.
compare['HistVar'] = (
    compare['CapacityTagValue']
    .sub(compare['ICap'])
    .div(compare['CapacityTagValue'])
    .mul(100.0)
    .abs()
)
compare['MeterType'] = s.meter_type

In [None]:
# Show the merged comparison frame.
compare

In [None]:
# NOTE(review): PSEGInterval is never imported in the visible notebook, so this
# cell depends on hidden kernel state — add the import or drop the cell.
p = PSEGInterval(conn, ) 

In [None]:
import pandas as pd

# Tiny one-cell frame (column 'M', row 'a', value 1) for checking DataFrame.empty.
# NOTE: this rebinds `df`, which earlier held the PremiseId query result.
df = pd.DataFrame({'M': [1]}, index=['a'])

In [None]:
# True only when the frame holds no elements.
df.empty

# Updating Timestamp

In [None]:
from icap.correlation.correlation import Premise

In [None]:
# Pick one premise (row label 40) as the test case. `.ix` was removed in
# pandas 1.0; `.loc` is its label-based equivalent on an integer index.
# NOTE(review): `df` must be the PremiseId query result here, but an earlier
# cell rebinds `df` to a one-row toy frame — re-run the query cell first.
# This also rebinds `prem` from the id array to a single id.
prem = df.loc[40, 'PremiseId']
yr = '2015'
utility = 'PPL'

In [None]:
# Premise object for the selected premise, year and utility.
p = Premise(conn=conn, year=yr, utility=utility, premise=prem)

In [None]:
# Load the records (presumably populates `p.history_`, which the next cells read — TODO confirm).
p.initialize()

In [None]:
# Rows currently flagged as coincident peaks.
p.history_[p.history_['CoincidentPeak'] == 1]

In [None]:
# Inspect the Python type stored in the UsageDate column.
# `.ix` was removed in pandas 1.0; the first row is taken positionally with `.iloc`.
type(p.history_['UsageDate'].iloc[0])

In [None]:
# Coincident-peak dates and hour-ending values for the selected utility.
cp_query = """
    select
        CPDate,
        cast(HourEnding as int) as HourEnding
    from [CoincidentPeak]
    where
        UtilityId = '{utility}'""".format(utility=utility)

# Load the result and parse CPDate into pandas datetimes.
cp_df = pd.read_sql(cp_query, conn).assign(
    CPDate=lambda frame: pd.to_datetime(frame['CPDate'])
)

In [None]:
# Reset the flag, then mark the rows whose index labels are in `cp_dates`.
# `.loc` replaces DataFrame.set_value, which was deprecated in pandas 0.21,
# removed in 1.0, and documented only for setting a single scalar cell.
# NOTE(review): `cp_dates` is built in a cell further down — run that cell first.
p.history_['CoincidentPeak'] = 0
p.history_.loc[cp_dates, 'CoincidentPeak'] = 1


In [None]:
# Rows flagged as coincident peaks after the update.
p.history_[p.history_['CoincidentPeak'] == 1]

In [None]:
# Index labels of the history rows whose (UsageDate, HourEnding) matches a
# coincident-peak (CPDate, HourEnding) pair.
cp_dates = pd.merge(
    # reset_index exposes the row labels as a column so they survive the merge;
    # the column is named 'index' only if the index is unnamed — assumed here.
    p.history_.reset_index(),
    cp_df,
    how='inner',
    left_on=['UsageDate', 'HourEnding'],
    right_on=['CPDate', 'HourEnding'],
).set_index('index').index

In [None]:
# Reset the flag, then mark the rows labelled by `cp_idx`.
# `.loc` replaces DataFrame.set_value (removed in pandas 1.0).
# NOTE(review): `cp_idx` is not defined in any visible cell — confirm which
# index this was meant to be (possibly `cp_dates`).
df['CoincidentPeak'] = 0
df.loc[cp_idx, 'CoincidentPeak'] = 1
# Echo the frame, as the original set_value call did through its return value.
df

In [None]:
from icap.correlation.correlation import Correlation

In [None]:
# Correlation object for the selected premise and year (PJM-RTO / PPL).
c = Correlation(conn=conn, year=yr, iso='pjm-rto', utility='ppl', premise=prem)

In [None]:
# Run the analysis and keep the result object for inspection.
cr = c.analyze()

In [None]:
# Echo the premise records from the analysis result.
# NOTE(review): this prints the entire list; consider slicing it for display.
cr.results()['premise_record']

# Multi-year Default View

In [None]:
%matplotlib inline 

In [None]:
import pandas as pd
from icap.database.icapdatabase import ICapDatabase
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [None]:
# Rebuilds the connection exactly as the first section of the notebook does,
# so this section can be run on its own.
fp = 'icap/database/icapdatabase.json'
conn = ICapDatabase(fp).connect()

In [None]:
from icap.correlation.correlation import Premise

In [None]:
# PECO test premise; no `year` is passed here, unlike the earlier Premise call.
premise = Premise(conn=conn, utility='peco', premise='0013100707')

In [None]:
# Load the records (presumably populates `premise.history_` — TODO confirm).
premise.initialize()

In [None]:
# Name of the first history column containing 'NormalizedUsage'
# (raises IndexError when no column matches).
col = [name for name in premise.history_.columns if 'NormalizedUsage' in name][0]

In [None]:
# Work on a copy so the object's own history frame is left untouched.
# NOTE: this rebinds `prem`, which earlier cells used for premise ids.
prem = premise.history_.copy()

In [None]:
# Mask of rows holding the maximum of `col` within their (Year, UsageDate) group.
# The string 'max' selects pandas' own reduction; passing the Python builtin
# `max` is deprecated in recent pandas and emits a FutureWarning.
mask_day_maxes = prem.groupby(['Year', 'UsageDate'])[col].transform('max') == prem[col]

# Row labels of the 50 largest of those per-date maxima within each year.
# nlargest on a grouped Series yields a (Year, label) MultiIndex; moving the
# Year level out with reset_index leaves the original row labels as the index.
idx_year_maxes = (
    prem[mask_day_maxes]
    .groupby('Year')[col]
    .nlargest(50)
    .reset_index(level=0)
    .index
)

In [None]:
# Rows selected as yearly peaks. `.ix` was removed in pandas 1.0; the values in
# `idx_year_maxes` are row labels, so label-based `.loc` is the replacement.
prem.loc[idx_year_maxes]

In [None]:
# The 50 largest `col` values within each (Year, UsageDate) group.
prem.groupby(['Year', 'UsageDate'])[col].nlargest(50)

In [None]:
# NOTE(review): the only visible assignment of `peak_values` is an Index of row
# labels taken from the ISO frame, not a DataFrame with Year and `col` columns —
# this looks like a leftover experiment; confirm or remove.
# The trailing ';' suppresses the cell output.
peak_values.groupby('Year')[col].nlargest(50).reset_index(level=0);

In [None]:
# Row counts per Year and PeakDay value, with row/column totals (margins=True).
pd.crosstab(premise.history_['Year'], premise.history_['PeakDay'], margins=True)

In [None]:
# Show which column name was selected.
col

In [None]:
# The original line was missing the assignment operator and did not parse.
# NOTE(review): this cell looks unfinished — as written it binds the per-year
# GroupBy object to `idx`, rebinding the name used for the ISO mask earlier.
idx = premise.history_.groupby(['Year'])