In [119]:
import pandas as pd
import numpy as np
from eemeter.weather.noaa import ISDWeatherSource
from eemeter.weather.location import (
    zipcode_to_usaf_station,
    zipcode_to_tmy3_station,
)
# Hourly trace CSV consumed by read_hourly_trace_file below.
# NOTE(review): hard-coded absolute path (presumably a Vagrant shared folder);
# adjust it before running the notebook on another machine.
fname = "/vagrant/etl-natgrid-lime/test_data/hourly_traces_nw/new_28102.csv"
def read_hourly_trace_file(file):
    """Load an hourly energy trace CSV into a UTC-indexed, de-duplicated frame.

    Parameters
    ----------
    file : str or file-like
        Anything ``pandas.read_csv`` accepts.  The column at position 3 is
        parsed as timestamps and becomes the index; a ``value`` column holds
        the readings.

    Returns
    -------
    pandas.DataFrame
        The CSV contents sorted by timestamp, with ``value`` renamed to
        ``energy`` and coerced to numeric (unparseable readings become NaN),
        the index localized to UTC, and only the first row kept for any
        timestamp that occurs more than once.
    """
    data = pd.read_csv(file, index_col=3, parse_dates=True)
    data['value'] = pd.to_numeric(data['value'], errors='coerce')
    # The 'start' entry only applies if a *column* carries that name; the
    # column at position 3 has already been moved into the index by read_csv.
    data = data.rename(columns={'value': 'energy', 'start': 'index'})
    # Timestamps are parsed as naive datetimes, so attach UTC before sorting.
    data = data.tz_localize('UTC', level=0).sort_index()
    return data[~data.index.duplicated(keep='first')]

def add_temp(df, zipcode):
    """Pair each energy reading with the outdoor temperature for ``zipcode``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``energy`` column.  NOTE(review): the index is assumed
        to be a sorted, unique DatetimeIndex (what ``read_hourly_trace_file``
        returns) -- confirm before using with other inputs.
    zipcode : str
        ZIP code passed to ``zipcode_to_usaf_station`` to pick the weather
        station.

    Returns
    -------
    pandas.DataFrame
        Columns ``tempF`` and ``energy``, indexed exactly like ``df``.
    """
    station = zipcode_to_usaf_station(zipcode)
    weather_source = ISDWeatherSource(station)

    # A column-less frame is enough to expand df's index into a gap-free
    # hourly grid spanning its first to last timestamp.  An offset object is
    # used instead of the 'H' alias, which newer pandas releases deprecate.
    hourly_grid = pd.DataFrame(index=df.index).asfreq(
        pd.offsets.Hour(), method='ffill', fill_value=np.nan)
    temperatures = weather_source.indexed_temperatures(
        hourly_grid.index, 'degF', allow_mixed_frequency=True)

    # Passing index=df.index re-aligns the hourly temperatures to the original
    # readings; the Series is used directly so its name does not matter.
    return pd.DataFrame({
        'tempF': temperatures,
        'energy': df['energy'],
    }, index=df.index)

# Load the hourly trace and attach outdoor temperature for ZIP code 14068.
df = add_temp(read_hourly_trace_file(fname), '14068')


In [118]:
from hourly_model import DayOfWeekBasedLinearRegression
import patsy

# Fit the day-of-week regression on the energy/temperature frame.
dd = DayOfWeekBasedLinearRegression()
dd.fit(df)

# Run the frame through the model's add_time_day, add_hdd and add_cdd steps,
# in that order, to obtain the columns referenced by dd.formula.
test_df = dd.add_cdd(dd.add_hdd(dd.add_time_day(df)))

# dmatrices returns (response, design matrix): xx is the response frame and
# yy the design matrix, whose column names are kept as the feature list.
xx, yy = patsy.dmatrices(dd.formula, test_df, return_type='dataframe')
features = yy.columns

def predict_with_wts(dataframe,
            formula,
            model_params,
            hdd_wt=None, cdd_wt=None):
    """Predict energy from a linear model, optionally overriding HDD/CDD weights.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data containing every variable referenced by ``formula``.
    formula : str
        patsy formula used to build the design matrix from ``dataframe``.
    model_params : mapping
        Design-matrix column name -> fitted coefficient (for example the
        ``.params.to_dict()`` of a fitted regression).
    hdd_wt, cdd_wt : float, optional
        Replacement coefficients for the ``hdd`` / ``cdd`` columns.  ``None``
        keeps the fitted value; any number, including ``0.0``, is applied.
        An override is ignored when ``model_params`` has no such column.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``dataframe`` with columns ``energy_predicted``,
        ``hdd_wt`` and ``cdd_wt`` (the overrides as passed in).  Rows that
        are absent from the design matrix get NaN in ``energy_predicted``.

    Raises
    ------
    KeyError
        If ``model_params`` names a column the design matrix does not have.
    """
    _response, feature_dataframe = patsy.dmatrices(formula,
                                                   dataframe,
                                                   return_type='dataframe')

    weights = pd.Series(dict(model_params), dtype=float)
    # Compare against None so that an explicit weight of 0.0 is honoured.
    if hdd_wt is not None and 'hdd' in weights.index:
        weights['hdd'] = hdd_wt
    if cdd_wt is not None and 'cdd' in weights.index:
        weights['cdd'] = cdd_wt

    # One matrix product over all rows.  'Intercept' is treated like any other
    # design-matrix column, so it contributes to the sum exactly once.
    energy_predicted = feature_dataframe[list(weights.index)].dot(weights)

    # energy_predicted carries the design matrix's index, so the constructor
    # aligns it to dataframe.index instead of relying on equal lengths.
    return pd.DataFrame({
        'energy_predicted': energy_predicted,
        'hdd_wt': hdd_wt,
        'cdd_wt': cdd_wt
    }, index=dataframe.index)

def explore_hdd(dataframe, model_params, formula, step=0.01, num_iterations=10):
    """Re-predict with a sequence of HDD weights stepped away from the fitted one.

    Starting at ``model_params['hdd']``, ``predict_with_wts`` is called once
    per iteration and the weight is then moved by ``step``.  With a positive
    ``step`` the weight moves toward zero whatever its sign.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data forwarded unchanged to ``predict_with_wts``.
    model_params : mapping
        Fitted coefficients; must contain an ``'hdd'`` entry.
    formula : str
        Formula forwarded unchanged to ``predict_with_wts``.
    step : float, optional
        Amount by which the HDD weight changes between iterations.
    num_iterations : int, optional
        Number of predictions to make; zero or less makes none.

    Returns
    -------
    list
        The ``predict_with_wts`` results, in the order the weights were tried.
    """
    hdd_wt = model_params['hdd']
    # Flip the step for negative weights so both signs move the same way
    # relative to zero.
    if hdd_wt < 0.0:
        step = -step

    predictions = []
    for _ in range(num_iterations):
        predictions.append(
            predict_with_wts(dataframe, formula, model_params, hdd_wt))
        hdd_wt = hdd_wt - step
    return predictions

# Sweep the HDD weight, starting from the fitted weekday model's coefficient.
explore_hdd(
    test_df,
    model_params=dd.model_res_weekday.params.to_dict(),
    formula=dd.formula,
)
    


In [125]:
from hourly_model import DayOfWeekBasedLinearRegression
from caltrack_daily import CaltrackDailyModel

# Fit both models on the same frame and line their per-row predictions up
# against the observed energy.
cd = CaltrackDailyModel(grid_search=True,
                        min_contiguous_months=1,
                        min_fraction_coverage=0.01)
cd.fit(df)
caltrack_pred, var = cd.predict(df, summed=False)

dd = DayOfWeekBasedLinearRegression()
dd.fit(df)
# NOTE: `var` is rebound here, so after this cell it holds only the second
# value returned by dd.predict; the one from cd.predict is discarded.
day_of_week, var = dd.predict(df, summed=False)

# Parenthesised so the line is valid both as a Python 2 print statement and
# as a Python 3 print() call; the printed text is the same.
print(pd.DataFrame({
    'actual': df['energy'],
    'day': day_of_week,
    'cal': caltrack_pred
}))


                              actual        cal        day
start                                                     
2017-06-01 00:00:00+00:00   6.195193   8.079866   6.064041
2017-06-01 01:00:00+00:00   5.874501   8.079866   5.891865
2017-06-01 02:00:00+00:00   3.912540   8.079866   5.601963
2017-06-01 03:00:00+00:00   3.597520   8.079866   5.532582
2017-06-01 04:00:00+00:00   3.978244   8.079866   5.272106
2017-06-01 05:00:00+00:00   3.790847   8.079866   5.133823
2017-06-01 06:00:00+00:00   4.937854   8.079866   5.812675
2017-06-01 07:00:00+00:00   4.449010   8.079866   5.726514
2017-06-01 08:00:00+00:00   4.027560   8.079866   6.126671
2017-06-01 09:00:00+00:00  13.404134   8.079866  14.006084
2017-06-01 10:00:00+00:00  10.692056   8.079866  12.707216
2017-06-01 11:00:00+00:00  11.938203   8.079866  12.840664
2017-06-01 12:00:00+00:00  13.699850   8.079866  13.112870
2017-06-01 13:00:00+00:00  11.829553   8.079866  13.190192
2017-06-01 14:00:00+00:00  10.289950   8.079866  13.1320