In [None]:
import pandas as pd
import numpy as np

### Historical

In [None]:
# historical data
hist_types = {'0':object, '1':object, '2':np.float64, '3':np.float64}
hist_cols = ['UtilityId', 'PremiseId', 'Year', 'HistoricalICap']
historical = pd.read_csv('historical_icap.csv', header=None, 
                         names=hist_cols)

historical.Year = pd.to_numeric(historical.Year, downcast='float', errors='coerce')


### Recipes

In [None]:
recipe_types = {'0': object, '1': object, '2': object, '3': object,
 '4': object,'5': np.int, '6': object, '7': object,'8':object, '9': np.float}

recipes = pd.read_csv('all_recipes.csv', header=0)
recipes.drop(['RunDate', 'RunTime'], axis=1, inplace=True)
recipes.rename(columns={'Utility':'UtilityId'}, inplace=True)


recipes.RecipeICap = pd.to_numeric(recipes.RecipeICap, errors='coerce')
recipes.Year = pd.to_numeric(recipes.Year, downcast='float', errors='coerce')


### Predictions

In [None]:
predictions = pd.read_csv('all_predictions.csv', header=None, dtype=object)

predictions.drop([0, 1, 2, 6, 7], axis=1, inplace=True)
predictions.columns = ['UtilityId', 'PremiseId', 'PredictionYear', 'PredictionICap', 
                      'PredictionUnc', 'YearCount', 'Samples']

predictions.PredictionYear = pd.to_numeric(predictions.PredictionYear, 
                                           downcast='float', 
                                           errors='coerce')
predictions.PredictionICap = pd.to_numeric(predictions.PredictionICap, 
                                           downcast='float',
                                          errors='coerce')

### Merging All Values

In [None]:
# historical, recipes, predictions



In [None]:
temp =recipes.merge(historical, on=['UtilityId', 'PremiseId', 'Year']
                  ).merge(predictions, on=['UtilityId', 'PremiseId'],
                         how='outer')

hist = (temp.HistoricalICap > 0.)
rec  = (temp.RecipeICap > 0.)

temp = temp[ hist & rec ]
temp.sort_values(by=['PremiseId', 'Year'], inplace=True);
#temp.fillna(value='null', inplace=True);
temp = temp[temp.UtilityId == 'PPL']

In [None]:
#ppl = temp[temp.UtilityId == 'PPL']
#ppl.sort_values(by=['PremiseId','Year'], inplace=True)
'''
ppl['pct_ch'] = ppl.groupby('PremiseId')['RecipeICap'].pct_change() + 1
ppl['mad'] = ppl.groupby('PremiseId')['RecipeICap'].mad()
'''

In [None]:
temp.groupby('PremiseId').apply(lambda x: if x.shape[0] < 2)

# Aggregation

In [29]:
import pandas as pd
import numpy as np
import glob
import os

In [14]:
# Path is relative to current directory
# Import all CSV files
path = '/home/miles/Dropbox/iCAP_Project/Results/Analysis'
file_names = glob.glob(path + '/[0-9]*.csv')

In [15]:
file_names

['/home/miles/Dropbox/iCAP_Project/Results/Analysis/20170206_centhud_rec.csv',
 '/home/miles/Dropbox/iCAP_Project/Results/Analysis/20170206_pseg_rec.csv',
 '/home/miles/Dropbox/iCAP_Project/Results/Analysis/20170206_peco_rec.csv',
 '/home/miles/Dropbox/iCAP_Project/Results/Analysis/20170206_ppl_rec.csv',
 '/home/miles/Dropbox/iCAP_Project/Results/Analysis/20170206_coned_rec.csv']

In [21]:
# Load all CSV files into memory
# Concatenate into single DataFrame
df = pd.DataFrame()
list_ = []
for file_ in file_names:
    df = pd.read_csv(file_, index_col=None, header=0)
    list_.append(df)
df = pd.concat(list_)

In [22]:
df.columns

Index(['RunDate', 'ISO', 'UtilityId', 'PremiseId', 'Year', 'RateClass',
       'Strata', 'MeterType', 'RecipeICap', 'HistoricalICap', 'HistVar'],
      dtype='object')

In [23]:
temp = df.copy()

In [24]:
grp = temp.groupby('PremiseId')

mad = grp['RecipeICap'].transform(lambda x: x.mad())
pct = grp['RecipeICap'].transform(lambda x: x.pct_change() * 100)
#prem_min = grp[['RecipeICap', 'Year']].transfrom(lambda x: x.ix[x.RecipeICap.idxmax()])

  return np.abs(demeaned).mean(axis=axis, skipna=skipna)


In [25]:
temp['TagVariability'] = mad
temp['PercentChange'] = pct
#temp.fillna(value='null', inplace=True)
print(temp.shape)
temp.dropna(subset=['HistoricalICap'])
temp.fillna(value='null', inplace=True)

temp = temp.groupby('PremiseId').filter(lambda x: len(x) > 1)

(52515, 13)


In [43]:
temp['Year'] = pd.to_numeric(temp['Year'], downcast='float', errors='coerce')
temp = temp[temp['RecipeICap'] != 'null']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [None]:
container = {}
for iso_util_prem_year, record in temp.groupby(['ISO', 'UtilityId', 'PremiseId', 'Year']):
    iso, utility, premise, year = iso_util_prem_year

    year = str(int(year))
    
    # iso level
    if iso not in container.keys():
        container[iso] = {}
    iso_d = container[iso]
    
    # utility 
    if utility not in iso_d.keys():
        iso_d[utility] = {'aggregate':{}, 'records':{}}
    util_agg_d = iso_d[utility]['aggregate']
    util_rec_d = iso_d[utility]['records']
    

    # update utility meta data
    if year not in util_agg_d.keys():
        util_agg_d[year] = {'min':0., 'max':0., 'count':0., 'total':0}

    util_year_d = util_agg_d[year]
    
    recipe_value = record.RecipeICap.values[0]
    til_year_d['min'] = min([util_year_d['min'], recipe_value])
    util_year_d['max'] = max([util_year_d['max'], recipe_value])
    util_year_d['total'] += recipe_value
    util_year_d['count'] += 1

    # premise
    if premise not in util_rec_d.keys():
        util_rec_d[premise] = {'metadata':{'min':(0, 1000000), 'max':(0, 0.), 'count':0., 'total':0}}
    prem_d = util_rec_d[premise]
    prem_agg_d = util_rec_d[premise]['metadata']
    
    meta_cols = ['ISO', 'MeterType', 'RateClass', 'Strata', 'UtilityId', 'PremiseId',
                'TagVariability']
    prem_meta_d = record[meta_cols].to_dict(orient='records')[0]
    
    prem_agg_d['min'] = min([prem_agg_d['min'], (year, recipe_value)], key=lambda tup: tup[1])
    prem_agg_d['max'] = max([prem_agg_d['max'], (year, recipe_value)], key=lambda tup: tup[1])
    prem_agg_d['total'] += recipe_value
    prem_agg_d['count'] += 1

   
    for k, v in prem_meta_d.items():
        if k not in prem_agg_d.keys():
            prem_agg_d[k] = v
            
    
    
    # year
    if year not in prem_d.keys():
        prem_d[year] = {}
    year_d = prem_d[year]
    
    
    
    for k,v in record.drop(meta_cols, axis=1).to_dict(orient='records')[0].items():
        year_d[k] = v
    
    
                    

0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
1.1199 <class 'float'>
1.8146 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
1.1199 <class 'float'>
1.8146 <class 'float'>
1.1199 <class 'float'>
1.8146 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 <class 'float'>
1.081 <class 'float'>
0.5177 

In [None]:
container

In [None]:
import json
with open('ppl_premise_explorer.json', 'w') as f:
    json.dump(container, f, indent=4, separators=(',',': '))

# Testing for Javier

In [None]:
arr = np.arange(0, 100).reshape((10,10))

In [None]:
arr.max()
arr.min()