In [2]:
import glob
import random
import datetime
import os, fnmatch
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# from sklearn.externals import joblib

from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.python.keras.models import model_from_json
from tensorflow.python.keras import optimizers


%matplotlib inline

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [23]:
df = pd.read_csv("../dataset/dbBills_cleaned_1000.csv")

df = df.drop(df.columns[[0]], axis=1)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
df = df.fillna(0)

df = df.sample(1000)
# df.to_csv("../dataset/dbBills_cleaned_1000.csv")

df.head()

Unnamed: 0,xCycleCode,xFamilyNum,xFaze,xAmper,xCounterBuldingNo,xRegionName_Roustaei,xRegionName_Shahri,xUsageGroupName_Keshavarzi,xUsageGroupName_Khanegi,xUsageGroupName_Omoomi,xUsageGroupName_Sanati,xUsageGroupName_Sayer,xBakhshCode_1,xBakhshCode_2,xBakhshCode_4,xTimeControlCode_1,xTimeControlCode_2,xTimeControlCode_3,xTariffOldCode_1010,xTariffOldCode_1011,xTariffOldCode_1110,xTariffOldCode_1111,xTariffOldCode_1990,xTariffOldCode_2110,xTariffOldCode_2210,xTariffOldCode_2310,xTariffOldCode_2410,xTariffOldCode_2510,xTariffOldCode_2610,xTariffOldCode_2710,xTariffOldCode_2990,xTariffOldCode_2992,xTariffOldCode_3110,xTariffOldCode_3210,xTariffOldCode_3310,xTariffOldCode_3410,xTariffOldCode_3520,xTariffOldCode_3540,xTariffOldCode_3740,xTariffOldCode_3991,xTariffOldCode_4410,xTariffOldCode_4610,xTariffOldCode_4990,xTariffOldCode_5110,xTariffOldCode_5990,days_difference,month,mediumDailyUsage,highDailyUsage,lowDailyUsage,xMeduimKw,xHighKw,xLowKw
586,32,1,1,15,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,55,9,0.036364,0.0,0.0,2,0,0
810,11,1,1,25,81021403,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,58,5,5.034483,0.0,0.0,292,0,0
942,20,1,1,25,787163123,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,53,1,1.490566,0.773585,1.320755,79,41,70
517,5,1,1,25,8437479523,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,63,6,13.920635,0.0,0.0,877,0,0
516,38,1,1,25,87285977,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60,6,5.883333,2.216667,3.016667,353,133,181


In [24]:
def create_model(target):

    features = ['xCycleCode', 'xFamilyNum', 'xFaze', 'xAmper', 'xRegionName_Roustaei',
                'xRegionName_Shahri', 'xUsageGroupName_Keshavarzi', 'xUsageGroupName_Khanegi',
                'xUsageGroupName_Omoomi', 'xUsageGroupName_Sanati', 'xUsageGroupName_Sayer',
                'xBakhshCode_1', 'xBakhshCode_2', 'xBakhshCode_4',
                'xTimeControlCode_1', 'xTimeControlCode_2', 'xTimeControlCode_3',
                'xTariffOldCode_1010', 'xTariffOldCode_1011', 'xTariffOldCode_1110',
                'xTariffOldCode_1111', 'xTariffOldCode_1990', 'xTariffOldCode_2110',
                'xTariffOldCode_2210', 'xTariffOldCode_2310', 'xTariffOldCode_2410',
                'xTariffOldCode_2510', 'xTariffOldCode_2610', 'xTariffOldCode_2710',
                'xTariffOldCode_2990', 'xTariffOldCode_2992', 'xTariffOldCode_3110',
                'xTariffOldCode_3210', 'xTariffOldCode_3310', 'xTariffOldCode_3410', 
                'xTariffOldCode_3520', 'xTariffOldCode_3540', 'xTariffOldCode_3740', 
                'xTariffOldCode_3991', 'xTariffOldCode_4410', 'xTariffOldCode_4610', 
                'xTariffOldCode_4990', 'xTariffOldCode_5110', 'xTariffOldCode_5990',
                'days_difference', 'month']

    X = df[features]
    X = np.matrix(X.values.tolist())
    y = df[target]
    
    y=y.values.reshape(-1,1)
    scaler = MinMaxScaler()
    
    print(scaler.fit(y))
    xscale=scaler.transform(X)
    yscale=scaler.transform(y)
    
    X_train, X_test, y_train, y_test = train_test_split(xscale, yscale)
        
    model = Sequential()
    model.add(Dense(12, input_dim=46, kernel_initializer='normal', activation='linear'))
    model.add(Dense(8, activation='linear'))
    model.add(Dense(6, activation='linear'))
    model.add(Dense(1, activation='linear'))
    model.summary()
        
    model.compile(loss='mse', optimizer='adam', metrics=['mse','mae'])
    model.fit(X_train, y_train, epochs=300, batch_size=50,  verbose=0, validation_split=0.2)
    
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

    model_json = model.to_json()
    with open("{0}.json".format(target), "w") as json_file:
        json_file.write(model_json)
    model.save_weights("{0}.h5".format(target))
    print("Saved model to disk")

In [25]:
create_model("mediumDailyUsage")
create_model("highDailyUsage")
create_model("lowDailyUsage")

MinMaxScaler(copy=True, feature_range=(0, 1))
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_46 (Dense)             (None, 12)                564       
_________________________________________________________________
dense_47 (Dense)             (None, 8)                 104       
_________________________________________________________________
dense_48 (Dense)             (None, 1)                 9         
Total params: 677
Trainable params: 677
Non-trainable params: 0
_________________________________________________________________
mean_squared_error: 0.23%
Saved model to disk
MinMaxScaler(copy=True, feature_range=(0, 1))
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_49 (Dense)             (None, 12)                564       
_________________________________________________________________
dense_

In [26]:
df = df.sample(20)
df

Unnamed: 0,xCycleCode,xFamilyNum,xFaze,xAmper,xCounterBuldingNo,xRegionName_Roustaei,xRegionName_Shahri,xUsageGroupName_Keshavarzi,xUsageGroupName_Khanegi,xUsageGroupName_Omoomi,xUsageGroupName_Sanati,xUsageGroupName_Sayer,xBakhshCode_1,xBakhshCode_2,xBakhshCode_4,xTimeControlCode_1,xTimeControlCode_2,xTimeControlCode_3,xTariffOldCode_1010,xTariffOldCode_1011,xTariffOldCode_1110,xTariffOldCode_1111,xTariffOldCode_1990,xTariffOldCode_2110,xTariffOldCode_2210,xTariffOldCode_2310,xTariffOldCode_2410,xTariffOldCode_2510,xTariffOldCode_2610,xTariffOldCode_2710,xTariffOldCode_2990,xTariffOldCode_2992,xTariffOldCode_3110,xTariffOldCode_3210,xTariffOldCode_3310,xTariffOldCode_3410,xTariffOldCode_3520,xTariffOldCode_3540,xTariffOldCode_3740,xTariffOldCode_3991,xTariffOldCode_4410,xTariffOldCode_4610,xTariffOldCode_4990,xTariffOldCode_5110,xTariffOldCode_5990,days_difference,month,mediumDailyUsage,highDailyUsage,lowDailyUsage,xMeduimKw,xHighKw,xLowKw
368,40,1,1,25,6080015556,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,63,6,7.460317,0.0,0.0,470,0,0
680,3,1,1,25,8336625471,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,65,12,0.015385,0.0,0.0,1,0,0
734,11,1,1,5,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60,6,4.616667,0.0,0.0,277,0,0
279,36,1,1,25,8538125818,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,62,6,7.725806,0.0,0.0,479,0,0
859,2,1,1,25,2219220264566,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,52,6,3.211538,1.365385,2.384615,167,71,124
44,22,1,1,10,0,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,63,5,2.301587,0.0,0.0,145,0,0
267,47,1,1,25,0,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,72,12,8.652778,0.0,0.0,623,0,0
956,17,1,1,25,87273589,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49,5,3.020408,1.285714,2.673469,148,63,131
381,34,1,1,25,81792372,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,63,7,7.111111,0.0,0.0,448,0,0
642,36,1,3,25,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,53,2,0.0,0.0,0.0,0,0,0


In [27]:
features = ['xCycleCode', 'xFamilyNum', 'xFaze', 'xAmper', 'xRegionName_Roustaei',
            'xRegionName_Shahri', 'xUsageGroupName_Keshavarzi', 'xUsageGroupName_Khanegi',
            'xUsageGroupName_Omoomi', 'xUsageGroupName_Sanati', 'xUsageGroupName_Sayer',
            'xBakhshCode_1', 'xBakhshCode_2', 'xBakhshCode_4',
            'xTimeControlCode_1', 'xTimeControlCode_2', 'xTimeControlCode_3',
            'xTariffOldCode_1010', 'xTariffOldCode_1011', 'xTariffOldCode_1110',
            'xTariffOldCode_1111', 'xTariffOldCode_1990', 'xTariffOldCode_2110',
            'xTariffOldCode_2210', 'xTariffOldCode_2310', 'xTariffOldCode_2410',
            'xTariffOldCode_2510', 'xTariffOldCode_2610', 'xTariffOldCode_2710',
            'xTariffOldCode_2990', 'xTariffOldCode_2992', 'xTariffOldCode_3110',
            'xTariffOldCode_3210', 'xTariffOldCode_3310', 'xTariffOldCode_3410', 
            'xTariffOldCode_3520', 'xTariffOldCode_3540', 'xTariffOldCode_3740', 
            'xTariffOldCode_3991', 'xTariffOldCode_4410', 'xTariffOldCode_4610', 
            'xTariffOldCode_4990', 'xTariffOldCode_5110', 'xTariffOldCode_5990',
            'days_difference', 'month']
X = df[features]
X = np.matrix(X.values.tolist())
y_medium = df["mediumDailyUsage"]
y_high = df["highDailyUsage"]
y_low = df["lowDailyUsage"]

medium_json_file = open('mediumDailyUsage.json', 'r')
medium_loaded_model_json = medium_json_file.read()
medium_json_file.close()
medium_loaded_model = model_from_json(medium_loaded_model_json)

high_json_file = open('highDailyUsage.json', 'r')
high_loaded_model_json = high_json_file.read()
high_json_file.close()
high_loaded_model = model_from_json(high_loaded_model_json)

low_json_file = open('lowDailyUsage.json', 'r')
low_loaded_model_json = low_json_file.read()
low_json_file.close()
low_loaded_model = model_from_json(low_loaded_model_json)

medium_loaded_model.load_weights("mediumDailyUsage.h5")
high_loaded_model.load_weights("highDailyUsage.h5")
low_loaded_model.load_weights("lowDailyUsage.h5")

print("Loaded model from disk")


print(medium_loaded_model.predict(X))
medium_loaded_model.compile(loss='mse', optimizer='adam', metrics=['mse','mae'])

scores = medium_loaded_model.evaluate(X, y_medium, verbose=0)
print("%s: %.2f%%" % (medium_loaded_model.metrics_names[1], scores[1]*100))

Loaded model from disk
[[ 2.6219387 ]
 [ 0.1199874 ]
 [-2.2441587 ]
 [ 2.3779163 ]
 [-1.4756204 ]
 [ 0.3252305 ]
 [ 1.4938735 ]
 [-1.7528592 ]
 [ 2.3728313 ]
 [14.283682  ]
 [-3.1839542 ]
 [-1.4401628 ]
 [ 4.0192833 ]
 [-3.058323  ]
 [-3.503996  ]
 [-1.0148813 ]
 [ 3.8036804 ]
 [-2.888813  ]
 [-3.1241512 ]
 [-0.34284028]]
mean_squared_error: 4928.28%
