In [1]:
# Import required packages

import glob
import random
import datetime
import os, fnmatch
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# from sklearn.externals import joblib

from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.python.keras.models import model_from_json
from tensorflow.python.keras import optimizers


%matplotlib inline

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [3]:
df = pd.read_csv("../dataset/dbBills_cleaned.csv")

df = df.drop(df.columns[[0, 1]], 1)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
df = df.fillna(0)

df = df.sample(1000)
df.to_csv("../dataset/dbBills_cleaned_1000.csv")

df.head()

Unnamed: 0,xCycleCode,xFamilyNum,xFaze,xAmper,xCounterBuldingNo,xRegionName_Roustaei,xRegionName_Shahri,xUsageGroupName_Keshavarzi,xUsageGroupName_Khanegi,xUsageGroupName_Omoomi,xUsageGroupName_Sanati,xUsageGroupName_Sayer,xBakhshCode_1,xBakhshCode_2,xBakhshCode_4,xTimeControlCode_1,xTimeControlCode_2,xTimeControlCode_3,xTariffOldCode_1010,xTariffOldCode_1011,xTariffOldCode_1110,xTariffOldCode_1111,xTariffOldCode_1990,xTariffOldCode_2110,xTariffOldCode_2210,xTariffOldCode_2310,xTariffOldCode_2410,xTariffOldCode_2510,xTariffOldCode_2610,xTariffOldCode_2710,xTariffOldCode_2990,xTariffOldCode_2992,xTariffOldCode_3110,xTariffOldCode_3210,xTariffOldCode_3310,xTariffOldCode_3410,xTariffOldCode_3520,xTariffOldCode_3540,xTariffOldCode_3740,xTariffOldCode_3991,xTariffOldCode_4410,xTariffOldCode_4610,xTariffOldCode_4990,xTariffOldCode_5110,xTariffOldCode_5990,days_difference,month,mediumDailyUsage,highDailyUsage,lowDailyUsage,xMeduimKw,xHighKw,xLowKw
587929,6,1,1,25,76421105,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,9,16.428571,0.0,0.0,345,0,0
3993654,45,1,1,25,80202981,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,57,6,6.649123,0.0,0.0,379,0,0
550602,7,1,1,25,890064710,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,63,2,6.619048,2.396825,3.444444,417,151,217
3306531,6,1,1,25,291050952,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,55,11,5.218182,2.436364,3.8,287,134,209
999362,36,1,1,25,76420930,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,58,2,11.224138,0.0,0.0,651,0,0


In [None]:
def create_model(target):

    features = ['xCycleCode', 'xFamilyNum', 'xFaze', 'xAmper', 'xRegionName_Roustaei',
                'xRegionName_Shahri', 'xUsageGroupName_Keshavarzi', 'xUsageGroupName_Khanegi',
                'xUsageGroupName_Omoomi', 'xUsageGroupName_Sanati', 'xUsageGroupName_Sayer',
                'xBakhshCode_1', 'xBakhshCode_2', 'xBakhshCode_4',
                'xTimeControlCode_1', 'xTimeControlCode_2', 'xTimeControlCode_3',
                'xTariffOldCode_1010', 'xTariffOldCode_1011', 'xTariffOldCode_1110',
                'xTariffOldCode_1111', 'xTariffOldCode_1990', 'xTariffOldCode_2110',
                'xTariffOldCode_2210', 'xTariffOldCode_2310', 'xTariffOldCode_2410',
                'xTariffOldCode_2510', 'xTariffOldCode_2610', 'xTariffOldCode_2710',
                'xTariffOldCode_2990', 'xTariffOldCode_2992', 'xTariffOldCode_3110',
                'xTariffOldCode_3210', 'xTariffOldCode_3310', 'xTariffOldCode_3410', 
                'xTariffOldCode_3520', 'xTariffOldCode_3540', 'xTariffOldCode_3740', 
                'xTariffOldCode_3991', 'xTariffOldCode_4410', 'xTariffOldCode_4610', 
                'xTariffOldCode_4990', 'xTariffOldCode_5110', 'xTariffOldCode_5990',
                'days_difference', 'month']

    X = df[features]
    X = np.matrix(X.values.tolist())
    y = df[target]
    
    y=y.values.reshape(-1,1)
    scaler = MinMaxScaler()
    
    print(scaler.fit(y))
    xscale=scaler.transform(X)
    yscale=scaler.transform(y)
    
    X_train, X_test, y_train, y_test = train_test_split(xscale, yscale)
        
    model = Sequential()
    model.add(Dense(12, input_dim=46, kernel_initializer='normal', activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.summary()
        
    model.compile(loss='mse', optimizer='adam', metrics=['mse','mae'])
    model.fit(X_train, y_train, epochs=150, batch_size=50,  verbose=1, validation_split=0.2)
    
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

    model_json = model.to_json()
    with open("{0}.json".format(target), "w") as json_file:
        json_file.write(model_json)
    model.save_weights("{0}.h5".format(target))
    print("Saved model to disk")

In [3]:
create_model("mediumDailyUsage")
create_model("highDailyUsage")
create_model("lowDailyUsage")

In [39]:
df = df.sample(1000)

In [40]:
features = ['xCycleCode', 'xFamilyNum', 'xFaze', 'xAmper', 'xRegionName_Roustaei',
            'xRegionName_Shahri', 'xUsageGroupName_Keshavarzi', 'xUsageGroupName_Khanegi',
            'xUsageGroupName_Omoomi', 'xUsageGroupName_Sanati', 'xUsageGroupName_Sayer',
            'xBakhshCode_1', 'xBakhshCode_2', 'xBakhshCode_4',
            'xTimeControlCode_1', 'xTimeControlCode_2', 'xTimeControlCode_3',
            'xTariffOldCode_1010', 'xTariffOldCode_1011', 'xTariffOldCode_1110',
            'xTariffOldCode_1111', 'xTariffOldCode_1990', 'xTariffOldCode_2110',
            'xTariffOldCode_2210', 'xTariffOldCode_2310', 'xTariffOldCode_2410',
            'xTariffOldCode_2510', 'xTariffOldCode_2610', 'xTariffOldCode_2710',
            'xTariffOldCode_2990', 'xTariffOldCode_2992', 'xTariffOldCode_3110',
            'xTariffOldCode_3210', 'xTariffOldCode_3310', 'xTariffOldCode_3410', 
            'xTariffOldCode_3520', 'xTariffOldCode_3540', 'xTariffOldCode_3740', 
            'xTariffOldCode_3991', 'xTariffOldCode_4410', 'xTariffOldCode_4610', 
            'xTariffOldCode_4990', 'xTariffOldCode_5110', 'xTariffOldCode_5990',
            'days_difference', 'month']
X = df[features]
X = np.matrix(X.values.tolist())
y_medium = df["mediumDailyUsage"]
y_high = df["highDailyUsage"]
y_low = df["lowDailyUsage"]

medium_json_file = open('../models/mediumDailyUsage.json', 'r')
medium_loaded_model_json = medium_json_file.read()
medium_json_file.close()
medium_loaded_model = model_from_json(medium_loaded_model_json)

high_json_file = open('../models/highDailyUsage.json', 'r')
high_loaded_model_json = high_json_file.read()
high_json_file.close()
high_loaded_model = model_from_json(high_loaded_model_json)

low_json_file = open('../models/lowDailyUsage.json', 'r')
low_loaded_model_json = low_json_file.read()
low_json_file.close()
low_loaded_model = model_from_json(low_loaded_model_json)

medium_loaded_model.load_weights("../models/mediumDailyUsage.h5")
high_loaded_model.load_weights("../models/highDailyUsage.h5")
low_loaded_model.load_weights("../models/lowDailyUsage.h5")

print("Loaded model from disk")


# print(medium_loaded_model.predict(X))
medium_loaded_model.compile(loss='mse', optimizer='adam', metrics=['mse','mae'])

scores = medium_loaded_model.evaluate(X, y_medium, verbose=0)
print("%s: %.2f%%" % (medium_loaded_model.metrics_names[1], scores[1]*100))

Loaded model from disk
mean_squared_error: 73994.03%
