In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Suppress convergence and future warnings
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn
# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Load the labeled dataset
main = pd.read_csv(r'C:\Users\rodcan\Documents\Proyecto de Investigacion\UC Davis\Indoor Water Disaggregation Data\mining\resources\Algorithms\classification\wadepts\Hourly Data - ALL wadepts - labeled.csv')
# Define the water department list
wadept = ["Denver","Fort Collins","Scottsdale","San Antonio"];

# Create an empty array to append the classification models.
models = []
# Append the models
models.append(("Logistic Regression:",LogisticRegression()))
models.append(("Naive Bayes:",GaussianNB()))
models.append(("K-Nearest Neighbour:",KNeighborsClassifier(n_neighbors=3)))
models.append(("Decision Tree:",DecisionTreeClassifier()))
models.append(("Support Vector Machine-linear:",SVC(kernel="linear")))
models.append(("Support Vector Machine-rbf:",SVC(kernel="rbf")))
models.append(("Random Forest:",RandomForestClassifier(n_estimators=7)))
models.append(("eXtreme Gradient Boost:",XGBClassifier()))
models.append(("MLP:",MLPClassifier(hidden_layer_sizes=(45,30,15),solver='sgd',learning_rate_init=0.01,max_iter=500)))
models.append(("AdaBoostClassifier:",AdaBoostClassifier()))
models.append(("GradientBoostingClassifier:",GradientBoostingClassifier()))

print('Models appended...')

# Perform each classification model to every water department
for i in range(0,len(wadept)):
    # Defining the work dataset by matching the water departments to the wadept column in the main database
    df = main[main['wadept'].str.match(wadept[i])]
    X = df.drop(['Keycode','Date','wadept'], axis=1) # hours and labels
    Y = df.iloc[:,4] # only the labels
    
    from sklearn.model_selection import train_test_split
    # Dividing the data into training and testing
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

    # Creating empty arrays to append the results
    results = []
    names = []
    
    from sklearn.metrics import classification_report
    # Performing each methodology in the models array
    for name,model in models:
        kfold = KFold(n_splits=10, random_state=0)
        cv_result = cross_val_score(model,X_train,Y_train.values.ravel(), cv = kfold,scoring = "accuracy")
        names.append(name)
        results.append(cv_result)
    print(wadept[i]+"---------------------------------------------------")
    # Printing the results
    for i in range(len(names)):
        print(names[i],results[i].mean()*100)

Models appended...
Denver---------------------------------------------------
Logistic Regression: 99.48043184885289
Naive Bayes: 100.0
K-Nearest Neighbour: 95.85695006747639
Decision Tree: 100.0
Support Vector Machine-linear: 100.0
Support Vector Machine-rbf: 82.6450742240216
Random Forest: 99.22402159244264
eXtreme Gradient Boost: 100.0
MLP: 81.06612685560053
AdaBoostClassifier: 89.35897435897436
GradientBoostingClassifier: 100.0
Fort Collins---------------------------------------------------
Logistic Regression: 100.0
Naive Bayes: 100.0
K-Nearest Neighbour: 99.91471215351811
Decision Tree: 100.0
Support Vector Machine-linear: 100.0
Support Vector Machine-rbf: 100.0
Random Forest: 100.0
eXtreme Gradient Boost: 100.0
MLP: 98.26923076923076
AdaBoostClassifier: 91.35886501558144
GradientBoostingClassifier: 100.0
Scottsdale---------------------------------------------------
Logistic Regression: 100.0
Naive Bayes: 100.0
K-Nearest Neighbour: 99.84256889470865
Decision Tree: 100.0
Support Ve