In [None]:
import os
import pickle

import numpy as np
import pandas as pd
from nilmtk.disaggregate import Disaggregator
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

### The NILMTK API already ships with some disaggregation methods, but its real value comes from letting us develop our own methods and run them within its framework.

### In this example I will show you how to implement a simple SVM-based disaggregator (using support vector regression).

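#### The first thing we need is to create a class that inherits from the Disaggregator class and implements four methods (partial_fit, disaggregate_chunk, save_model and load_model) in addition to the constructor.
#### You can find below the skeleton of the class; the complete implementation is at the end of this notebook.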

In [None]:
class Svm(Disaggregator):
    """Skeleton of a custom disaggregator: the methods we have to fill in."""

    def __init__(self, params):
        # TODO: set up the model state from the `params` dictionary.
        pass

    def partial_fit(self, train_main, train_appliances, **load_kwargs):
        # TODO: train the model.
        raise NotImplementedError

    def disaggregate_chunk(self, test_mains):
        # TODO: predict the appliance power from the aggregated data.
        raise NotImplementedError

    def save_model(self, folder_name):
        # TODO: save the model to a file inside `folder_name`.
        raise NotImplementedError

    def load_model(self, folder_name):
        # TODO: load the model from a file inside `folder_name`.
        raise NotImplementedError

#### In `__init__` we receive the `params` argument, which is a dictionary with all the information needed by our model.
In our case we don't need any of that information, because our model runs a grid search to find the best parameters.

In [None]:
    def __init__(self, params):
        """Create an untrained disaggregator.

        `params` is accepted for interface compatibility but is not read here.
        """
        # Name used in the progress messages printed by the other methods.
        self.MODEL_NAME = 'SVM'
        # Maps appliance name -> fitted estimator; filled in by partial_fit.
        self.model = dict()

#### The partial_fit method is called to train the algorithm.
#### train_main contains the aggregated energy recorded in the chosen datasets.
#### train_appliances contains the energy recorded for each appliance in the chosen datasets.
#### load_kwargs holds additional arguments.

In this example we use only the apparent power from the first house to train the SVM.

Note: We train one SVM per appliance and save the best one in the "model" dictionary.

In [None]:
    def partial_fit(self, train_main, train_appliances, **load_kwargs):
        """Train one SVR per appliance and keep the best grid-search result.

        Only the first element of `train_main` (and of each appliance's data)
        is used, and only its ["power"]["apparent"] column.

        Args:
            train_main: sequence whose first element exposes the aggregated
                readings under ["power"]["apparent"].
            train_appliances: iterable of (appliance name, data) pairs, where
                data[0] exposes the appliance readings under
                ["power"]["apparent"].
            **load_kwargs: additional arguments; not used by this method.
        """
        # Single-feature design matrix: one row per sample, one column.
        mains_values = train_main[0]["power"]["apparent"].values
        x_train = np.reshape(mains_values, (-1, 1))

        rbf_grid = [{"kernel": ["rbf"], "C": [0.03, 0.1, 0.3, 1]}]
        poly_grid = [
            {"kernel": ["poly"], "degree": [2, 3, 4], "C": [0.03, 0.1, 0.3, 1]}
        ]

        for app_name, power in train_appliances:
            print("Training ", app_name, " in ", self.MODEL_NAME, " model\n", end="\r")

            y_train = power[0]["power"]["apparent"].values
            base_estimator = SVR()

            # Run the RBF search first, then the polynomial one.
            candidates = []
            for grid in (rbf_grid, poly_grid):
                search = GridSearchCV(base_estimator, grid, cv=5, n_jobs=20, verbose=2)
                search.fit(x_train, y_train)
                candidates.append((search.best_estimator_, search.best_score_))

            (rbf_model, rbf_score), (poly_model, poly_score) = candidates
            # The RBF model wins only on a strictly better score; ties go to poly.
            winner = rbf_model if rbf_score > poly_score else poly_model
            print(winner)
            self.model[app_name] = winner

#### The disaggregate_chunk method is called when testing the algorithms.
#### This method only receives the aggregated data of the datasets.
#### In this example we predict the power of each appliance from the apparent power of the first building.
#### The results are then saved in a pandas DataFrame and returned to the general program for comparison.

In [None]:
    def disaggregate_chunk(self, test_mains):
        """Predict the power of every trained appliance from the aggregated data.

        Only the first element of `test_mains` is used, and only its
        ["power"]["apparent"] column.

        Args:
            test_mains: sequence whose first element exposes the aggregated
                readings under ["power"]["apparent"] and has an `index`.

        Returns:
            A one-element list holding a float32 DataFrame with one column per
            appliance in `self.model`, indexed like `test_mains[0]`.
        """
        mains = test_mains[0]
        # Single-feature design matrix: one row per sample, one column.
        x_test = np.reshape(mains["power"]["apparent"].values, (-1, 1))

        appliance_powers_dict = {}
        for app_name, estimator in self.model.items():
            print("Estimating power demand for '{}' in '{}'\n".format(app_name, self.MODEL_NAME))
            appliance_powers_dict[app_name] = pd.Series(
                estimator.predict(x_test), index=mains.index)

        appliance_powers = pd.DataFrame(appliance_powers_dict, dtype='float32')

        # Return the predictions; without this the method would return None.
        return [appliance_powers]

#### The save_model and load_model methods are self-explanatory.
#### In these methods we save and load a model to/from a file in the given folder.

## Below we can find the complete class ready to run.
#### (only using 1 building and the apparent power)

In [None]:
class Svm(Disaggregator):
    """Disaggregator that fits one grid-searched SVR per appliance.

    Only the first building passed in and only its apparent power are used.
    """

    def __init__(self, params):
        """Create an untrained disaggregator.

        `params` is accepted for interface compatibility but is not read here.
        """
        # Maps appliance name -> fitted estimator; filled in by partial_fit.
        self.model = {}
        # Name used in the progress messages printed by the other methods.
        self.MODEL_NAME = 'SVM'

    def partial_fit(self, train_main, train_appliances, **load_kwargs):
        """Train one SVR per appliance and keep the best grid-search result.

        Args:
            train_main: sequence whose first element exposes the aggregated
                readings under ["power"]["apparent"].
            train_appliances: iterable of (appliance name, data) pairs, where
                data[0] exposes the appliance readings under
                ["power"]["apparent"].
            **load_kwargs: additional arguments; not used by this method.
        """
        x_train = train_main[0]["power"]["apparent"]
        # Single-feature design matrix: one row per sample, one column.
        x_train = np.reshape(x_train.values, (np.size(x_train.values), 1))

        for app_name, power in train_appliances:
            print("Training ", app_name, " in ", self.MODEL_NAME, " model\n", end="\r")

            y_train = power[0]["power"]["apparent"].values
            svm = SVR()

            param = [
                {
                    "kernel": ["rbf"],
                    "C": [0.03, 0.1, 0.3, 1]
                }
            ]
            clf = GridSearchCV(svm, param, cv=5, n_jobs=20, verbose=2)
            clf.fit(x_train, y_train)
            rbf = (clf.best_estimator_, clf.best_score_)

            param = [
                {
                    "kernel": ["poly"],
                    "degree": [2, 3, 4],
                    "C": [0.03, 0.1, 0.3, 1]
                }
            ]
            clf = GridSearchCV(svm, param, cv=5, n_jobs=20, verbose=2)
            clf.fit(x_train, y_train)
            poly = (clf.best_estimator_, clf.best_score_)

            # The RBF model wins only on a strictly better score; ties go to poly.
            best = rbf[0] if rbf[1] > poly[1] else poly[0]
            print(best)
            self.model[app_name] = best

    def disaggregate_chunk(self, test_mains):
        """Predict the power of every trained appliance from the aggregated data.

        Args:
            test_mains: sequence whose first element exposes the aggregated
                readings under ["power"]["apparent"] and has an `index`.

        Returns:
            A one-element list holding a float32 DataFrame with one column per
            appliance in `self.model`, indexed like `test_mains[0]`.
        """
        mains = test_mains[0]
        x_test = mains["power"]["apparent"]
        x_test = np.reshape(x_test.values, (np.size(x_test.values), 1))

        appliance_powers_dict = {}
        for app_name, estimator in self.model.items():
            print("Estimating power demand for '{}' in '{}'\n".format(app_name, self.MODEL_NAME))
            appliance_powers_dict[app_name] = pd.Series(
                estimator.predict(x_test), index=mains.index)

        appliance_powers = pd.DataFrame(appliance_powers_dict, dtype='float32')
        return [appliance_powers]

    def save_model(self, folder_name):
        """Write the per-appliance estimators to `folder_name`/model.pkl.

        The folder is created if it does not exist. The estimators are
        pickled because fitted estimator objects cannot be encoded as JSON.
        """
        os.makedirs(folder_name, exist_ok=True)
        with open(os.path.join(folder_name, "model.pkl"), "wb") as f:
            pickle.dump(self.model, f)

    def load_model(self, folder_name):
        """Replace `self.model` with the estimators stored by save_model.

        Raises:
            FileNotFoundError: if `folder_name`/model.pkl does not exist.
        """
        # SECURITY: unpickling can execute arbitrary code; only load model
        # files that come from a trusted source.
        with open(os.path.join(folder_name, "model.pkl"), "rb") as f:
            self.model = pickle.load(f)