In [2]:
import numpy as np
import matplotlib.pyplot as plt
from numpy import *
from sklearn import datasets 
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.neural_network import MLPClassifier
import random
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor 
from sklearn.metrics import roc_curve, auc

def read_data(run_num, prob, normalise=False):
    """Load the dataset for ``prob`` and split it into train and test sets.

    Args:
        run_num: Experiment run index. It is passed as ``random_state`` to
            ``train_test_split`` so each run uses its own reproducible split.
        prob: Problem name. ``'classifification'`` (the spelling used
            throughout this file) or ``'classification'`` loads
            ``datasets/pima.csv``; ``'regression'`` loads
            ``datasets/energy/ENB2012_data.csv``.
        normalise: When true, pass the features through sklearn's
            ``Normalizer`` before splitting. Defaults to ``False``, the
            value that used to be hard-coded here.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``; 40% of the rows
        are held out as the test set.

    Raises:
        ValueError: If ``prob`` is not one of the recognised problem names.
    """
    if prob in ('classifification', 'classification'):
        # Source: Pima-Indian diabetes dataset:
        # https://www.kaggle.com/kumargh/pimaindiansdiabetescsv
        path = "datasets/pima.csv"
        target_col = -1  # the target is the last column
    elif prob == 'regression':
        # Energy dataset - regression problem.
        path = 'datasets/energy/ENB2012_data.csv'
        target_col = 8   # only the heating load is used as the target
    else:
        # Previously an unknown name fell through and crashed later with an
        # UnboundLocalError; fail early with a clear message instead.
        raise ValueError(
            f"unknown problem {prob!r}; expected 'classifification' "
            "(or 'classification') or 'regression'"
        )

    data_in = np.genfromtxt(path, delimiter=",")
    data_inputx = data_in[:, 0:8]          # all features, columns 0 - 7
    data_inputy = data_in[:, target_col]

    if normalise:
        # fit() does nothing for Normalizer; it is called only to obtain the
        # transformer object.
        transformer = Normalizer().fit(data_inputx)
        data_inputx = transformer.transform(data_inputx)

    x_train, x_test, y_train, y_test = train_test_split(
        data_inputx, data_inputy, test_size=0.40, random_state=run_num)

    return x_train, x_test, y_train, y_test
 
    
def scipy_models(x_train, x_test, y_train, y_test, type_model, hidden, learn_rate, run_num, problem):
    """Fit one scikit-learn model and return its score on the test set.

    Args:
        x_train, y_train: Training features and targets passed to ``fit``.
        x_test, y_test: Held-out features and targets used for scoring.
        type_model: ``0`` selects a neural network (``MLPClassifier`` or
            ``MLPRegressor``); ``1`` selects a decision tree of depth 2.
        hidden: Hidden-layer width for the neural network. The regressor
            uses ``hidden * 3`` units.
        learn_rate: ``learning_rate_init`` for the neural network.
        run_num: Experiment run index; printed and used as the neural
            network's ``random_state``.
        problem: ``'classifification'`` (the spelling used throughout this
            file) or ``'classification'``, or ``'regression'``.

    Returns:
        Test-set accuracy for classification, test-set RMSE for regression.

    Raises:
        ValueError: If ``problem`` or ``type_model`` is not recognised.
    """
    print(run_num, ' is our exp run')

    # Validate up front: an unknown value used to leave `model` or
    # `perf_test` unbound and surface as an UnboundLocalError further down.
    is_classification = problem in ('classifification', 'classification')
    if not is_classification and problem != 'regression':
        raise ValueError(
            f"unknown problem {problem!r}; expected 'classifification' "
            "(or 'classification') or 'regression'"
        )
    if type_model not in (0, 1):
        raise ValueError(
            f"unknown type_model {type_model!r}; expected 0 (neural network) "
            "or 1 (decision tree)"
        )

    tree_depth = 2

    if is_classification:
        if type_model == 0:
            # Neural network trained with plain SGD.
            model = MLPClassifier(hidden_layer_sizes=(hidden,), random_state=run_num,
                                  max_iter=100, solver='sgd', learning_rate_init=learn_rate)
        else:
            # https://scikit-learn.org/stable/modules/tree.html (see how tree can be visualised)
            model = DecisionTreeClassifier(random_state=0, max_depth=tree_depth)
    else:
        if type_model == 0:
            # NOTE: the regression network uses the Adam solver, a wider
            # hidden layer and more iterations than the classifier above.
            model = MLPRegressor(hidden_layer_sizes=(hidden * 3,), random_state=run_num,
                                 max_iter=500, solver='adam', learning_rate_init=learn_rate)
        else:
            # https://scikit-learn.org/stable/modules/tree.html (see how tree can be visualised)
            model = DecisionTreeRegressor(random_state=0, max_depth=tree_depth)

    # Train the model using the training set.
    model.fit(x_train, y_train)

    if type_model == 1:
        # Dump the fitted tree's rules as text.
        print(export_text(model))

    # Score predictions on the testing set only; the training-set score was
    # computed before but never returned.
    y_pred_test = model.predict(x_test)

    if is_classification:
        return accuracy_score(y_test, y_pred_test)
    return np.sqrt(mean_squared_error(y_test, y_pred_test))


def main():
    """Run the repeated train/test experiment and print summary statistics.

    For each of ``max_expruns`` runs the data is re-split via ``read_data``
    (the run index is passed along as the seed), then a neural network
    (``type_model`` 0) and a decision tree (``type_model`` 1) are fitted with
    ``scipy_models``. The per-run test scores, their mean and their standard
    deviation are printed for both models.
    """
    max_expruns = 5

    # Per-run test scores. "SGD" labels the neural-network results
    # (type_model 0); for regression scipy_models trains it with Adam.
    SGD_all = np.zeros(max_expruns)
    tree_all = np.zeros(max_expruns)

    learn_rate = 0.01
    hidden = 8

    # Problem to run: 'classifification' (sic - the spelling the helper
    # functions compare against) or 'regression'.
    #prob = 'classifification'
    prob = 'regression'

    # Classification accuracy is reported for classification and RMSE for
    # regression.
    print(prob, ' is our problem')

    for run_num in range(max_expruns):

        x_train, x_test, y_train, y_test = read_data(run_num, prob)

        acc_sgd = scipy_models(x_train, x_test, y_train, y_test, 0, hidden, learn_rate, run_num, prob)   # neural network
        acc_tree = scipy_models(x_train, x_test, y_train, y_test, 1, hidden, learn_rate, run_num, prob)  # decision tree

        SGD_all[run_num] = acc_sgd
        tree_all[run_num] = acc_tree

    print(SGD_all, ' SGD_all')
    print(np.mean(SGD_all), ' mean SGD_all')
    print(np.std(SGD_all), ' std SGD_all')

    # Labels mirror the SGD lines above; `hidden` is no longer printed here
    # because the decision tree does not use it.
    print(tree_all, ' tree_all')
    print(np.mean(tree_all), ' mean tree_all')
    print(np.std(tree_all), ' std tree_all')


    
# Run the experiment only when this file is executed as a script (or as a
# notebook cell), not when it is imported as a module.
if __name__ == '__main__':
    main() 

regression  is our problem
0  is our exp run
0  is our exp run
|--- feature_0 <= 0.75
|   |--- feature_6 <= 0.18
|   |   |--- value: [11.32]
|   |--- feature_6 >  0.18
|   |   |--- value: [14.82]
|--- feature_0 >  0.75
|   |--- feature_1 <= 624.75
|   |   |--- value: [28.77]
|   |--- feature_1 >  624.75
|   |   |--- value: [37.14]

1  is our exp run
1  is our exp run
|--- feature_1 <= 673.75
|   |--- feature_1 <= 624.75
|   |   |--- value: [28.51]
|   |--- feature_1 >  624.75
|   |   |--- value: [37.22]
|--- feature_1 >  673.75
|   |--- feature_2 <= 330.75
|   |   |--- value: [12.12]
|   |--- feature_2 >  330.75
|   |   |--- value: [15.55]

2  is our exp run
2  is our exp run
|--- feature_1 <= 673.75
|   |--- feature_1 <= 624.75
|   |   |--- value: [28.57]
|   |--- feature_1 >  624.75
|   |   |--- value: [37.18]
|--- feature_1 >  673.75
|   |--- feature_6 <= 0.18
|   |   |--- value: [11.08]
|   |--- feature_6 >  0.18
|   |   |--- value: [14.58]

3  is our exp run
3  is our exp run
|---