In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV 
from sklearn.model_selection import KFold
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import accuracy_score
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns
import numpy as np
import pandas as pd
import category_encoders as ce
import os
import pickle
import gc
from tqdm import tqdm
import pickle
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn import ensemble
import xgboost as xgb
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [2]:
def encode_text_features(encode_decode, data_frame, encoder_isa=None, encoder_mem_type=None):
    """One-hot encode the 'isa' and 'mem-type' columns of ``data_frame``.

    Parameters
    ----------
    encode_decode : str
        ``'encode'`` builds two fresh ``category_encoders`` one-hot encoders
        and fits them on ``data_frame`` (any encoders passed in are ignored).
        Every other value reuses the encoders supplied by the caller.
    data_frame : object
        Frame handed to the encoders' ``fit`` / ``transform`` methods.
    encoder_isa, encoder_mem_type : object, optional
        Already-fitted encoders; only consulted when not in ``'encode'`` mode.

    Returns
    -------
    tuple
        ``(encoded_data_frame, encoder_isa, encoder_mem_type)``.
    """
    fit_new_encoders = encode_decode == 'encode'

    # Stage 1: the 'isa' column.
    if fit_new_encoders:
        encoder_isa = ce.one_hot.OneHotEncoder(cols=['isa'])
        encoder_mem_type = ce.one_hot.OneHotEncoder(cols=['mem-type'])
        encoder_isa.fit(data_frame, verbose=1)
    isa_encoded = encoder_isa.transform(data_frame)

    # Stage 2: the 'mem-type' column, applied to the output of stage 1.
    if fit_new_encoders:
        encoder_mem_type.fit(isa_encoded, verbose=1)
    fully_encoded = encoder_mem_type.transform(isa_encoded)

    return fully_encoded, encoder_isa, encoder_mem_type

In [3]:
def absolute_percentage_error(Y_test, Y_pred):
    """Mean absolute percentage error, returned as a fraction (not multiplied by 100).

    Samples whose true value is exactly 0 contribute nothing to the sum
    (their ratio is undefined) but are still counted in the divisor, so the
    result is ``sum(|y - y_hat| / |y| for y != 0) / len(Y_test)``.

    Parameters
    ----------
    Y_test : array-like
        Ground-truth values. Any shape; flattened to 1-D.
    Y_pred : array-like
        Predicted values. Any shape with the same number of elements as
        ``Y_test``; flattened to 1-D, so a column vector of shape ``(n, 1)``
        is accepted alongside a target of shape ``(n,)``.

    Returns
    -------
    float
        The error as a plain Python float, or ``nan`` for empty input.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    y_true = np.asarray(Y_test, dtype=float).ravel()
    y_hat = np.asarray(Y_pred, dtype=float).ravel()

    if y_true.size != y_hat.size:
        raise ValueError(
            'Y_test and Y_pred must have the same number of elements, got '
            + str(y_true.size) + ' and ' + str(y_hat.size))
    if y_true.size == 0:
        # The mean of nothing is undefined; avoid a ZeroDivisionError.
        return float('nan')

    nonzero = y_true != 0
    # Divide by the magnitude of the target so negative targets still give
    # a non-negative error.
    ratios = np.abs(y_true[nonzero] - y_hat[nonzero]) / np.abs(y_true[nonzero])
    return float(ratios.sum() / y_true.size)

In [4]:
def create_model(name = 'dnn_1', input_dim = 22):
    """Build one of four fully-connected Keras regression networks (uncompiled).

    Every network is a ``Sequential`` stack of ReLU ``Dense`` layers followed
    by a single linear output unit. The caller is responsible for calling
    ``compile`` on the returned model.

    Hidden-layer widths per architecture:

    * ``'dnn_1'``: 512
    * ``'dnn_2'``: 512, 512, 512
    * ``'dnn_3'``: 256, 64, 16, 4
    * ``'dnn_4'``: 512, 128, 32, 8, 2

    Parameters
    ----------
    name : str
        Architecture key. Any value other than ``'dnn_1'``, ``'dnn_2'`` or
        ``'dnn_3'`` selects the ``'dnn_4'`` layout.
    input_dim : int
        Number of input features fed to the first layer. Defaults to 22.

    Returns
    -------
    Sequential
        The freshly constructed, uncompiled model.
    """
    hidden_layouts = {
        'dnn_1': (512,),
        'dnn_2': (512, 512, 512),
        'dnn_3': (256, 64, 16, 4),
        'dnn_4': (512, 128, 32, 8, 2),
    }
    # Unknown names deliberately fall through to the deepest layout.
    widths = hidden_layouts.get(name, hidden_layouts['dnn_4'])

    model = Sequential()
    # Only the first layer declares the input size.
    model.add(Dense(widths[0], input_dim=input_dim, activation='relu'))
    for units in widths[1:]:
        model.add(Dense(units, activation='relu'))
    model.add(Dense(1, activation='linear'))
    return model
        

In [7]:
def process_all(dataset_path, dataset_name, path_for_saving_data):
    """Cross-validate the four DNN architectures on one dataset and save everything.

    Workflow:

    1. Read the CSV at ``dataset_path``, one-hot encode it with
       ``encode_text_features``, drop the ``'arch'`` / ``'arch1'`` columns and
       replace missing values with 0.
    2. Use every remaining column except ``'runtime'`` and ``'PS'`` as
       features and ``'runtime'`` as the regression target.
    3. For each of ``dnn_1`` .. ``dnn_4`` run 10 ``ShuffleSplit`` folds,
       training a fresh model per fold (MAE loss, Adam, 100 epochs,
       batch size 10) and recording R2, MSE, MAPE and MAE on the held-out part.
    4. Per fold, pickle the fold arrays into ``<path_for_saving_data>/folds_data``
       and write the model JSON + HDF5 weights into
       ``<path_for_saving_data>/models_data``. Both directories are created
       if they do not exist.
    5. Write one row per architecture (each metric cell holds the list of
       per-fold values) to ``Results_<dataset_name>.csv`` in the current
       working directory.

    Parameters
    ----------
    dataset_path : str
        Path of the input CSV file.
    dataset_name : str
        Label stored in the results table and used in the results file name.
    path_for_saving_data : str
        Directory under which fold data and models are written.

    Returns
    -------
    None
    """
    ################## Data Preprocessing ######################
    raw_df = pd.read_csv(dataset_path)
    encoded_data_frame, encoder_isa, encoder_mem_type = encode_text_features(
        'encode', raw_df, encoder_isa=None, encoder_mem_type=None)
    total_data = encoded_data_frame.drop(columns=['arch', 'arch1']).fillna(0)
    X = total_data.drop(columns=['runtime', 'PS']).to_numpy()
    Y = total_data['runtime'].to_numpy()
    print('Data X and Y shape', X.shape, Y.shape)

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
    print('Train Test Split:', X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    # Reuse the statistics learned on the training split; re-fitting on the
    # test split would scale the two sets differently and leak test data.
    X_test = scaler.transform(X_test)
    # NOTE(review): the scaled hold-out arrays above are not consumed below;
    # the cross-validation loop trains and evaluates on the unscaled X.
    # Confirm whether per-fold standardisation was intended.
    ################## Data Preprocessing ######################

    # Make sure the output locations exist before the first fold is saved.
    folds_dir = os.path.join(path_for_saving_data, 'folds_data')
    models_dir = os.path.join(path_for_saving_data, 'models_data')
    os.makedirs(folds_dir, exist_ok=True)
    os.makedirs(models_dir, exist_ok=True)

    best_models_name = ['dnn_1', 'dnn_2', 'dnn_3', 'dnn_4']
    records = []

    for k, model_name in enumerate(best_models_name):
        print('Running model number:', k+1, 'with Model Name: ', model_name)
        print('####################################################################')
        r2_scores = []
        mse_scores = []
        mape_scores = []
        mae_scores = []

        # Fixed seed: every architecture is evaluated on the same 10 splits.
        cv = ShuffleSplit(n_splits=10, random_state=0)

        for fold, (train_index, test_index) in enumerate(cv.split(X), start=1):
            model_orig = create_model(model_name)

            X_train_fold, X_test_fold = X[train_index], X[test_index]
            Y_train_fold, Y_test_fold = Y[train_index], Y[test_index]

            model_orig.compile(loss='mae', optimizer='adam', metrics=['mae'])
            model_orig.fit(X_train_fold, Y_train_fold, epochs=100, batch_size=10, verbose = 0)
            # The network emits one column per sample; flatten it so the
            # predictions line up element-wise with the 1-D target.
            Y_pred_fold = np.ravel(model_orig.predict(X_test_fold))

            # save the folds to disk
            fold_tag = model_name + '_' + str(fold)
            data = [X_train_fold, X_test_fold, Y_train_fold, Y_test_fold]
            with open(os.path.join(folds_dir, fold_tag + '.pickle'), 'wb') as fold_file:
                pickle.dump(data, fold_file)

            # save the model to disk: architecture as JSON, weights as HDF5
            with open(os.path.join(models_dir, fold_tag + '.json'), "w") as json_file:
                json_file.write(model_orig.to_json())
            model_orig.save_weights(os.path.join(models_dir, fold_tag + '.h5'))
            print("Saved model to disk")

            r2_scores.append(float(r2_score(Y_test_fold, Y_pred_fold)))
            mse_scores.append(float(mean_squared_error(Y_test_fold, Y_pred_fold)))
            mape_scores.append(float(absolute_percentage_error(Y_test_fold, Y_pred_fold)))
            mae_scores.append(float(mean_absolute_error(Y_test_fold, Y_pred_fold)))

        records.append({'model_name': model_name, 'dataset_name': dataset_name,
                        'r2': r2_scores, 'mse': mse_scores,
                        'mape': mape_scores, 'mae': mae_scores})

    # Build the table once instead of appending row by row.
    results = pd.DataFrame(
        records, columns=['model_name', 'dataset_name', 'r2', 'mse', 'mape', 'mae'])
    print(results.head())
    results.to_csv('Results_' + str(dataset_name) + '.csv')


In [None]:
# Run the full pipeline for the simulated SVM dataset.
# The locations below are absolute paths on the author's machine; adjust
# them before running this cell elsewhere.
dataset_name = 'svm_simulated'
dataset_path = 'C:\\Users\\Rajat\\Desktop\\PROJECT_MODE\\Performance_Dataset\\Performance\\Simulator\\svm.csv'
path_for_saving_data = 'C:\\Users\\Rajat\\Desktop\\PROJECT_MODE\\Performance_Dataset\\Performance\\data\\' + dataset_name
process_all(
    dataset_path=dataset_path,
    dataset_name=dataset_name,
    path_for_saving_data=path_for_saving_data,
)

Data X and Y shape (475, 22) (475,)
Train Test Split: (380, 22) (95, 22) (380,) (95,)
Running model number: 1 with Model Name:  dnn_1
####################################################################
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Running model number: 2 with Model Name:  dnn_2
####################################################################
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Running model number: 3 with Model Name:  dnn_3
####################################################################
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Sav