In [3]:
from collections import Counter
import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
import scipy.io
from scipy.io import loadmat
from scipy import stats
from sklearn.tree import DecisionTreeRegressor, export_graphviz
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import sklearn.tree
import random
from PIL import Image
import string
import pandas as pd

In [4]:
RAW_FILE = "dataset/raw_power_data_alu32.csv"
df = pd.read_csv(RAW_FILE)

y = df["switching_power"].to_numpy().reshape(-1, 1)

df_X = df.drop(["switching_power"], axis=1)
features = list(df_X.columns)

In [9]:
for code in df['opcode'].unique():
    print(code, df[df['opcode'] == code]['switching_power'].mean())

ALU_SLTU 0.27354106779661014
ALU_COPY_B 0.2744712373831775
ALU_SLT 0.2780842668067227
ALU_AND 0.26880270291262137
ALU_SLL 0.2920164632768361
ALU_SRL 0.2765932798434442
ALU_XXX 0.29108913243761997
ALU_XOR 0.2761328968871595
ALU_ADD 0.2637254732510288
ALU_OR 0.2876492095588235
ALU_SRA 0.2727502450592886
ALU_SUB 0.26917280855397147


In [12]:
#Generating train and validation sets
RAND_SEED = 251289
DATA_FILE = "dataset/processed_power_data_alu32.mat"

def load_and_split_dataset(file=DATA_FILE, validation_size=0.2):
    '''
        Save .mat file contain the dataset ready for training/validation/testing
        One can read the .mat file using scipy.io.loadmat(<file_name>)
    '''
    data = loadmat(file)
    features = data['features']
    X_train, X_validation, y_train, y_validation = train_test_split(data['X'], data['y'], test_size=validation_size, shuffle=True, random_state=RAND_SEED)

    return X_train, X_validation, y_train, y_validation, features

X_train, X_validation, y_train, y_validation, features = load_and_split_dataset()
# y_train *= 1e6 #(uW)
# y_validation *= 1e6 #(uW)


In [13]:
X_train

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 1., 0.],
       [1., 1., 1., ..., 0., 0., 1.],
       ...,
       [1., 1., 0., ..., 1., 0., 0.],
       [1., 1., 1., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.]])

In [20]:
#Construct the DecisionTree and Train
# regressor = DecisionTreeRegressor(random_state=RAND_SEED) 
# use X_train, augment by adding another column, that column is output of least square (preds?) concatenate
regressor = GradientBoostingRegressor(random_state=RAND_SEED) 

regressor.fit(X_train, y_train)
#Construct the Predictions
y_pred = regressor.predict(X_validation)
# y_pred = y_train.mean()
# print(y_pred)
# NRMSE - 1/(y_max - y_min) * np.sqrt(np.sum((y - y^hat)**2)/n)
y_max = max(y_validation)[0]
y_min = min(y_validation)[0]
NRMSE = 1/(y_max - y_min) * np.sqrt(np.sum((y_validation - y_pred)**2)/len(y_validation))
print("NRMSE for DecisionTree: ", NRMSE)

  y = column_or_1d(y, warn=True)


NRMSE for DecisionTree:  7.848927067018361


In [None]:
depths = np.arange(1, 11)
feature_arr = np.arange(1, X_train.shape[1] + 1)

bestNRMSE = np.inf
best_y_pred = []
bestdepth = -1
bestfeature = -1
for feature in feature_arr:
    print(str(feature) + " Features Subset Selection")
    for depth in depths:
        #Construct the RandomForest and Train
        regressor = RandomForestRegressor(max_depth=depth, max_features = feature, random_state=RAND_SEED)
        regressor.fit(X_train, y_train.ravel())
        #Construct the Predictions
        y_pred = regressor.predict(X_validation)
        #NRMSE
        NRMSE = 1/(y_max - y_min) * np.sqrt(np.sum((y_validation - y_pred)**2)/len(y_validation))
        if (NRMSE < bestNRMSE):
            bestNRMSE = NRMSE
            bestdepth = depth
            bestfeature = feature
            best_y_pred = y_pred
        #print("     NRMSE for RandomForest with depth " + str(depth) + ": " + str(NRMSE))
print("Best NRMSE: " + str(bestNRMSE))
print("Best Depth:" + str(bestdepth))
print("Best Subset Feature Num:" + str(bestfeature))

In [None]:
best_y_pred

In [None]:
y_validation.ravel()

In [None]:
# other algorithms
# neural net? 

# produce plots show how we do validation
# course staff should understand what we did, performance of all opcodes, inputs, 
# training plots, condition number of matrix