In [1]:
import pandas as pd
import numpy as np
rand_st = 101
tst_sz=0.4

## Using a linear model (Logistic Regressor)

In [2]:
df_elu=pd.read_csv('elu_data.csv',usecols=['dropout', 'learning.rate', 'L1.regularization', 'training.steps',
       'accuracy', 'execution.time'])
df_elu.head()

Unnamed: 0,dropout,learning.rate,L1.regularization,training.steps,accuracy,execution.time
0,0.01,0.0002,0.01,100,0.266667,6.691806
1,0.01,0.0002,0.01,500,0.266667,9.475716
2,0.01,0.0002,0.01,2500,0.5,20.093862
3,0.01,0.0002,0.1,100,0.466667,7.328618
4,0.01,0.0002,0.1,500,0.266667,10.550757


In [3]:
def pred_quality(x):
    if x<0.5:
        return 0
    elif (x>=0.5 and x<0.8):
        return 1
    else:
        return 2

df_elu['Pred.Quality']=df_elu['accuracy'].apply(pred_quality)
df_elu.to_csv("elu_data_Quality.csv")

In [4]:
df_elu.head(5)

Unnamed: 0,dropout,learning.rate,L1.regularization,training.steps,accuracy,execution.time,Pred.Quality
0,0.01,0.0002,0.01,100,0.266667,6.691806,0
1,0.01,0.0002,0.01,500,0.266667,9.475716,0
2,0.01,0.0002,0.01,2500,0.5,20.093862,1
3,0.01,0.0002,0.1,100,0.466667,7.328618,0
4,0.01,0.0002,0.1,500,0.266667,10.550757,0


In [5]:
lst=list(df_elu.columns)

In [6]:
X=df_elu[lst[0:4]]
X.head()

Unnamed: 0,dropout,learning.rate,L1.regularization,training.steps
0,0.01,0.0002,0.01,100
1,0.01,0.0002,0.01,500
2,0.01,0.0002,0.01,2500
3,0.01,0.0002,0.1,100
4,0.01,0.0002,0.1,500


In [7]:
y=df_elu['Pred.Quality']

In [8]:
from sklearn.linear_model import LogisticRegression
log_model=LogisticRegression(max_iter=10000,C=100,tol=0.0000001,solver='sag',multi_class='multinomial')

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=tst_sz,random_state=rand_st)

In [10]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
X_train_minmax = min_max_scaler.fit_transform(X_train)
X_test_minmax = min_max_scaler.fit_transform(X_test)

In [11]:
log_model.fit(X_train_minmax,y_train)

LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=10000, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=None, solver='sag',
          tol=1e-07, verbose=0, warm_start=False)

In [12]:
y_pred=log_model.predict(X_test_minmax)

In [13]:
y_pred

array([1, 0, 2, 0, 1, 2, 0, 1, 2, 1, 2, 2, 0, 2, 1, 2, 1, 0, 1, 0, 0, 1, 2,
       0, 0, 1, 2, 2, 1, 1, 0, 1, 2, 0, 0, 0, 0, 1, 2, 2, 1, 2, 1, 1, 0, 1,
       1, 0, 0, 2, 1, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 2, 0, 2, 2, 0, 0, 0,
       1, 1, 2, 1, 2, 0, 1, 1, 2, 2, 1, 1, 2, 0, 0, 0, 2, 0, 1, 1, 0, 0, 0,
       2, 2, 0, 1, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 1, 1,
       1], dtype=int64)

In [14]:
from sklearn.metrics import confusion_matrix
conf_mat = pd.DataFrame(confusion_matrix(y_test, y_pred),
                        columns=['Predicted Low','Predicted Medium','Predicted High'], 
                        index=['True Low','True Medium','True High'])
conf_mat

Unnamed: 0,Predicted Low,Predicted Medium,Predicted High
True Low,20,19,3
True Medium,16,7,2
True High,6,7,36


In [15]:
from sklearn.metrics import classification_report
target_names = ['Low','Medium','High']
print(classification_report(y_test, y_pred, target_names=target_names))

             precision    recall  f1-score   support

        Low       0.48      0.48      0.48        42
     Medium       0.21      0.28      0.24        25
       High       0.88      0.73      0.80        49

avg / total       0.59      0.54      0.56       116



## Using a DNN Classifier!

In [16]:
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '2'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from tensorflow import logging
logging.set_verbosity(logging.ERROR)

In [17]:
def split_dataset(data,test_size):
    data_file = data
    data = pd.read_csv(data,usecols=['dropout', 'learning.rate', 'L1.regularization', 'training.steps','Pred.Quality'])
    from sklearn.model_selection import train_test_split
    data_train, data_test = train_test_split(data,test_size=test_size,random_state=rand_st)
    #data_train=pd.DataFrame(data_train)
    #data_test=pd.DataFrame(data_test)
    name = str(data_file).split('.')[0]
    name_train=name+'_train.csv'
    name_test=name+'_test.csv'
    data_train.to_csv(name_train,index=False,header=False)
    data_test.to_csv(name_test,index=False,header=False)
    return (data_train,data_test)

In [18]:
elu_train,elu_test=split_dataset('elu_data_Quality.csv',tst_sz)

In [19]:
training_set = tf.contrib.learn.datasets.base.load_csv_without_header(
    filename="elu_data_Quality_train.csv",
    target_dtype=np.int,
    features_dtype=np.float64,
    target_column=-1)

test_set = tf.contrib.learn.datasets.base.load_csv_without_header(
    filename='elu_data_Quality_test.csv',
    target_dtype=np.int,
    features_dtype=np.float64,
    target_column=-1)

In [20]:
# Specify that all features have real-value data
feature_columns = [tf.feature_column.numeric_column("x", shape=[4])]

In [21]:
# Define the training inputs
train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": np.array(training_set.data)},
      y=np.array(training_set.target),
      num_epochs=None,
      shuffle=True)

In [22]:
# Define the test inputs
test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": np.array(test_set.data)},
      y=np.array(test_set.target),
      num_epochs=1,
      shuffle=False)

### Training and classification

In [23]:
classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,hidden_units=[20,20,20], dropout=0.01,
                                                      n_classes=3,
                                                    optimizer=tf.train.ProximalAdagradOptimizer(learning_rate=0.001,
                                                    l1_regularization_strength=1.0),
                                                   activation_fn=tf.nn.elu)

In [24]:
classifier.train(input_fn=train_input_fn, steps=5000)
classifier.evaluate(input_fn=test_input_fn)["accuracy"]

0.5258621