In [7]:
from __future__ import print_function
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop

from keras.datasets import mnist
from keras.utils import np_utils
from keras import backend

import pandas as pd
import numpy as np
from scipy.integrate import simps
from scipy.stats import f
from math import sqrt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

In [8]:
def data_kyoto():
    kyoto_1_score = pd.read_csv('dataset/data1_score.csv')
    kyoto_1_all = pd.read_csv('dataset/dataset1_15features.csv')

    kyoto_1_all.rename(index=str, columns={"Userid": "userid"}, inplace=True)
    kyoto_1_all = kyoto_1_all.drop(['Score'], axis=1)
    
    kyoto_x_all = pd.merge(kyoto_1_all, kyoto_1_score, on='userid')
    kyoto_1 = kyoto_x_all
    kyoto_1.fillna(0.0, inplace=True)
    kyoto_1 = kyoto_1.drop(['userid'], axis=1)
    kyoto_1 = kyoto_1.drop(['Delete_Memo'], axis=1)
    kyoto_1 = kyoto_1[kyoto_1['score']>0]
    
    kyoto_y = kyoto_1['score']
    kyoto_x = kyoto_1.drop(['score'], axis=1)

    X_train, X_test, y_train, y_test = train_test_split(kyoto_x, kyoto_y, test_size=0.3)
    
    scale = StandardScaler()
    X_train = scale.fit_transform(X_train)
    X_test = scale.fit_transform(X_test)
    
    return X_train, y_train, X_test, y_test

In [9]:
def REC(y_true , y_pred):
    
    Accuracy = []
    Begin_Range = 0
    End_Range = 1.5
    Interval_Size = 0.01
    
    # List of epsilons
    Epsilon = np.arange(Begin_Range , End_Range , Interval_Size)
    
    # Main Loops
    for i in range(len(Epsilon)):
        count = 0.0
        for j in range(len(y_true)):
            
            y_true[j]
            y_pred[j]
            
            np.linalg.norm(y_true[j] - y_pred[j])
            np.sqrt( np.linalg.norm(y_true[j]) **2 + np.linalg.norm(y_pred[j])**2 ) < Epsilon[i]
            
            if np.linalg.norm(y_true[j] - y_pred[j]) / np.sqrt( np.linalg.norm(y_true[j]) **2 + np.linalg.norm(y_pred[j])**2 ) < Epsilon[i]:
                count = count + 1
        
        Accuracy.append(count/len(y_true))
    
    # Calculating Area Under Curve using Simpson's rule
    AUC = simps(Accuracy , Epsilon ) / End_Range
        
    # returning epsilon , accuracy , area under curve    
    return Epsilon, Accuracy, AUC

In [10]:
def model_evaluate (model, X_train, X_test, y_train, y_test):

    y_pred = model.predict(X_test)
    Deviation, Accuracy, auc = REC(y_test.values, y_pred)
    rmse = sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    mape = ((y_test - y_pred) / y_test).abs().mean()*100
    
    f_test = np.var(y_test) / np.var(y_pred)
    df1 = len(y_test) - 1
    df2 = len(y_pred) - 1
    p_value = 1 - 2 * abs(0.5 - f.cdf(f_test, df1, df2))
    
    p_stars = ''
    if p_value <= 0.05:
        p_stars = '*'
    if p_value <= 0.01:
        p_stars = '**'
    if p_value <= 0.001:
        p_stars = '***'
    #print(F, p_value)
    
    y_validate = model.predict(X_train)
    training_loss = sqrt(mean_squared_error(y_train, y_validate))
    
    return_dict = {'rmse':rmse, 
                   'mape':mape, 
                   'r2':r2, 
                   'auc':auc,
                   'training_loss':training_loss, 
                   'f_test':f_test,
                   'p':p_stars,
                   'y_pred':y_pred, 
                   'y_validate':y_validate}
    
    return return_dict

In [11]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import roc_auc_score
import sys

X_train, y_train, X_test, y_test = data_kyoto()

X = X_train
y = y_train
X_val = X_test
y_val = y_test

space = {'choice': hp.choice('num_layers',
                    [ {'layers':'two', },
                    {'layers':'three',
                    'units3' : hp.choice('units3', np.arange(256, 511+1)),
                    'dropout3': hp.uniform('dropout3', .25,.75)}
                    ]),

            'units1' : hp.choice('units1', np.arange(2, 15+1)),
            'units2' : hp.choice('units2', np.arange(16, 255+1)),

            'dropout1': hp.uniform('dropout1', .01,.05),
            'dropout2': hp.uniform('dropout2',  .01,.05),

            'batch_size' : hp.choice('batch_size', np.arange(1, 5+1)),

            'nb_epochs' :  100,
            'optimizer': hp.choice('optimizer',['adadelta','adam','rmsprop']),
            'activation': 'relu'
        }

def f_nn(params):   
    
    #print(params)
    
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation
    from keras.optimizers import Adadelta, Adam, rmsprop

    def root_mean_squared_error(y_true, y_pred):
        return backend.sqrt(backend.mean(backend.square(y_pred - y_true), axis=-1))
    
    model = Sequential()
    model.add(Dense(params['units1'], activation='relu', input_shape=(14,)))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout1']))

    model.add(Dense(params['units2'], activation='relu'))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout2']))

    if params['choice']['layers']== 'three':
        model.add(Dense(params['choice']['units3'], activation='relu'))
        model.add(Activation(params['activation']))
        model.add(Dropout(params['choice']['dropout3']))    

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss=root_mean_squared_error, optimizer=params['optimizer'])

    model.fit( X, y,
              nb_epoch=params['nb_epochs'], 
              batch_size=params['batch_size'], 
              verbose =0)
    
    #result_dict = model_evaluate (ann.model, X, X_val, y, y_val)

    y_val =model.predict(X_val, batch_size = params['batch_size'], verbose = 0)
    rmse = sqrt(mean_squared_error(y_test, y_val))
    print(rmse)
    #acc = roc_auc_score(y_val, pred_auc)
    #print('AUC:', acc)
    sys.stdout.flush() 
    #return {'loss': -acc, 'status': STATUS_OK}
    return {'loss': rmse, 'status': STATUS_OK, 'model': model}


trials = Trials()
best = fmin(f_nn, space, algo=tpe.suggest, max_evals=50, trials=trials)
print(best)



82.66947655424349
82.66876678503398
82.6687671694201
82.66880430293513
82.67204315259714
82.66876677971656
82.66912876227293
82.6687676046373
82.66876679981459
82.66876677439915
82.66881071649152
82.66876677439915
82.66876678242033
82.66876677439915
82.66876677439915
82.66877949078226
82.66877561429011
82.6687718312629
82.66896981210746
82.6697984306207
82.66881842618706
82.66876684695033
82.66886459527207
82.66876677439915
82.66878560398206
82.66876698610442
82.66883118429791
82.66876677971656
82.66876677971656
82.66876677439915
82.66876677439915
82.66876685389002
82.66876851373515
82.66877474818367
82.66876677439915
82.66881188212571
82.66876685217763
82.66878947784656
82.6687670253091
82.66876677439915
82.66877107745168
82.66876677439915
82.66876689985412
82.66943821548729
82.66876677439915
82.6688034849443
82.66876677439915
82.66876677439915
82.6687769259803
82.66876677439915
{'optimizer': 2, 'num_layers': 0, 'units2': 82, 'batch_size': 0, 'dropout2': 0.041424696174590894, 'units1'