In [1]:
try:
    import algos
    import utils_m
    import os
    import datetime
    import pytz
    import itertools as it
    import gc
    from pymongo import MongoClient
    import pickle
except ImportError:
    print("Required modules not installed")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [16]:
from keras.constraints import maxnorm
from keras.layers import Activation, Dense, Dropout
from keras.layers.advanced_activations import LeakyReLU, PReLU, ThresholdedReLU, ELU
from keras.models import Sequential
from keras.models import load_model as keras_load_model
from keras import regularizers, optimizers
# from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
from keras import backend as K

In [17]:
# MongoDB database name: indexed by save_data() and handed to utils_m.generate_run_id().
db_name = "db_stockml"
# Default bot document name; DLK.__init__ falls back to it when no `bot` is passed.
bot_name = "bot_m_algo_dlk"

In [18]:
def save_data(bot, data, status):
    """Store a bot's status and state dict on its document in the ``c_bots`` collection.

    Opens a MongoClient with default connection settings, updates the single
    document whose ``bot_name`` equals ``bot`` and closes the client again.

    Args:
        bot: value matched against the ``bot_name`` field.
        data: object written to the ``data`` field.
        status: value written to the ``status`` field.

    Returns:
        The result object returned by ``update_one``.
    """
    client = MongoClient()
    try:
        # Ask for the current time directly in IST. Localizing a naive
        # datetime.now() would label the host's local wall clock as IST, which
        # is only right when the host itself runs in Asia/Kolkata.
        now_ist = datetime.datetime.now(pytz.timezone('Asia/Kolkata'))
        return client[db_name]["c_bots"].update_one(
            {"bot_name": bot},
            {"$set": {"status": status, "status_datetime": now_ist, "data": data}},
        )
    finally:
        # Release the connection even when the update raises.
        client.close()

In [6]:
import time

In [18]:
def save_data(data, filename):
    """Pickle ``data`` to ``filename``, guarded by a ``flag_<filename>.txt`` marker file.

    The marker file must already exist. While it contains ``"open"`` the call
    waits five seconds and checks again. Once it is free the marker is set to
    ``"open"``, the data is written, and the marker is set back to ``"close"``.

    NOTE(review): this definition shares its name with the MongoDB-backed
    ``save_data(bot, data, status)`` defined earlier in the notebook and
    replaces it when cells run top to bottom; ``DLK`` calls the three-argument
    form.

    Args:
        data: picklable object to store.
        filename: path of the pickle file; also used to build the marker name.

    Returns:
        None.
    """
    flag_path = str("flag_" + filename + ".txt")

    # Poll in a loop rather than recursing, so a long wait cannot hit the
    # recursion limit.
    while True:
        with open(flag_path, "r") as g:
            check = g.read()
        if check == "open":
            print("file %s open" % filename)
            time.sleep(5)
            continue
        with open(flag_path, "w") as f:
            f.write("open")
        # Re-read the marker: if it says "close" again, another party touched
        # it in between, so back off and start over.
        with open(flag_path, "r") as g:
            check = g.read()
        if check == "close":
            print("file %s open" % filename)
            time.sleep(5)
            continue
        break

    try:
        # Serialise before opening the target so a pickling error cannot
        # truncate the previously stored file.
        payload = pickle.dumps(data)
        with open(filename, 'wb') as f:
            f.write(payload)
    finally:
        # Always release the marker; otherwise one failed save would block
        # every later reader and writer forever.
        with open(flag_path, "w") as f:
            f.write("close")
    return None

In [None]:
def read_data(filename):
    """Load a pickled object from ``filename``, guarded by ``flag_<filename>.txt``.

    The marker file must already exist. While it contains ``"open"`` the call
    waits five seconds and checks again. Once it is free the marker is set to
    ``"open"``, the pickle is loaded, and the marker is set back to ``"close"``.

    Args:
        filename: path of the pickle file; also used to build the marker name.

    Returns:
        The unpickled object.
    """
    flag_path = str("flag_" + filename + ".txt")

    # Poll in a loop rather than recursing, so a long wait cannot hit the
    # recursion limit.
    while True:
        with open(flag_path, "r") as g:
            check = g.read()
        if check == "open":
            print("file %s open" % filename)
            time.sleep(5)
            continue
        with open(flag_path, "w") as f:
            f.write("open")
        # Re-read the marker: if it says "close" again, another party touched
        # it in between, so back off and start over.
        with open(flag_path, "r") as g:
            check = g.read()
        if check == "close":
            print("file %s open" % filename)
            time.sleep(5)
            continue
        break

    try:
        with open(filename, 'rb') as f:
            # NOTE: pickle.load can execute arbitrary code; only read files
            # this project wrote itself.
            data = pickle.load(f)
    finally:
        # Always release the marker, even when the file is missing or corrupt.
        with open(flag_path, "w") as f:
            f.write("close")
    return data

In [19]:
def update_status(db_stockml, c_bots, bot_name, message):
    """Set the ``status`` and ``status_datetime`` fields of one bot document.

    Args:
        db_stockml: database handle; its ``c_bots`` collection is updated.
        c_bots: unused; kept so existing callers keep working.
        bot_name: value matched against the ``bot_name`` field.
        message: value written to the ``status`` field.

    Returns:
        The result object returned by ``update_one``.
    """
    # Ask for the current time directly in IST. Localizing a naive
    # datetime.now() would label the host's local wall clock as IST, which is
    # only right when the host itself runs in Asia/Kolkata.
    now_ist = datetime.datetime.now(pytz.timezone('Asia/Kolkata'))
    return db_stockml.c_bots.update_one(
        {"bot_name": bot_name},
        {"$set": {"status": message, "status_datetime": now_ist}},
    )

In [50]:
class DLK(algos.Algo):
    """Round-based grid search over the hyper-parameters of a dense Keras network.

    All search state lives in the plain dict ``self.data`` (grids, combinations
    tried, best parameters per round, ...). It is handed to the module-level
    ``save_data(bot, data, status)`` after every state change, and a previously
    stored dict can be passed back in through ``data``.
    """

    def __init__(self, mode, name="dlk", data=None, bot=None, data_filename="deep_optimized_networks_data.pickle"):
        """Create a fresh search state or adopt an existing one.

        Args:
            mode: forwarded unchanged to ``algos.Algo.__init__``.
            name: label stored under ``data["name"]`` for a fresh state.
            data: existing state dict; a fresh one is built when it is None or
                its ``"stage"`` is 0.
            bot: bot name to store; falls back to the module-level ``bot_name``.
            data_filename: stored under ``data["data_filename"]`` for a fresh state.
        """
        super(DLK, self).__init__(mode)

        if data is not None:
            self.data = data
        else:
            self.data = {"stage": 0, "data_filename": data_filename}

        if self.data["stage"] == 0:
            # Carry the filename over; rebuilding the dict used to drop it.
            kept_filename = self.data.get("data_filename", data_filename)
            self.data = {"combs": {}, "combs_comp": {}, "best_score": {}, "best_params": {},
                         "grids": {}, "best": {}, "stage": 0, "data_filename": kept_filename}
            self.data["optimized"] = False
            self.data["name"] = name
            self.data["output_layer_units"] = 1
            # Search space. Value types are derived from the parameter name by
            # get_param_type(), so no "type" key is stored here.
            self.data['base_range'] = {"i_layer_units": {"range": [1, 500], "min": 1},
                                       "h_layer_1_units": {"range": [1, 500], "min": 1},
                                       "h_layer_2_units": {"range": [1, 500], "min": 1},
                                       "h_layer_3_units": {"range": [1, 500], "min": 1},
                                       "activation": {"range": ['tanh']},
                                       "optimizer": {"range": ['RMSprop']},
                                       "batch_size": {"range": [64], "min": 8},
                                       "epochs": {"range": [10], "min": 2},
                                       "dropout_rate": {"range": [0], "min": 0.05}
                                       }

        self.data["bot_name"] = bot if bot is not None else bot_name
        if "run_id" not in self.data:
            self.data["run_id"] = utils_m.generate_run_id(db_name)

        self.data["stage"] = 1
        save_data(self.data["bot_name"], self.data, "running")

    def get_param_type(self, param):
        """Return ``"int"``, ``"float"`` or ``"str"`` for a hyper-parameter name.

        Raises:
            ValueError: if the name is not one of the known parameters.
        """
        if "layer" in param:
            return "int"
        elif param == "batch_size" or param == "epochs":
            return "int"
        elif param == "dropout_rate":
            return "float"
        elif param == "activation" or param == "optimizer":
            return "str"
        else:
            raise ValueError("unrecognized paramaeter: %s" % param)

    def _typed_range(self, param, spec):
        """Return a copy of a range spec with its ``"type"`` key filled in."""
        return dict(spec, type=self.get_param_type(param))

    def get_activation_layer(self, activation):
        """Return a Keras layer for ``activation``.

        The four advanced activations are instantiated with their defaults;
        anything else is wrapped in a generic ``Activation`` layer.
        """
        advanced = {'LeakyReLU': LeakyReLU, 'PReLU': PReLU,
                    'ELU': ELU, 'ThresholdedReLU': ThresholdedReLU}
        if activation in advanced:
            return advanced[activation]()
        return Activation(activation)

    def get_optimizer(self, name='Adadelta'):
        """Return the named Keras optimizer with ``clipnorm=1.``; unknown names yield Adam."""
        known = ('SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam')
        optimizer_cls = getattr(optimizers, name) if name in known else optimizers.Adam
        return optimizer_cls(clipnorm=1.)

    def delete_old_model(self, n):
        """Remove every file in the working directory whose name contains ``model_aglo_dlk-<n>-``."""
        prefix = str("model_aglo_dlk-" + str(n) + "-")
        cwd = os.getcwd()
        for entry in os.listdir(cwd):
            if prefix in entry:
                os.remove(os.path.join(cwd, entry))
        return None

    def train(self, x_train, y_train, x_val=None, y_val=None, params=None, loss='binary_crossentropy', metric='accuracy', verbose=1):
        """Build, compile and fit a dense network described by ``params``.

        Args:
            x_train, y_train: training inputs and targets; ``x_train.shape[1]``
                is used as the input dimension.
            x_val, y_val: optional validation data; used only when both are given.
            params: dict with ``i_layer_units``, ``h_layer_<k>_units``,
                ``activation``, ``optimizer``, ``batch_size``, ``epochs`` and
                ``dropout_rate``. A hidden layer with 0 units is skipped.
            loss, metric: passed to ``model.compile``.
            verbose: passed to ``model.fit``.

        Returns:
            The fitted ``Sequential`` model.
        """
        kernel_initializer = 'normal'

        def add_block(units, **dense_kwargs):
            # Dense -> activation -> optional dropout, shared by every layer.
            model.add(Dense(units, kernel_initializer=kernel_initializer,
                            kernel_regularizer=regularizers.l2(0.01), **dense_kwargs))
            model.add(self.get_activation_layer(params["activation"]))
            if params["dropout_rate"] > 0:
                model.add(Dropout(params["dropout_rate"]))

        model = Sequential()
        add_block(params["i_layer_units"], input_dim=x_train.shape[1])

        hidden_count = sum(key.startswith("h_layer") for key in params)
        for idx in range(1, hidden_count + 1):
            units = params[str("h_layer_" + str(idx) + "_units")]
            if units == 0:
                continue
            add_block(units)

        model.add(Dense(self.data["output_layer_units"], kernel_initializer=kernel_initializer, activation='sigmoid'))

        model.compile(loss=loss,
                      optimizer=self.get_optimizer(params["optimizer"]),
                      metrics=[metric])

        fit_kwargs = {"batch_size": params["batch_size"],
                      "epochs": params["epochs"],
                      "verbose": verbose,
                      "shuffle": True}
        # Identity checks: comparing an array with `== None` is element-wise
        # and cannot be used as a plain condition.
        if x_val is not None and y_val is not None:
            fit_kwargs["validation_data"] = (x_val, y_val)
        model.fit(x_train, y_train, **fit_kwargs)

        return model

    def list_from_range(self, rn):
        """Expand a range spec into the list of values to try.

        Args:
            rn: dict with ``"type"`` (``"int"``, ``"float"`` or ``"str"``) and
                ``"range"``; numeric two-element ranges also need ``"min"``,
                the span at or below which no midpoint is inserted.

        Returns:
            ``"str"``: the range as given. Numeric: a one-element range as
            given, otherwise the bounds (rounded for ``"int"``) with a midpoint
            when the span exceeds ``"min"``.

        Raises:
            ValueError: on an unknown type, an empty or over-long numeric
                range, or a range whose second bound is below the first.
        """
        kind = rn["type"]
        values = rn["range"]
        if kind == "str":
            return values
        if kind not in ("int", "float"):
            raise ValueError("unrecognized range type: %s" % kind)
        if len(values) == 0 or len(values) > 2:
            raise ValueError("wrong range length")
        if len(values) == 1:
            return values

        if kind == "int":
            bottom, top = round(values[0]), round(values[1])
        else:
            bottom, top = values[0] + 0.0, values[1] + 0.0
        dif = top - bottom
        if dif < 0:
            raise ValueError("Second number in range should be greater than or equal to the first one")
        if dif == 0:
            return [top]
        if dif <= rn["min"]:
            return [bottom, top]
        if kind == "int":
            if dif <= 2:
                return list(range(bottom, top + 1))
            return [bottom, round((bottom + top) / 2), top]
        return [bottom, bottom + dif / 2, top]

    def process_combinations(self, combs):
        """Convert the string values of each combination back to their parameter types."""
        casts = {"int": int, "float": float, "str": lambda value: value}
        return [{p: casts[self.get_param_type(p)](comb[p]) for p in comb} for comb in combs]

    def generate_combinations(self, d):
        """Return every combination (Cartesian product) of a ``{param: values}`` grid.

        Values are stringified for the product and converted back by
        ``process_combinations``.
        """
        as_text = {key: list(map(str, d[key])) for key in d}
        keys, values = zip(*as_text.items())
        combs = [dict(zip(keys, picked)) for picked in it.product(*values)]
        return self.process_combinations(combs)

    def _unique_combinations(self, combs):
        """Return ``combs`` without duplicates, keeping first-seen order."""
        unique = []
        for comb in combs:
            if comb not in unique:
                unique.append(comb)
        return unique

    def range_from_last(self, grid, b_params):
        """Derive narrowed range specs around the best values of a finished grid.

        Args:
            grid: ``{param: [values tried]}`` of the round that produced ``b_params``.
            b_params: ``{param: best value}``; each value must occur in ``grid[param]``.

        Returns:
            ``{param: range spec}`` suitable for ``list_from_range``. String
            parameters are pinned to their best value. Numeric ranges run
            halfway to each neighbouring grid value, and past an outermost
            value by half a step unless that would reach the base range bound.
        """
        new = {}
        for p, tried in grid.items():
            typ = self.get_param_type(p)
            best = b_params[p]
            if typ == "str":
                new[p] = {"range": [best], "type": typ}
                continue

            base = self.data["base_range"][p]
            loc = tried.index(best)
            if len(tried) == 1:
                bounds = [tried[loc], tried[loc]]
            elif len(tried) == 2:
                bounds = [best, best]
            elif loc == 0:
                upper = (tried[loc] + tried[loc + 1]) / 2
                bottom = tried[loc] - (tried[loc + 1] - tried[loc]) / 2
                if bottom <= base["range"][0]:
                    bounds = [tried[loc], upper]
                else:
                    bounds = [bottom, upper]
            elif loc == len(tried) - 1:
                lower = (tried[loc] + tried[loc - 1]) / 2
                top = tried[loc] + (tried[loc] - tried[loc - 1]) / 2
                if top >= base["range"][1]:
                    bounds = [lower, tried[loc]]
                else:
                    bounds = [lower, top]
            else:
                bounds = [(tried[loc] + tried[loc - 1]) / 2, (tried[loc] + tried[loc + 1]) / 2]
            # "type" is required by list_from_range.
            new[p] = {"min": base["min"], "range": bounds, "type": typ}
        return new

    def add_to_best_models(self, model, score):
        """Insert ``model`` into the on-disk ranking of best models if it qualifies.

        Reads ``best_models_order.pickle`` and ``best_models_train_shape.pickle``
        from the working directory. At the first rank whose stored value is
        below ``score["pos_accuracy"]``, lower ranks are shifted down by one
        (including their ``best_model-<rank>-m.model`` files), the model is
        saved at that rank and both pickles are rewritten. Uses
        ``self.x_train``, which ``optimize`` sets.
        """
        # NOTE: pickle.load can execute arbitrary code; these files are
        # expected to be written by this project only.
        with open('best_models_order.pickle', 'rb') as f:
            best_models_order = pickle.load(f)
        with open('best_models_train_shape.pickle', 'rb') as f:
            best_models_train_shape = pickle.load(f)

        for i in range(0, len(best_models_order)):
            if score["pos_accuracy"] > best_models_order[i]:
                for j in range(len(best_models_order) - 1, i, -1):
                    best_models_order[j] = best_models_order[j - 1]
                    best_models_train_shape[j] = best_models_train_shape[j - 1]
                    src = str("best_model-" + str(j - 1) + "-m.model")
                    dst = str("best_model-" + str(j) + "-m.model")
                    # A rank may not have a file yet; tolerate that instead of
                    # aborting halfway through the shift.
                    if os.path.exists(src):
                        os.replace(src, dst)
                    elif os.path.exists(dst):
                        os.remove(dst)
                best_models_order[i] = score["pos_accuracy"]
                best_models_train_shape[i] = self.x_train.shape
                model.save(str("best_model-" + str(i) + "-m.model"))
                with open('best_models_order.pickle', 'wb') as f:
                    pickle.dump(best_models_order, f)
                with open('best_models_train_shape.pickle', 'wb') as f:
                    pickle.dump(best_models_train_shape, f)
                break
        return None

    def grid_from_comb(self, last, comb):
        """Return the grid of round ``last`` whose value lists contain every value of ``comb``.

        Raises:
            ValueError: if no stored grid of that round contains the combination.
        """
        for grid in self.data["grids"][last].values():
            if all(comb[p] in grid[p] for p in grid):
                return grid
        raise ValueError("grid not found")

    def run_comb(self, comb):
        """Train ``comb`` ``self.level + 1`` times and average the test score.

        Returns:
            ``(model, score)``: the model of the last repetition and the
            running mean of ``self.test_metric(self.y_test, predictions)``.

        NOTE(review): the score is averaged and compared like a number here and
        in run_round, but add_to_best_models indexes it with "pos_accuracy".
        Confirm what the configured test_metric returns.
        """
        for k in range(0, self.level + 1):
            model = self.train(self.x_train, self.y_train, self.x_val, self.y_val, params=comb, loss=self.loss, metric=self.metric, verbose=self.verbose)
            current = self.test_metric(self.y_test, model.predict(self.x_test))
            if k == 0:
                score = current
            else:
                score = ((score * k) + current) / (k + 1)
        return model, score

    def run_round(self, n):
        """Run, or resume, optimisation round ``n``.

        Round 1 builds its grid from ``base_range``. Every later round builds
        one narrowed grid per entry kept in ``data["best"]`` for the previous
        round. Each untried combination is trained and scored, the
        ``self.level`` best combinations of the round are kept, and the state
        is persisted after every combination.

        Returns:
            ``self``.
        """
        n = str(n)
        # Older state dicts may not carry "best" yet.
        best = self.data.setdefault("best", {})
        round_best = best.setdefault(n, {})

        if n not in self.data["combs"]:
            if int(n) == 1:
                self.data["grids"][n] = {
                    p: self.list_from_range(self._typed_range(p, spec))
                    for p, spec in self.data['base_range'].items()
                }
                combinations = self.generate_combinations(self.data["grids"][n])
            else:
                last = str(int(n) - 1)
                self.data["grids"][n] = {}
                combinations = []
                for key, best_comb in best[last].items():
                    # Round 1 stores one flat grid; later rounds store one
                    # grid per kept combination.
                    if int(n) == 2:
                        grid = self.data["grids"][last]
                    else:
                        grid = self.grid_from_comb(last, best_comb)
                    rn = self.range_from_last(grid, best_comb)
                    self.data["grids"][n][key] = {p: self.list_from_range(rn[p]) for p in rn}
                    combinations = combinations + self.generate_combinations(self.data["grids"][n][key])
                combinations = self._unique_combinations(combinations)

            self.data["combs"][n] = combinations
            self.data["combs_comp"][n] = []
            self.data["best_score"][n] = 0
            save_data(self.data["bot_name"], self.data, "running")

        if len(self.data["combs"][n]) == len(self.data["combs_comp"][n]):
            return self
        print("Round grid:")
        print(self.data["grids"][n])
        print("%s new combinations found. Trying them." % (len(self.data["combs"][n]) - len(self.data["combs_comp"][n])))

        for i, comb in enumerate(self.data["combs"][n]):
            if comb in self.data["combs_comp"][n]:
                continue

            print("Combination: %s" % str(i + 1))
            print(datetime.datetime.now())
            model, score = self.run_comb(comb)

            # Keep the `level` best combinations, keyed by their score as text.
            if len(round_best) < self.level:
                round_best[str(score)] = comb
            elif round_best:
                worst_key = min(round_best, key=float)
                if score > float(worst_key):
                    del round_best[worst_key]
                    round_best[str(score)] = comb

            if score > self.data["best_score"][n]:
                print("Best")
                self.data["best_params"][n] = comb
                self.data["best_score"][n] = score
                self.delete_old_model(n)
                # The run id makes the file name unique per run; the prefix is
                # what delete_old_model() matches on.
                model.save(str("model_aglo_dlk-" + str(n) + "-" + str(self.data["run_id"])))
                print(comb)
                print(score)
                self.add_to_best_models(model, score)

            self.data["combs_comp"][n].append(comb)
            save_data(self.data["bot_name"], self.data, "running")
            # Drop the model before collecting so it can be freed between runs.
            del model
            gc.collect()

        if len(self.data["combs"][n]) == 1:
            self.data["optimized"] = True

        return self

    def optimize(self, x_train, y_train, x_test, y_test, x_val, y_val, loss='binary_crossentropy', metric='accuracy', test_metric=utils_m.all_accuracy, verbose=1, stages=None, data_label=None, label=None, max_rounds=1, level=1):
        """Run optimisation rounds until ``max_rounds`` is reached or the search converges.

        Stores the data sets and settings on the instance, then continues from
        the highest round already present in ``data["combs"]`` (round 1 if
        none). The search counts as converged once a round consists of a
        single combination.

        Args:
            x_train, y_train, x_test, y_test, x_val, y_val: data sets used by
                ``train`` and ``run_comb``.
            loss, metric, verbose: forwarded to ``train``.
            test_metric: callable ``(y_true, y_pred) -> score`` used on the test set.
            stages, data_label, label: stored on the instance only.
            max_rounds: last round number to run.
            level: number of best combinations kept per round; each combination
                is trained ``level + 1`` times.

        Returns:
            ``self``.
        """
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        self.x_val = x_val
        self.y_val = y_val
        self.loss = loss
        self.metric = metric
        self.test_metric = test_metric
        self.verbose = verbose
        self.stages = stages
        self.data_label = data_label
        self.label = label
        self.level = level

        rounds = sorted(map(int, self.data["combs"].keys()))
        c_round = rounds[-1] if rounds else 1

        while c_round <= max_rounds:
            if self.data["optimized"] == True:
                print("Best parameters found")
                break
            print("Running round: %s" % c_round)
            self.run_round(c_round)
            gc.collect()
            c_round += 1
        print("%s rounds of optimization completed" % str(c_round - 1))
        return self

    def get_data(self):
        """Return the search state dict."""
        return self.data

In [3]:
# Scratch data: a dict of dicts with integer keys inserted out of order.
a = {1:{0:"a",9:"b"}, 3:{8:"a", 9:"b"}, 2:{7:"c", 8:"b"}}

In [5]:
# Scratch check: sorted() returns the keys in ascending order regardless of insertion order.
sorted(a.keys())

[1, 2, 3]