In [1]:
import pandas as pd
import numpy as np
import h5py
import math

from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Activation, regularizers, Flatten
from keras.callbacks import CSVLogger

from sklearn import ensemble, preprocessing, multiclass
from sklearn.preprocessing import Imputer
from sklearn.model_selection import cross_val_score, train_test_split

from collections import Counter

Using TensorFlow backend.


In [2]:
# Transformation

def sqrt(col):
    return list(map(np.sqrt, col));

def freq(col):
    col = np.floor(col)
    counter = Counter(col)
    return [counter.get(elem) for elem in col]

def tanh(col):
    return list(map(np.tanh, col));

def log(col):
    return list(map(np.log, col));

def my_sigmoid(x):
    return 1 / (1 + math.exp(-x))

def sigmoid(col):
    return list(map(my_sigmoid, col))



In [3]:
# Globals

# Datasets
#dids = np.load("datasets/indexes.npy")
dids = np.load("magicindexes.npy")

# RF model parameters
seed = 67
#transformations = [sqrt, freq]
#transformations_name = ["sqrt", "freq"]
transformations = [tanh, log, sigmoid]
transformations_name = ["tanh", "log", "sigmoid"]
trans2target1 = {}
trans2target2 = {}
trans2target3 = {}

# Comrpessed Dataset paramters
qsa_representation = []
num_bin = 10
too_big = 10000

# Neural Nets Parameters and Variables
MLP_LFE_Nets = {}
inp_shape = (2,num_bin)
dropout = 0.2
norm = (0, 10)
pred_threshold = 0.51
train_set_max = 80000
test_set_max = 80000


In [4]:
#def binarize_dataset():

def load_dataset(id):
    X = np.load("datasets/binary_numeric/" + str(id) + "-data.npy")
    y = np.load("datasets/binary_numeric/" + str(id) + "-target.npy")
    categorical = np.load("datasets/binary_numeric/" + str(id) + "-categorical.npy")
    return X,y,categorical

    
def evaluate_model(X, y, categorical):
    imp = Imputer(missing_values="NaN")
    X = imp.fit_transform(X)
    enc = preprocessing.OneHotEncoder(categorical_features=categorical)
    X = enc.fit_transform(X)
    clf = ensemble.RandomForestClassifier(random_state=seed)
    #clf_ovsr = multiclass.OneVsRestClassifier(clf, n_jobs=-1)
    
    return cross_val_score(clf, X, y,cv=10)
    
def is_positive(X,y,categorical,base_score,transformation,feature):
    transformed_feature = np.array(transformation(X[:,feature]))
    X = np.c_[X,transformed_feature]
    categorical = np.append(categorical,False)
    new_score = evaluate_model(X,y,categorical).mean()
    
    return 1 if(base_score <= (new_score - 0.01)) else 0

def is_positive_2(X, y, categorical, base_score, transformation, feature):
    transformed_feature = np.array(transformation(X[:,feature]))
    new_score = evaluate_model(transformed_feature.reshape(-1,1),y,[False]).mean()
    
    return 1 if(base_score <= (new_score - 0.005)) else 0

def is_positive_3(X, y, categorical, base_score, transformation, feature):
    transformed_feature = np.array(transformation(X[:,feature]))
    new_score = evaluate_model(transformed_feature.reshape(-1,1),y,[False]).mean()
    
    return 1 if(new_score > base_score*1.01) else 0
    
    

In [5]:
# Build the target for the compressed feature
bad_datasets = []

def build_target_for_compressed(dids):

    for transf in transformations:
        trans2target1[transf] = []
        trans2target2[transf] = []
        trans2target3[transf] = []

    
    for did in dids:
        print("Start dataset number", did)
        
        try:

            X, y, categorical = load_dataset(did)       

            new_indexes = []

            if(X.shape[0] > too_big):
                new_indexes = np.random.choice(X.shape[0], too_big, replace=False)
                X = X[new_indexes]
                y = y[new_indexes]

            base_score = evaluate_model(X, y, categorical).mean()

            # Find the indexes of numeric attributes
            numerical_indexes = np.where(np.invert(categorical))[0]
            sample_numerical_indexes = np.random.choice(numerical_indexes, min(numerical_indexes.shape[0],10), replace=False)

            for i,transf in enumerate(transformations):
                for feature in sample_numerical_indexes:

                    print("\tEvaluating feature " + str(feature))

                    mlp_target_1 = is_positive(X,y,categorical,base_score,transf, feature)
                    mlp_target_2 = is_positive_2(X,y,categorical,base_score,transf, feature)
                    mlp_target_3 = is_positive_3(X,y,categorical,base_score,transf, feature)

                    print("\t\t" + str(mlp_target_1), str(mlp_target_2), str(mlp_target_3))

                    trans2target1[transf].append((did,feature,mlp_target_1))
                    trans2target2[transf].append((did,feature,mlp_target_2))
                    trans2target3[transf].append((did,feature,mlp_target_3))

        except:
                print("The evaluation of dataset " + str(did) + " failed")
                bad_datasets.append(did)
                continue

In [6]:
# Save the result
def save_target_for_compressed(path):

    for transf, name in zip(transformations, transformations_name):
        np.save(path + name + "1", trans2target1[transf])
        np.save(path + name + "2", trans2target2[transf])
        np.save(path + name + "3", trans2target3[transf])


In [7]:
def normalize_Rx(matrix):
    
    Rxc = np.zeros(shape=matrix.shape)
    
    for i,row in enumerate(matrix):
        max_c = np.amax(row)
        min_c = np.amin(row)
        bin_width = (max_c-min_c)/(norm[1]-norm[0])
        Rxc[i] = np.apply_along_axis(lambda x : np.floor((x-min_c)/(bin_width)+norm[0]), 0, row)
    
    return Rxc

def to_quantile_sketch_array(did, col, targets, bins, t_class, index):
    max_c = np.nanmax(col)
    min_c = np.nanmin(col)
    bin_width = (max_c-min_c)/num_bin
    Rx = np.zeros(shape=(2,num_bin))
    
    if(bin_width == 0):
        return
    
    for val,y in zip(col,targets):
        if not np.isnan(val):
            bin_value = int(np.floor((val-min_c)/bin_width))
            bin_value = np.clip(bin_value, 0, num_bin-1)
            my_class = 0 if t_class == y else 1
            Rx[my_class][bin_value] = Rx[my_class][bin_value] + 1
            
    Rx = normalize_Rx(Rx)

    qsa_representation.append(np.insert(Rx.flatten(), 0, [did,index]))

In [8]:
# Build the compressed dataset
def build_compressed_dataset(dids):

    for did in dids:
        print("Start dataset number", did)

        try:
            X, y, categorical = load_dataset(did)
        
            if(X.shape[0] > too_big):
                new_indexes = np.random.choice(X.shape[0], too_big, replace=False)
                X = X[new_indexes]
                y = y[new_indexes]

            numerical_indexes = np.where(np.invert(categorical))[0]

            classes = set(y)

            for t_class in classes:
                for index in numerical_indexes:
                    to_quantile_sketch_array(did,X[:,index], y, num_bin, t_class, index)
                
        except:
            print("Error with dataset " + str(did))
            continue;
        
    

In [9]:
# Save the compressed datasets
def save_compressed_dataset(path):
    np.save(path + "compressed.npy", qsa_representation)

In [10]:
# CREATING THE NEURAL NETS

def initialize_MLPs():
    
    for transf in transformations_name:
        model = Sequential()

        model.add(Dense(64, input_shape=inp_shape, W_regularizer=regularizers.l2(0.01)))
        model.add(Activation('softmax'))
        model.add(Dropout(dropout))
        model.add(Flatten())
        model.add(Dense(output_dim=1))
        model.add(Activation('relu'))

        # For a binary classification problem
        model.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        MLP_LFE_Nets[transf] = model
    

In [71]:

def load_compressed_ds():
    data = pd.DataFrame(np.load("datasets/compressed/compressed.npy"))
    data.columns = [str(i) for i in range (num_bin*2+2)]
    return data


def assemble_training_set(compressed, transformation_targets):
    targetDf = pd.DataFrame(transformation_targets)
    targetDf.columns = ["0","1", "2"]
    merged = pd.merge(compressed, targetDf, how='left', on=["0", "1"])
    class_1 = merged.ix[:,2:num_bin + 2].values
    class_2 = merged.ix[:,num_bin + 2:-1].values
    target = np.array(merged.ix[:,-1].values)
    meta_inf = np.array(merged.ix[:,:2].values)
    meta_target = np.c_[target, meta_inf]
    X = []
    
    for c1, c2 in zip(class_1, class_2):
        X.append([c1,c2])    

    return np.array(X), np.array(meta_target)


def split_training_test():
    compressed_ds = load_compressed_ds()

    for transf, name in zip(transformations, transformations_name):
        transformation_targets = np.load("datasets/compressed/" + name + "3.npy")
        X,y = assemble_training_set(compressed_ds, transformation_targets)
        X_s_tr, X_s_test, y_s_tr, y_s_test = train_test_split(X, y, test_size=0.3)
        
        # Dropping the meta-info from training set
        y_s_tr = y_s_tr[:,:1]
        
        np.save("datasets/training/" + name + "-data_split",X_s_tr)
        np.save("datasets/training/" + name + "-target_split",y_s_tr)
        np.save("datasets/test/" + name + "-data_split",X_s_test)
        np.save("datasets/test/" + name + "-target_split",y_s_test)
 
        
def load_training_set(transf):
    
    X = np.load("datasets/training/" + transf + "-data_split.npy")
    y = np.load("datasets/training/" + transf + "-target_split.npy")
    
    return X,y


In [12]:
def balance_dataset(X, y, pos_perc = 0.5):
    X = np.array(X)
    y = np.array(y)

    cnt = Counter(y)

    neg_num = cnt[0]
    pos_num = cnt[1]

    neg_index = (y == 0)
    pos_index = (y == 1)

    X_pos = X[pos_index]
    X_neg = X[neg_index]
    y_pos = y[pos_index]
    y_neg = y[neg_index]

    if pos_perc >= 0.5:
        if pos_num <= neg_num:
            sample_neg_num = int(X_pos.shape[0] * (1/pos_perc - 1))
            neg_sampled_indexes = np.random.choice(X_neg.shape[0], sample_neg_num, replace=False)
            X_neg = X_neg[neg_sampled_indexes]
            y_neg = y_neg[neg_sampled_indexes]

        else:
            sample_neg_ideal_size = int(X_pos.shape[0] * (1/pos_perc - 1))

            if(sample_neg_ideal_size > X_neg.shape[0]):
                sample_pos_num = int(X_neg.shape[0] * (1/(1-pos_perc) - 1))
                pos_sampled_indexes = np.random.choice(X_pos.shape[0], sample_pos_num, replace=False)
                X_pos = X_pos[pos_sampled_indexes]
                y_pos = y_pos[pos_sampled_indexes]
            else:
                neg_sampled_indexes = np.random.choice(X_neg.shape[0], sample_neg_ideal_size, replace=False) 
                X_neg = X_neg[neg_sampled_indexes]
                y_neg = y_neg[neg_sampled_indexes]
    else:
        if pos_num <= neg_num:
            sample_pos_ideal_size = int(X_neg.shape[0] * (1/(1-pos_perc) - 1))

            if(sample_pos_ideal_size > X_pos.shape[0]):
                sample_neg_num = int(X_pos.shape[0] * (1/pos_perc - 1))
                neg_sampled_indexes = np.random.choice(X_neg.shape[0], sample_neg_num, replace=False)
                X_neg = X_neg[neg_sampled_indexes]
                y_neg = y_neg[neg_sampled_indexes]
            else:
                pos_sampled_indexes = np.random.choice(X_pos.shape[0], sample_pos_ideal_size, replace=False) 
                X_pos = X_pos[pos_sampled_indexes]
                y_pos = y_pos[pos_sampled_indexes]
        else:
            sample_pos_num = int(X_neg.shape[0] * (1/(1-pos_perc) - 1))
            pos_sampled_indexes = np.random.choice(X_pos.shape[0], sample_pos_num, replace=False)
            X_pos = X_pos[pos_sampled_indexes]
            y_pos = y_pos[pos_sampled_indexes]

    X = np.concatenate((X_pos, X_neg), axis=0)
    y = np.concatenate((y_pos, y_neg) , axis=0)      

    shuffle_index = np.random.choice(X.shape[0], X.shape[0], replace=False)

    X = X[shuffle_index]
    y = y[shuffle_index]

    return X,y

In [13]:
# Training the nets

def train_MLPs():

    for transf, name in zip(transformations, transformations_name):
        print("Start training " + name)
        
        X, y = load_training_set(name)
        
        #if(X.shape[0] > train_set_max):
        #        new_indexes = np.random.choice(X.shape[0], too_big, replace=False)
        #        X = X[new_indexes]
        #        y = y[new_indexes]
        
        X, y = balance_dataset(X, y.reshape(y.shape[0]), pos_perc=0.5)
        print("dataset_balanced")
        
        y = y.reshape(y.shape[0],1)

        MLP_LFE_Nets[name].summary()
        print ("Inputs: {}".format(MLP_LFE_Nets[name].input_shape))
        print ("Outputs: {}".format(MLP_LFE_Nets[name].output_shape))
        print ("Actual input: {}".format(X.shape))
        print ("Actual output: {}".format(y.shape))
        
        csv_logger = CSVLogger("datasets/MLPs/log/log-"+ name +".csv", append=True, separator=';')
        MLP_LFE_Nets[name].fit(X, y, epochs=100, verbose=1, validation_split=0.2 ,callbacks=[csv_logger]) 


In [14]:

def save_MLPs():
    for transf in transformations_name:
        MLP_LFE_Nets[transf].save_weights("datasets/MLPs/" + transf + "-weights")
        model_json = MLP_LFE_Nets[transf].to_json()
        with open("datasets/MLPs/" + transf + "-net_model", "w") as f:
            f.write(model_json)

def load_MLPs():
    
    for name in transformations_name:
        json_file = open('datasets/MLPs/' + name + "-net_model", 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        loaded_model = model_from_json(loaded_model_json)
        
        # load weights into new model
        loaded_model.load_weights("datasets/MLPs/" + name + "-weights")

        # evaluate loaded model on test data
        loaded_model.compile(optimizer='rmsprop',
                             loss='binary_crossentropy',
                             metrics=['accuracy'])
        
        MLP_LFE_Nets[name] = loaded_model


In [15]:
def load_test_set():
    
    X = np.load("datasets/test/" + transformations_name[0] + "-data_split.npy")
    y_meta = np.load("datasets/test/" + transformations_name[0] + "-target_split.npy")
    t = np.full((y_meta.shape[0],1), 0)
    y_meta = np.concatenate((y_meta, t), axis=1)
    
    for i,name in enumerate(transformations_name[1:]):
        X = np.concatenate((X, np.load("datasets/test/" + name + "-data_split.npy")), axis=0)
        y_meta_tmp = np.load("datasets/test/" + name + "-target_split.npy")
        t = np.full((y_meta_tmp.shape[0],1),i+1)
        y_meta_tmp = np.concatenate((y_meta_tmp, t), axis=1)
        y_meta = np.concatenate((y_meta, y_meta_tmp), axis=0)
    
    return X,y_meta

In [16]:
def evaluate_transformation_classifier():
    
    # Number of prediction on features
    num_of_prediction = {}
    # Number of correct prediction on features
    num_of_correct_prediction = {}
    # Number of dataset which received a prediction
    good_predicted_dids = set()
    num_of_predicted_dataset = 0
    
    pred_mat = []
    
    X, y_meta = load_test_set()
    
    if(X.shape[0] > test_set_max):
        new_indexes = np.random.choice(X.shape[0], too_big, replace=False)
        X = X[new_indexes]
        y_meta = y_meta[new_indexes]
    
    for transf in transformations_name:
        pred_mat.append(MLP_LFE_Nets[transf].predict(X))
        num_of_prediction[transf] = 0
        num_of_correct_prediction[transf] = 0
        
    pred_mat = np.array(pred_mat).transpose()
    
    for predictions,did,feature in zip(pred_mat[0],y_meta[:,1], y_meta[:,2]):
        pmax = np.amax(predictions)
        print(predictions)
        
        if pmax > pred_threshold:
            index = np.where(predictions==pmax)[0][0]
            print(index)
            num_of_prediction[transformations_name[index]] += 1
            
            # Select the target for the transformation and the dataset
            positive_example_found =  np.where((y_meta[:,0] == 1)       &  \
                                               (y_meta[:,1] == did)     &  \
                                               (y_meta[:,2] == feature) &  \
                                               (y_meta[:,3] == index))     \
                                                                       [0].shape[0] > 0
            
            if(positive_example_found):
                good_predicted_dids.add(did) 
                num_of_correct_prediction[transformations_name[index]] += 1
    
    
    
    for transf in transformations_name:
        
        if(num_of_prediction[transf] == 0):
            print("No predictions have been made")
            continue
    
        print("Evalutation of the transformation classifier: " + transf)
        print("\tNumber of prediction:", num_of_prediction[transf])
        print("\tNumber of Correct prediciton:", num_of_correct_prediction[transf])
        print("Accuracy:", num_of_correct_prediction[transf]/num_of_prediction[transf])
        
    print("\n")    
    print("Number of datasets who received a good prediction:", len(good_predicted_dids))
    print("Total number of positive examples: " + str(np.where(y_meta[:,0] == 1)[0].shape[0]))
    print("Total number of examples: " + str(X.shape[0]))

# Build and Preprocess the Dataset

In [17]:

build_target_for_compressed(dids)
save_target_for_compressed("datasets/compressed/")
    

Start dataset number 161
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 0
		0 0 0
Start dataset number 162
	Evaluating feature 2
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 1
		0 0 0
Start dataset number 464
	Evaluating feature 0
		1 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 0


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 1


  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 0
		1 0 0
	Evaluating feature 1
		0 0 0
Start dataset number 724
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		1 0 0
Start dataset number 741
	Evaluating feature 0
		0 0 0
	Evaluating feature 0
		0 1 1
	Evaluating feature 0
		0 0 0
Start dataset number 772
	Evaluating feature 1
		0 1 1
	Evaluating feature 0
		1 1 1
	Evaluating feature 2
		1 1 1
	Evaluating feature 1


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 1 1
	Evaluating feature 0
The evaluation of dataset 772 failed
Start dataset number 774
	Evaluating feature 0


  from ipykernel import kernelapp as app


		1 1 1
	Evaluating feature 2
		0 1 1
	Evaluating feature 1
		0 1 1
	Evaluating feature 0
		0 1 1
	Evaluating feature 2
		1 0 0
	Evaluating feature 1
		1 0 0
	Evaluating feature 0
		0 1 1
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		0 0 0
Start dataset number 795
	Evaluating feature 0
		1 1 1
	Evaluating feature 1
		0 1 1
	Evaluating feature 2
		0 1 1
	Evaluating feature 0
		0 1 1
	Evaluating feature 1


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 1 1
	Evaluating feature 2


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		1 0 0
	Evaluating feature 0
		1 1 1
	Evaluating feature 1
		0 1 1
	Evaluating feature 2
		0 1 1
Start dataset number 811
	Evaluating feature 0
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 0
		0 0 0
Start dataset number 814
	Evaluating feature 1
		1 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 1
		1 0 0
	Evaluating feature 0
		0 1 1
	Evaluating feature 1
		1 0 0
	Evaluating feature 0
		0 0 0
Start dataset number 827
	Evaluating feature 0
		0 1 1
	Evaluating feature 1
		0 1 1
	Evaluating feature 2
		1 1 1
	Evaluating feature 0
		0 1 1
	Evaluating feature 1


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 1 1
	Evaluating feature 2


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		1 1 1
	Evaluating feature 0
		1 1 1
	Evaluating feature 1
		0 1 1
	Evaluating feature 2
		0 1 1
Start dataset number 860
	Evaluating feature 1
		0 1 1
	Evaluating feature 0
		0 0 0
	Evaluating feature 1
The evaluation of dataset 860 failed
Start dataset number 914
	Evaluating feature 0


  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 0
		0 0 0
Start dataset number 923
	Evaluating feature 0
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 0
The evaluation of dataset 923 failed
Start dataset number 925
	Evaluating feature 1


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 3
		0 0 0
	Evaluating feature 1


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 0


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 3


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 3
		0 0 0
Start dataset number 931
	Evaluating feature 0
		0 0 0
	Evaluating feature 2
		0 1 1
	Evaluating feature 1
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		0 0 0
Start dataset number 934
	Evaluating feature 4
		0 1 1
	Evaluating feature 4
The evaluation of dataset 934 failed
Start dataset number 948


  from ipykernel import kernelapp as app


	Evaluating feature 1
		0 1 1
	Evaluating feature 0
		1 1 1
	Evaluating feature 2
		1 1 1
	Evaluating feature 1


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 1 1
	Evaluating feature 0
The evaluation of dataset 948 failed
Start dataset number 983


  from ipykernel import kernelapp as app


	Evaluating feature 3
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 3
The evaluation of dataset 983 failed
Start dataset number 997
	Evaluating feature 2


  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 3
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 3
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 3
		0 0 0
	Evaluating feature 1
		0 0 0
Start dataset number 1169
	Evaluating feature 5
		0 0 0
	Evaluating feature 6
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 5
		0 0 0
	Evaluating feature 6
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 5
		0 0 0
	Evaluating feature 6
		0 0 0
	Evaluating feature 1
		0 0 0
Start dataset number 1460
	Evaluating feature 1
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 1


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 0


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 0
		0 0 0
Start dataset number 1462
	Evaluating feature 0
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 3
		0 0 0
	Evaluating feature 0


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 2


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 1


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 3


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 3
		0 0 0
Start dataset number 1464
	Evaluating feature 0
		1 1 1
	Evaluating feature 1
		1 1 1
	Evaluating feature 2
		0 1 1
	Evaluating feature 3
		1 1 1
	Evaluating feature 0
The evaluation of dataset 1464 failed
Start dataset number 1470
	Evaluating feature 2


  from ipykernel import kernelapp as app


		1 1 1
	Evaluating feature 2
The evaluation of dataset 1470 failed
Start dataset number 1502


  from ipykernel import kernelapp as app


	Evaluating feature 1
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
The evaluation of dataset 1502 failed
Start dataset number 40698
	Evaluating feature 0


  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 6
		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 6
The evaluation of dataset 40698 failed
Start dataset number 40704
	Evaluating feature 0


  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		0 0 0
	Evaluating feature 0


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 2


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 1


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


		0 0 0
	Evaluating feature 0
		0 0 0
	Evaluating feature 2
		0 0 0
	Evaluating feature 1
		0 0 0


In [None]:

build_compressed_dataset(dids)
save_compressed_dataset("datasets/compressed/")


# Train Test Split

In [72]:

split_training_test()


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  if sys.path[0] == '':



# Train the MLPs


In [73]:

initialize_MLPs()


  
  if sys.path[0] == '':


In [74]:

train_MLPs()


Start training tanh
dataset_balanced
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_43 (Dense)             (None, 2, 64)             704       
_________________________________________________________________
activation_43 (Activation)   (None, 2, 64)             0         
_________________________________________________________________
dropout_22 (Dropout)         (None, 2, 64)             0         
_________________________________________________________________
flatten_22 (Flatten)         (None, 128)               0         
_________________________________________________________________
dense_44 (Dense)             (None, 1)                 129       
_________________________________________________________________
activation_44 (Activation)   (None, 1)                 0         
Total params: 833
Trainable params: 833
Non-trainable params: 0
_________________________________________

Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Start training log
dataset_balanced
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_45 (Dense)             (None, 2, 64)             704       
_________________________________________________________________
activation_45 (Activation)   (None, 2, 64)             0         
_____________________________________________________________

Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100


Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Start training sigmoid
dataset_balanced
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_47 (Dense)             (None, 2, 64)             704       
_________________________________________________________________
activation_47 (Activation)   (None, 2, 64)             0         
_________________________________________________________________
dropout_24 (Dropout)         (None, 2, 64)             0         
_________________________________________________________________
flatten_24 (Flatten)         (None, 128)               0         
_________________________________________________________________
dense_48 (Den

Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100




In [80]:

save_MLPs()


# Load the nets

In [48]:

load_MLPs()


FileNotFoundError: [Errno 2] No such file or directory: 'datasets/MLPs/tanh-net_model'


# Test the nets


In [79]:

evaluate_transformation_classifier()


[ 0.44530952  0.28902817  0.07046628]
[ 0.38712078  0.29309204  0.04775468]
[ 0.6537044   0.5835582   0.41724697]
0
[ 0.43407601  0.09396598  0.        ]
[ 0.22742671  0.44840541  0.30840462]
[ 0.29700139  0.3171421   0.24446476]
[ 0.59472287  0.57507551  0.41575238]
0
[ 0.58125842  0.          0.06320802]
0
[ 0.4585067  0.         0.       ]
[ 0.38183966  0.3472912   0.25826061]
[ 0.41173804  0.          0.16909601]
[ 0.25090474  0.48724788  0.42239356]
[ 0.31169158  0.22103906  0.04596939]
[ 0.43340117  0.216066    0.07156242]
[ 0.20837322  0.54514337  0.35950559]
[ 0.58375496  0.          0.0602158 ]
0
[ 0.01393521  0.          0.        ]
[ 0.54057533  0.50846046  0.36552057]
[ 0.29073495  0.34341344  0.30640164]
[ 0.35750943  0.04096605  0.        ]
[ 0.40315819  0.4387159   0.29065526]
[ 0.14351393  0.1223533   0.18024796]
[ 0.45319307  0.          0.        ]
[ 0.41435248  0.03748466  0.1235563 ]
[ 0.18460017  0.16909397  0.13306564]
[ 0.  0.  0.]
[ 0.42291659  0.2964884   0.176

In [78]:
pred_threshold = 0.55
