In [9]:
import pandas as pd
import numpy as np
from collections import defaultdict 
import sys
import os
import matplotlib.pyplot as plt
from tabulate import tabulate

import keras 
from keras.models import Sequential, Model
from keras.layers import Dense, Input, Dropout, Embedding, LSTM, Flatten, Lambda
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from keras.preprocessing.text import Tokenizer

from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import train_test_split
from keras.utils.vis_utils import plot_model

In [10]:
ADDR = '/nfs_home/nbhardwaj/data/rds_data/400/'

In [27]:
from sklearn.preprocessing import LabelEncoder

class LabelEncoderExt(object):
    def __init__(self):
        """
        It differs from LabelEncoder by handling new classes and providing a value for it [Unknown]
        Unknown will be added in fit and transform will take care of new item. It gives unknown class id
        """
        self.label_encoder = LabelEncoder()
        # self.classes_ = self.label_encoder.classes_

    def fit(self, data_list):
        """
        This will fit the encoder for all the unique values and introduce unknown value
        :param data_list: A list of string
        :return: self
        """
        self.label_encoder = self.label_encoder.fit(list(data_list) + ['Unknown'])
        self.classes_ = self.label_encoder.classes_

        return self

    def transform(self, data_list):
        """
        This will transform the data_list to id list where the new values get assigned to Unknown class
        :param data_list:
        :return:
        """
        new_data_list = list(data_list)
        m = {}
        for x in self.label_encoder.classes_:
            m[x] = True
        for ind, y in enumerate(new_data_list):
            if(m.get(y) is None):
                new_data_list[ind] = 'Unknown'
#         for unique_item in np.unique(data_list):
#             if unique_item not in self.label_encoder.classes_:
#                 new_data_list = ['Unknown' if x==unique_item else x for x in new_data_list]
        return self.label_encoder.transform(new_data_list)
    

In [26]:
a = [1, 2, 3]
for x in a:
    if(x ==1):
        x=0
print(a)

[1, 2, 3]


In [12]:
def create_model(embed_size = 10):
    inp1 = Input(shape = (1,))
    inp2 = Input(shape = (1,))
    inp3 = Input(shape = (1,))

    embed1 = Embedding(len(le_inst.classes_), embed_size, input_length = 1)(inp1)
    embed2 = Embedding(len(le_delta.classes_), embed_size, input_length = 1)(inp2)

    merged_inp = keras.layers.concatenate([embed1, embed2], axis = 1)
    merged_inp = Flatten()(merged_inp)
    merged_inp = keras.layers.concatenate([merged_inp, inp3])

    out = Dense(200, activation = 'relu')(merged_inp)
    out = Dense(8, activation = 'softmax')(out)

    model = Model([inp1, inp2, inp3], out)
    return model

In [None]:
sets = [x for x in range(64)]
inst_vocab = []
delta_vocab = []
train_acc = []
test_acc = []
for cset in sets:
    df = pd.read_csv(ADDR+str(cset)+'.csv', index_col = [0])
    df.Mode = np.where(df.Mode.values=='R', 1, -1)
    df.Mode = df['Mode'].astype('str')
    df.Instruction = df.Instruction.astype('str')
    df.delta = df.delta.astype('float')
    df.label = df.label - 1
    X = df[['Instruction', 'delta', 'Mode']].values[1:]
    y = df[['label']].values[1:]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 42)
    print("--------------------split done---------------------")
    le_inst = LabelEncoderExt()
    le_inst.fit(X_train[:, 0])
    le_delta = LabelEncoderExt()
    le_delta.fit(X_train[:, 1])
    print("----------------labels done----------------------")
    X_train[:, 0] = le_inst.transform(X_train[:, 0])
    X_train[:, 1] = le_delta.transform(X_train[:, 1])
    print("--------")
    
    X_test[:, 0] = le_inst.transform(X_test[:, 0])
    X_test[:, 1] = le_delta.transform(X_test[:, 1])
    print("-------------------labels transformed---------------------")
    model = create_model()
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    filepath = '/nfs_home/nbhardwaj/model_weights/RD_Embedding/'+str(cset)+'.hdf5'
    # checkpointer = ModelCheckpoint(filepath, monitor = 'val_acc', save_best_only = True, mode = 'max', verbose = 1)
    model.fit([X_train[:, 0], X_train[:, 1], X_train[:, 2]], to_categorical(y_train), epochs = 2)
    print("------------training done------------")
    model.save_weights(filepath)
    t_ac = model.evaluate([X_test[:, 0], X_test[:, 1], X_test[:, 2]], to_categorical(y_test))[1]
    test_acc.append(t_ac)
    train_acc.append(model.evaluate([X_train[:, 0], X_train[:, 1], X_train[:, 2]], to_categorical(y_train))[1])
    inst_vocab.append(len(le_inst.classes_))
    delta_vocab.append(len(le_delta.classes_))
    print("--------------done processing for set---------->", cset, '|| accuracy||', t_ac)

--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 0 || accuracy|| 0.8951742053031921
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 1 || accuracy|| 0.8384438157081604
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 2 || accuracy|| 0.8696214556694031
--------------------split done---------------------
------------

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



--------------done processing for set----------> 7 || accuracy|| 0.9597495198249817
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
 445376/3525763 [==>...........................] - ETA: 10:38 - loss: 0.3458 - accuracy: 0.9147

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 2/2
------------training done------------
--------------done processing for set----------> 8 || accuracy|| 0.9180603623390198
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 9 || accuracy|| 0.9875941872596741
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 10 || accuracy|| 0.9657191038131714
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------d

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



------------training done------------
 578112/2595506 [=====>........................] - ETA: 1:24

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



--------------done processing for set----------> 12 || accuracy|| 0.7275398373603821
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 13 || accuracy|| 0.9399990439414978
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 14 || accuracy|| 0.9642935991287231
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 15 || accura

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



------------training done------------

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



------------training done------------
--------------done processing for set----------> 21 || accuracy|| 0.9582710266113281
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



------------training done------------
--------------done processing for set----------> 24 || accuracy|| 0.9760700464248657
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 25 || accuracy|| 0.9749969840049744
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 26 || accuracy|| 0.8660642504692078
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done proc

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 2/2
------------training done------------
--------------done processing for set----------> 35 || accuracy|| 0.9764665961265564
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 36 || accuracy|| 0.8727632761001587
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
--------------done processing for set----------> 37 || accuracy|| 0.8630595207214355
--------------------split done---------------------
----------------labels done----------------------
--------
-------------------labels transformed---------------------
Epoch 1/2
Epoch 2/2
------------training done------------
-------------

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 4104864/21023465 [====>.........................] - ETA: 54:40 - loss: 0.1959 - accuracy: 0.9430

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 4616384/21023465 [=====>........................] - ETA: 53:00 - loss: 0.1958 - accuracy: 0.9431

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





In [None]:
!ls

----------------labels done----------------------
--------




-------------------labels transformed---------------------

Epoch 1/2
Epoch 2/2
------------training done------------


NameError: name 'delta_vocab' is not defined

In [145]:
# df.head()

# # Read -> 1 ; Write -> -1




# print(np.min(df.Instruction.values), np.max(df.Instruction.values))

# print(X.shape, y.shape, X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# print(len(np.unique(X_train[:, 0])), len(np.unique(X[:, 0])), len(np.unique(X_train[:, 1])), len(np.unique(X[:, 1])))



# le_inst = LabelEncoderExt()
# le_inst.fit(X_train[:, 0])
# le_delta = LabelEncoderExt()
# le_delta.fit(X_train[:, 1])

# X_train[:, 0] = le_inst.transform(X_train[:, 0])
# X_train[:, 1] = le_delta.transform(X_train[:, 1])

# X_test[:, 0] = le_inst.transform(X_test[:, 0])
# X_test[:, 1] = le_delta.transform(X_test[:, 1])

# print(len(le_inst.classes_), len(le_delta.classes_))

# def create_model(embed_size = 10):
#     inp1 = Input(shape = (1,))
#     inp2 = Input(shape = (1,))
#     inp3 = Input(shape = (1,))

#     embed1 = Embedding(len(le_inst.classes_), embed_size, input_length = 1)(inp1)
#     embed2 = Embedding(len(le_delta.classes_), embed_size, input_length = 1)(inp2)
    
#     merged_inp = keras.layers.concatenate([embed1, embed2], axis = 1)
#     merged_inp = Flatten()(merged_inp)
#     merged_inp = keras.layers.concatenate([merged_inp, inp3])
    
#     out = Dense(200, activation = 'relu')(merged_inp)
#     out = Dense(8, activation = 'softmax')(out)
    
#     model = Model([inp1, inp2, inp3], out)
#     return model
    

# model = create_model()

# print(model.summary())

# plot_model(model)

# model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# filepath = '/nfs_home/nbhardwaj/model_weights/RD_Embedding/'+str(cset)+'.hdf5'

# # checkpointer = ModelCheckpoint(filepath, monitor = 'val_acc', save_best_only = True, mode = 'max', verbose = 1)
# model.fit([X_train[:, 0], X_train[:, 1], X_train[:, 2]], to_categorical(y_train), epochs = 2)


# model.save_weights(filepath)

# acc_test = model.evaluate([X_test[:, 0], X_test[:, 1], X_test[:, 2]], to_categorical(y_test))

# print(acc_test[0])
# print(model.metrics_names)

# len(le_inst.classes_)



Unnamed: 0,ICount,Instruction,Data,Mode,set,rd,delta,label
0,638313090423,4722720,2198847782096,R,0,2,,2
1,638313090428,4722742,3646153,R,0,2,-2198844000000.0,2
2,638313090748,4722720,2198847782096,R,0,2,2198844000000.0,2
3,638313090753,4722742,3646153,R,0,2,-2198844000000.0,2
4,638313091064,4722720,2198847782096,R,0,2,2198844000000.0,2
