In [1]:
import pickle
import joblib

import numpy as np

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, LSTM, Bidirectional, Dropout
from tensorflow.keras.metrics import CategoricalAccuracy, AUC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import BaggingClassifier

In [2]:
# Train the SDC model: a bagged ensemble of multinomial naive Bayes classifiers
# fitted on the pre-computed SDC feature matrix and label vector.
X = np.load("./dataset/SDC_X.npy")
Y = np.load("./dataset/SDC_Y.npy")

print("X Shape:", X.shape)
print("Y Shape:", Y.shape)

# The base estimator is passed positionally on purpose: scikit-learn renamed the
# keyword from `base_estimator` to `estimator` (1.2) and later removed the old
# name (1.4), so the positional form is the one spelling that works on both.
# `random_state` pins the bootstrap sampling so re-running the notebook
# reproduces the same saved model.
model_BAG = BaggingClassifier(MultinomialNB(), n_estimators=500, random_state=42)
model_BAG.fit(X, Y)

X Shape: (12720, 10000)
Y Shape: (12720,)


In [3]:
# Persist the fitted bagging ensemble. joblib.dump returns the list of file
# names it wrote, which the notebook echoes as this cell's output.
sdc_model_path = "./models/SDC"
joblib.dump(model_BAG, sdc_model_path)

['./models/SDC']

In [4]:
# Train the SCC model: three stacked bidirectional LSTM layers followed by a
# dense head, with the embedding layer initialised from pre-trained word vectors.

# Width of each pre-trained vector; every vector read from glove_embeddings.pkl
# is validated against this value below.
EMBEDDING_DIM = 50

X = np.load("./dataset/SCC_X.npy")
Y = np.load("./dataset/SCC_Y.npy")

# SECURITY NOTE: pickle.load can execute arbitrary code; only load these files
# from a trusted source. `with` guarantees the file handles are closed.
with open("./models/message_tokenizer.pkl", "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)
with open("./dataset/glove_embeddings.pkl", "rb") as embeddings_file:
    embeddings = pickle.load(embeddings_file)

# One row per word index plus one extra row
# (presumably index 0 is reserved for padding — confirm against the tokenizer).
vocab_size = len(tokenizer.word_index) + 1

# Words missing from the pre-trained vectors keep their random initial row.
embeddingMatrix = np.random.random((vocab_size, EMBEDDING_DIM))
for word, i in tokenizer.word_index.items():
    embeddingVector = embeddings.get(word)
    if embeddingVector is None:
        continue
    if len(embeddingVector) != EMBEDDING_DIM:
        # Fail loudly instead of printing and calling exit(), which is not a
        # meaningful way to stop a notebook cell.
        raise ValueError(
            f"Embedding vector for word {word!r} has length {len(embeddingVector)}, "
            f"expected {EMBEDDING_DIM}. Please make sure the pre-trained embeddings "
            f"are {EMBEDDING_DIM}-dimensional."
        )
    embeddingMatrix[i] = embeddingVector

bilstm_input_layer = Input(shape=(X.shape[1],))

# Keep a handle on the layer object so the pre-trained matrix can be loaded into
# it once it has been built (calling it on the input tensor creates its weights).
embedding_layer = Embedding(vocab_size, EMBEDDING_DIM, trainable=True)
bilstm_embedding_layer = embedding_layer(bilstm_input_layer)
# Previously embeddingMatrix was computed but never handed to the layer, so the
# model trained from Keras' default random initialisation.
embedding_layer.set_weights([embeddingMatrix])

bilstm_layer_1 = Bidirectional(LSTM(32, return_sequences=True, recurrent_dropout=0.2))(bilstm_embedding_layer)
dropout_layer_1 = Dropout(0.2)(bilstm_layer_1)
bilstm_layer_2 = Bidirectional(LSTM(32, return_sequences=True, recurrent_dropout=0.2))(dropout_layer_1)
dropout_layer_2 = Dropout(0.2)(bilstm_layer_2)
# The last recurrent layer returns only its final state, feeding the dense head.
bilstm_layer_3 = Bidirectional(LSTM(32, recurrent_dropout=0.2))(dropout_layer_2)
dropout_layer_3 = Dropout(0.2)(bilstm_layer_3)
bilstm_hidden_layer_1 = Dense(200, activation='relu')(dropout_layer_3)
bilstm_hidden_layer_2 = Dense(100, activation='relu')(bilstm_hidden_layer_1)
bilstm_hidden_layer_3 = Dense(50, activation='relu')(bilstm_hidden_layer_2)
# One sigmoid unit per label column of Y.
bilstm_output_layer = Dense(Y.shape[1], activation='sigmoid')(bilstm_hidden_layer_3)

model_bilstm = Model(inputs=bilstm_input_layer, outputs=bilstm_output_layer)

# NOTE(review): CategoricalAccuracy compares argmax of prediction and target. If
# Y is multi-label (which sigmoid + binary_crossentropy suggests), BinaryAccuracy
# may be the intended metric — confirm against the label format before changing.
model_bilstm.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=[CategoricalAccuracy(name="categorical_accuracy"), AUC(name="auc")],
)

model_bilstm.fit(X, Y, batch_size=32, epochs=3, shuffle=True, verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1961caad250>

In [5]:
# Write the trained BiLSTM model to disk at the path below.
ssc_model_path = "./models/SSC.h5"
model_bilstm.save(ssc_model_path)