# CountVectorizer + Keras MLP

In [1]:
from ipynb.fs.full.data_loader import load_train_test_data
from ipynb.fs.full.transformers import DenseTransformer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.pipeline import make_pipeline

---

In [2]:
# Load the train/test split from the project-local data_loader notebook.
# The features are later fed to CountVectorizer/TfidfVectorizer, so they are
# expected to be iterables of text documents; labels are presumably binary
# (0/1) — TODO confirm against data_loader.
train_X, test_X, train_y, test_y = load_train_test_data()

In [3]:
def create_model():
    """Build and compile the MLP used as the final pipeline step.

    Architecture: three fully connected ReLU layers with 512, 256 and 128
    units, followed by a single sigmoid output unit. No input shape is
    declared here. The model is compiled with the Adam optimizer, binary
    cross-entropy loss, and accuracy as the reported metric.

    Returns:
        The compiled ``Sequential`` model (not yet fitted).
    """
    # Hidden stack, widest layer first.
    hidden_layers = [Dense(units, activation='relu') for units in (512, 256, 128)]
    # Single sigmoid unit as the output layer.
    output_layer = Dense(1, activation='sigmoid')

    network = Sequential(hidden_layers + [output_layer])
    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [4]:
# Options forwarded through the Keras scikit-learn wrapper; 10% of the
# training data is held out for validation during fitting.
count_fit_options = {'epochs': 5, 'batch_size': 32, 'validation_split': 0.1}

# Token counts -> DenseTransformer (project-local; presumably converts the
# sparse matrix to a dense array — confirm in transformers) -> Keras MLP.
clf = make_pipeline(
    CountVectorizer(),
    DenseTransformer(),
    KerasClassifier(build_fn=create_model, **count_fit_options),
)

clf.fit(train_X, train_y)
# Last expression of the cell: the score on the held-out test set is displayed.
clf.score(test_X, test_y)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


0.9488818049430847

In [5]:
# Spot-check: predictions for the first ten test samples.
first_ten_samples = test_X[:10]
clf.predict(first_ten_samples)



array([[1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0]])

### TfidfVectorizer – performs similarly to CountVectorizer (test accuracy ≈ 0.952 vs. ≈ 0.949 in this run)

In [6]:
# Same training options as the CountVectorizer pipeline, so the two runs
# differ only in the vectorizer.
tfidf_fit_options = {'epochs': 5, 'batch_size': 32, 'validation_split': 0.1}

# TF-IDF features -> DenseTransformer (project-local; presumably converts the
# sparse matrix to a dense array — confirm in transformers) -> Keras MLP.
clf_tf = make_pipeline(
    TfidfVectorizer(),
    DenseTransformer(),
    KerasClassifier(build_fn=create_model, **tfidf_fit_options),
)

clf_tf.fit(train_X, train_y)
# Last expression of the cell: the score on the held-out test set is displayed.
clf_tf.score(test_X, test_y)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


0.9520766735076904