CNN

In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Input, Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Activation
from keras.callbacks import EarlyStopping 
from keras import utils
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.optimizers import RMSprop
df= pd.read_csv("Embedded_data.csv")
train = df.drop(['Tokens'], axis = 1)

X = train.Text
Y = train.Label
le = LabelEncoder() 
Y = le.fit_transform(Y)
Y = Y.reshape(-1,1)

X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.15)

max_words = 1000
max_len = 200
tok = Tokenizer(num_words=max_words)
X_train = X_train.astype(str)
tok.fit_on_texts(X_train)
sequences = tok.texts_to_sequences(X_train)
sequences_matrix = utils.pad_sequences(sequences,maxlen=max_len)

def CNN():
    inputs = Input(name='inputs', shape=[max_len])
    layer = Embedding(max_words, 100, input_length=max_len)(inputs)
    layer = Conv1D(filters=512, kernel_size=3, padding='same', activation='relu')(layer)
    layer = MaxPooling1D(pool_size=2)(layer)
    layer = Conv1D(filters=128, kernel_size=3, padding='same', activation='relu')(layer)
    layer = MaxPooling1D(pool_size=2)(layer)
    layer = Flatten()(layer)
    layer = Dense(512, name='FC1')(layer)
    layer = Activation('relu')(layer)
    layer = Dropout(0.5)(layer)
    layer = Dense(1, name='out_layer')(layer)
    layer = Activation('sigmoid')(layer)
    model = Model(inputs=inputs, outputs=layer)
    return model


In [7]:
model = CNN()
model.summary()
model.compile(loss='binary_crossentropy',optimizer=RMSprop(),metrics=['accuracy'])



In [8]:
model.fit(sequences_matrix,Y_train,batch_size=128,epochs=10,
          validation_split=0.2,callbacks=[EarlyStopping(monitor='val_loss',min_delta=0.0001)])

Epoch 1/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 236ms/step - accuracy: 0.5871 - loss: 0.6785 - val_accuracy: 0.5932 - val_loss: 0.6769
Epoch 2/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 234ms/step - accuracy: 0.6066 - loss: 0.6588 - val_accuracy: 0.6521 - val_loss: 0.6188
Epoch 3/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 228ms/step - accuracy: 0.7007 - loss: 0.5708 - val_accuracy: 0.6534 - val_loss: 0.6040
Epoch 4/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 235ms/step - accuracy: 0.7436 - loss: 0.5155 - val_accuracy: 0.6658 - val_loss: 0.5912
Epoch 5/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 232ms/step - accuracy: 0.7596 - loss: 0.4851 - val_accuracy: 0.7005 - val_loss: 0.6173


<keras.src.callbacks.history.History at 0x24a2cb695e0>

In [9]:
X_test = X_test.astype(str).tolist()

# Now process X_test with the tokenizer and pad_sequences
test_sequences = tok.texts_to_sequences(X_test)
test_sequences_matrix = utils.pad_sequences(test_sequences, maxlen=max_len)

# Evaluate the model on the test data
accr = model.evaluate(test_sequences_matrix, Y_test)

test_sequences = tok.texts_to_sequences(X_test)
test_sequences_matrix = utils.pad_sequences(test_sequences,maxlen=max_len)

accr = model.evaluate(test_sequences_matrix,Y_test)
print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0],accr[1]))

[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.7355 - loss: 0.5689
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.7355 - loss: 0.5689
Test set
  Loss: 0.587
  Accuracy: 0.727


RNN

In [3]:
from keras.layers import Input, Embedding, Dense, Dropout, Activation, LSTM
from keras.optimizers import RMSprop
def RNN():
    inputs = Input(name='inputs',shape=[max_len])
    layer = Embedding(max_words,50,input_length=max_len)(inputs)
    layer = LSTM(512)(layer)
    layer = Dense(256,name='FC1')(layer)
    layer = Activation('relu')(layer)
    layer = Dropout(0.5)(layer)
    layer = Dense(1,name='out_layer')(layer)
    layer = Activation('sigmoid')(layer)
    model = Model(inputs=inputs,outputs=layer)
    return model

In [4]:
model = RNN()
model.summary()
model.compile(loss='binary_crossentropy',optimizer=RMSprop(),metrics=['accuracy'])

In [5]:
model.fit(sequences_matrix,Y_train,batch_size=128,epochs=10,
          validation_split=0.2,callbacks=[EarlyStopping(monitor='val_loss',min_delta=0.0001)])

Epoch 1/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 2s/step - accuracy: 0.5973 - loss: 0.6756 - val_accuracy: 0.6086 - val_loss: 0.6626
Epoch 2/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 2s/step - accuracy: 0.6335 - loss: 0.6363 - val_accuracy: 0.6614 - val_loss: 0.6216
Epoch 3/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m328s[0m 4s/step - accuracy: 0.7009 - loss: 0.5814 - val_accuracy: 0.6651 - val_loss: 0.6177
Epoch 4/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m348s[0m 4s/step - accuracy: 0.7185 - loss: 0.5574 - val_accuracy: 0.6584 - val_loss: 0.6187


<keras.src.callbacks.history.History at 0x241ca8101a0>

In [7]:

X_test = X_test.astype(str).tolist()

# Now process X_test with the tokenizer and pad_sequences
test_sequences = tok.texts_to_sequences(X_test)
test_sequences_matrix = utils.pad_sequences(test_sequences, maxlen=max_len)

# Evaluate the model on the test data
accr = model.evaluate(test_sequences_matrix, Y_test)

test_sequences = tok.texts_to_sequences(X_test)
test_sequences_matrix = utils.pad_sequences(test_sequences,maxlen=max_len)

accr = model.evaluate(test_sequences_matrix,Y_test)
print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0],accr[1]))

[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 480ms/step - accuracy: 0.6629 - loss: 0.6097
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 476ms/step - accuracy: 0.6629 - loss: 0.6097
Test set
  Loss: 0.608
  Accuracy: 0.661
