<a href="https://colab.research.google.com/github/prashanth741/NLP-LAB/blob/main/12_9_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, Dense, Input, GlobalAveragePooling1D, Conv1D, MaxPooling1D, Flatten, Dropout, LSTM, Bidirectional
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping

# --- Text-encoding hyperparameters -------------------------------------
MAX_NUM_WORDS = 30000  # passed to Tokenizer(num_words=...)
MAX_SEQ_LEN = 64       # every sequence is padded/truncated to this length
EMBED_DIM = 100        # embedding width used by the model builders below


def _pad_to_max_len(sequences):
    """Pad or truncate integer sequences at their end to MAX_SEQ_LEN."""
    return pad_sequences(
        sequences,
        maxlen=MAX_SEQ_LEN,
        padding='post',
        truncating='post',
    )


# The vocabulary is fitted on the training texts only; words not seen
# there are mapped to the '<OOV>' token when the test texts are encoded.
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, oov_token='<OOV>')
tokenizer.fit_on_texts(X_train.tolist())

# Texts -> lists of integer word indices.
Xtr_seq = tokenizer.texts_to_sequences(X_train)
Xte_seq = tokenizer.texts_to_sequences(X_test)

# Integer lists -> fixed-width arrays.
Xtr_pad = _pad_to_max_len(Xtr_seq)
Xte_pad = _pad_to_max_len(Xte_seq)

# Size of the embedding table: capped at MAX_NUM_WORDS.
# NOTE(review): the +1 presumably accounts for index 0, which word_index
# never assigns and pad_sequences uses as its fill value -- confirm.
vocab_size = min(MAX_NUM_WORDS, 1 + len(tokenizer.word_index))

def build_avg_mlp():
    """Build and compile the averaged-embedding MLP binary classifier.

    Token ids of length ``MAX_SEQ_LEN`` are embedded, averaged over the
    sequence axis, passed through a 128-unit ReLU layer with dropout 0.3,
    and mapped to a single sigmoid output.

    Reads the module-level ``MAX_SEQ_LEN``, ``vocab_size`` and ``EMBED_DIM``.

    Returns:
        A ``Model`` compiled with Adam, binary cross-entropy and the
        accuracy metric.
    """
    token_ids = Input(shape=(MAX_SEQ_LEN,))

    # NOTE(review): the embedding is created without mask_zero, so padded
    # positions presumably contribute to the average below -- confirm this
    # is intended.
    features = Embedding(vocab_size, EMBED_DIM)(token_ids)
    for layer in (
        GlobalAveragePooling1D(),
        Dense(128, activation='relu'),
        Dropout(0.3),
    ):
        features = layer(features)
    probability = Dense(1, activation='sigmoid')(features)

    net = Model(inputs=token_ids, outputs=probability)
    net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return net

def build_cnn():
    """Build and compile the 1-D convolutional binary classifier.

    Architecture: embedding -> Conv1D(128, kernel 5) -> max-pool(2) ->
    Conv1D(64, kernel 3) -> max-pool(2) -> flatten -> Dense(64, relu) ->
    Dropout(0.3) -> single sigmoid unit.

    The sequence length is declared with an explicit ``Input`` layer instead
    of the ``input_length`` argument of ``Embedding``; that argument is
    deprecated in Keras 3 and only triggers a warning there. The explicit
    input keeps the time dimension static, which ``Flatten`` followed by
    ``Dense`` relies on, and matches how the other builders in this file
    declare their input.

    Reads the module-level ``MAX_SEQ_LEN``, ``vocab_size`` and ``EMBED_DIM``.

    Returns:
        A ``Sequential`` model compiled with Adam, binary cross-entropy and
        the accuracy metric.
    """
    model = Sequential([
        Input(shape=(MAX_SEQ_LEN,)),
        Embedding(vocab_size, EMBED_DIM),
        Conv1D(128, 5, activation='relu'),
        MaxPooling1D(2),
        Conv1D(64, 3, activation='relu'),
        MaxPooling1D(2),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

def build_bilstm():
    """Build and compile the bidirectional-LSTM binary classifier.

    Token ids of length ``MAX_SEQ_LEN`` are embedded, encoded by a
    bidirectional LSTM with 64 units per direction, passed through a
    64-unit ReLU layer with dropout 0.3, and mapped to a single sigmoid
    output.

    Reads the module-level ``MAX_SEQ_LEN``, ``vocab_size`` and ``EMBED_DIM``.

    Returns:
        A ``Model`` compiled with Adam, binary cross-entropy and the
        accuracy metric.
    """
    sequence_in = Input(shape=(MAX_SEQ_LEN,))

    # NOTE(review): the embedding is created without mask_zero, so the LSTM
    # presumably also steps over the padded positions -- confirm this is
    # intended.
    embedded = Embedding(vocab_size, EMBED_DIM)(sequence_in)

    encoder = Bidirectional(LSTM(64))
    encoded = encoder(embedded)

    hidden = Dense(64, activation='relu')(encoded)
    regularized = Dropout(0.3)(hidden)
    prediction = Dense(1, activation='sigmoid')(regularized)

    classifier = Model(inputs=sequence_in, outputs=prediction)
    classifier.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return classifier

EPOCHS = 6
BATCH = 64
# Stop a run once val_loss has failed to improve for 2 epochs in a row and
# restore the weights from the best epoch seen.
es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Train & evaluate each architecture on the same padded data.
# NOTE(review): y_train, y_test and classification_report are not defined in
# this cell; presumably an earlier cell provides them (classification_report
# looks like sklearn.metrics.classification_report) -- confirm.
# NOTE(review): per the Keras docs, validation_split holds out the tail of
# the arrays before shuffling; confirm the training data is shuffled upstream.
for name, builder in [("AvgEmbed-MLP", build_avg_mlp), ("CNN-1D", build_cnn), ("BiLSTM", build_bilstm)]:
    model = builder()
    model.fit(Xtr_pad, y_train, validation_split=0.1, epochs=EPOCHS, batch_size=BATCH, callbacks=[es], verbose=1)
    # Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels.
    preds = (model.predict(Xte_pad).ravel() >= 0.5).astype(int)
    print(f"\n{name} results:")
    # zero_division=0 reports 0 for a class that is never predicted (the same
    # value the default yields) without emitting an UndefinedMetricWarning
    # for every affected metric.
    print(classification_report(y_test, preds, digits=4, zero_division=0))


Epoch 1/6
2/2 ━━━━━━━━━━━━━━━━━━━━ 1s 224ms/step - accuracy: 0.5527 - loss: 0.6889 - val_accuracy: 0.2500 - val_loss: 0.7293
Epoch 2/6
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 64ms/step - accuracy: 0.5527 - loss: 0.6911 - val_accuracy: 0.2500 - val_loss: 0.7483
Epoch 3/6
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 66ms/step - accuracy: 0.5527 - loss: 0.6873 - val_accuracy: 0.2500 - val_loss: 0.7629
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 97ms/step

AvgEmbed-MLP results:
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        12
           1     0.4000    1.0000    0.5714         8

    accuracy                         0.4000        20
   macro avg     0.2000    0.5000    0.2857        20
weighted avg     0.1600    0.4000    0.2286        20

Epoch 1/6


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


2/2 ━━━━━━━━━━━━━━━━━━━━ 2s 373ms/step - accuracy: 0.5382 - loss: 0.6918 - val_accuracy: 0.2500 - val_loss: 0.7303
Epoch 2/6
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 74ms/step - accuracy: 0.5527 - loss: 0.6852 - val_accuracy: 0.2500 - val_loss: 0.7912
Epoch 3/6
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 81ms/step - accuracy: 0.5527 - loss: 0.6821 - val_accuracy: 0.2500 - val_loss: 0.8471
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 118ms/step

CNN-1D results:
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        12
           1     0.4000    1.0000    0.5714         8

    accuracy                         0.4000        20
   macro avg     0.2000    0.5000    0.2857        20
weighted avg     0.1600    0.4000    0.2286        20

Epoch 1/6


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


2/2 ━━━━━━━━━━━━━━━━━━━━ 6s 1s/step - accuracy: 0.4514 - loss: 0.6937 - val_accuracy: 0.2500 - val_loss: 0.7072
Epoch 2/6
2/2 ━━━━━━━━━━━━━━━━━━━━ 1s 162ms/step - accuracy: 0.5961 - loss: 0.6892 - val_accuracy: 0.2500 - val_loss: 0.7259
Epoch 3/6
2/2 ━━━━━━━━━━━━━━━━━━━━ 1s 130ms/step - accuracy: 0.5527 - loss: 0.6915 - val_accuracy: 0.2500 - val_loss: 0.7469
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 342ms/step

BiLSTM results:
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        12
           1     0.4000    1.0000    0.5714         8

    accuracy                         0.4000        20
   macro avg     0.2000    0.5000    0.2857        20
weighted avg     0.1600    0.4000    0.2286        20



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
