In [14]:
!git add .
!git commit -m "commit"
!git push origin main



[main 19e135e] commit
 1 file changed, 5 insertions(+), 4 deletions(-)


To https://github.com/sinhajiya/DSE318-NLP-Assignment-Solutions.git
   34b3400..19e135e  main -> main


## IMPORTING LIBRARIES

In [1]:
import os
import numpy as np
import pandas as pd
from random import randint, choice, seed
import tensorflow as tf
import re
from collections import Counter

from sklearn.metrics import classification_report
from imblearn.under_sampling import RandomUnderSampler

In [2]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


## LOADING DATASET

In [3]:
!git clone https://github.com/islnlp/Assignment_1_2025

Cloning into 'Assignment_1_2025'...
remote: Enumerating objects: 35, done.[K
remote: Counting objects: 100% (35/35), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 35 (delta 6), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (35/35), 1.06 MiB | 7.38 MiB/s, done.
Resolving deltas: 100% (6/6), done.


In [4]:
!git clone https://github.com/sinhajiya/DSE318-NLP-Assignment-Solutions

Cloning into 'DSE318-NLP-Assignment-Solutions'...
remote: Enumerating objects: 76, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 76 (delta 0), reused 5 (delta 0), pack-reused 70 (from 1)[K
Receiving objects: 100% (76/76), 70.73 MiB | 12.80 MiB/s, done.
Resolving deltas: 100% (13/13), done.


In [5]:
def load_data(name):
  root_fp = f"/content/Assignment_1_2025/{name}"
  train = pd.read_csv(os.path.join(root_fp, "train.csv"))
  val = pd.read_csv(os.path.join(root_fp, "val.csv"))
  train = train.dropna(subset=['Sentence'])
  val = val.dropna(subset=['Sentence'])
  return train, val

In [6]:
def preprocess_text(Sentence):

  # Preprocessing steps:
  # 1. All lower case characters
  # 2. URL removal
  # 3. Multiple dots to single dot
  # 4. Extra spaces to single space
  # 5. Removes non-alphabetic chars

    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    Sentence = Sentence.lower()
    Sentence = re.sub(url_pattern, "", Sentence)
    Sentence = re.sub(r"\.{2,}", ".", Sentence)
    Sentence = re.sub(r"\s+", " ", Sentence).strip()
    Sentence = re.sub(r"[^a-zA-Z\s]", "", Sentence)
    return Sentence

In [7]:
def load_and_preprocess_data(name):

  train, val = load_data(name)
  train["Sentence_preprocessed"] = train["Sentence"].astype(str).apply(preprocess_text)
  val["Sentence_preprocessed"] = val["Sentence"].astype(str).apply(preprocess_text)

  return train, val

In [8]:
def form_vocab(data,isdataframe=True):

  vocab_size = 0
  vocab = set()
  word2index = dict()  # Gives mapping from index to word
  index2word = dict()  # Gives mapping of word to index

  if isdataframe:
    data = data["Sentence_preprocessed"]

  for sentence in data:
    for word in sentence.split():
      if word not in word2index:
        word2index[word] = vocab_size
        index2word[vocab_size] = word
        vocab.add(word)
        vocab_size += 1
  print(f"Vocabulary of {vocab_size} created")
  return vocab, vocab_size, word2index, index2word


## WORD2VEC

In [9]:
def skip_gram(data, window_size,k=5, isdataframe=True):
  seed(42)

  positive_samples = dict()
  negative_samples = dict()
  target_words = list()
  if isdataframe:
    data=data['Sentence_preprocessed']

  vocab, _, _ , _ = form_vocab(data, isdataframe=False)

  for sentence in data:
    words = sentence.split()
    num_words = len(words)

    for i in range(0,num_words):
        target_words.append(words[i])
        positive_samples[words[i]] = list()
        negative_samples[words[i]] = list()

        for j in range(1,window_size+1):
          if ((i-j)>=0):
            positive_samples[words[i]].append(words[i-j])
          if((i+j)<num_words):
            positive_samples[words[i]].append(words[i+j])
        l=0
        while l<k:
          negative_sample = choice(list(vocab))
          if negative_sample != words[i] and negative_sample not in positive_samples[words[i]]:
            negative_samples[words[i]].append(negative_sample)
            l+=1

    return positive_samples, negative_samples, target_words



In [10]:
def convert_to_index(positive_samples, negative_samples,target_words, word2index):

  target_words_index = list()
  positive_samples_index = dict()
  negative_samples_index = dict()

  for k in (target_words):
    w_idx = word2index[k]
    target_words_index.append(w_idx)

    positive_samples_index[w_idx] = [word2index[i] for i in positive_samples[k]]
    negative_samples_index[w_idx] = [word2index[i] for i in negative_samples[k]]


  return target_words_index, positive_samples_index, negative_samples_index

In [11]:
def form_data(target_words_index, positive_samples_index, negative_samples_index):

  X_train = []
  y_train = []

  for target in target_words_index:
      for pos in positive_samples_index[target]:
          X_train.append([target])
          y_train.append(1) #label =1

      for neg in negative_samples_index[target]:
          X_train.append([target])
          y_train.append(0)  # label = 0

  return np.array(X_train), np.array(y_train)

In [12]:
def create_data(data, window_size,word2index, index2word, k=5, isdataframe=True):

  positive_samples, negative_samples, target_words = skip_gram(data, window_size,k=5, isdataframe=True)
  print("Created the target words and context words pair\n")

  w = index2word[window_size]
  print(f"For word at index {window_size}:\t {w}")
  print("Positive samples are:\t",positive_samples[w])
  print("Negative samples are:\t",negative_samples[w])

  target_words_index, positive_samples_index, negative_samples_index = convert_to_index(positive_samples, negative_samples, target_words, word2index)
  print(f"\nCreated the target words and context words pair using the index for training.\n ")
  print(index2word[window_size], "index:\t", window_size)
  print("Postive samples are:\t",positive_samples_index[window_size])
  print("Negative samples are:\t",negative_samples_index[window_size])

  X_train,y_train = form_data(target_words_index, positive_samples_index, negative_samples_index)
  print("\nCreated data for training.")

  return X_train, y_train

In [13]:
def create_word2vec(X_train, y_train, vocab_size, name, index2word, word2index, epochs=10, batch_size=32):

    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, 100),  # Word Embeddings
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification (positive/negative context)
    ])

    optimizer = tf.keras.optimizers.Adam()
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    device = "/GPU:0" if tf.config.list_physical_devices('GPU') else "/CPU:0"
    with tf.device(device):
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)

    print("Model training complete!")

    embeddings = model.layers[0].get_weights()[0]
    print("The shape of the embedding created is: ",(np.asarray(embeddings)).shape)
    word = index2word[y_train[0]]
    input_vec = tf.convert_to_tensor([word2index[word]])
    embedding_layer = model.layers[0]

    print("Embedding for", word, ":", embedding_layer(input_vec).numpy().tolist())


    np.save(f"{name}_embeddings.npy", embeddings)

    return model

In [14]:
def load_embeddings(name):
   file_path = f"/content/DSE318-NLP-Assignment-Solutions/Assignment2/embeddings/{name}_embeddings.npy"
   return np.load(file_path, allow_pickle=True).astype(np.float32)

## TRAIN FFNN

In [15]:
def prepare_data_for_FFNN(data, word2index,maxlen=30):

  X_words = data['Sentence_preprocessed']
  X= list()

  for sentence in X_words:
    s = [word2index.get(word, 0) for word in sentence.split()]
    X.append(s)

  X = tf.keras.preprocessing.sequence.pad_sequences(X, maxlen=maxlen, padding='post')
  X = np.array(X)
  y = np.array(data['Tag'])
  print(f"Shape of the data is: X: {X.shape} and y: {y.shape}")
  print("\nData is ready")
  return X,y


In [16]:
def ffnn(name,vocab_size,X_train, y_train, epochs = 10, batch_size = 32,isclassbalanced=False):

  if not isclassbalanced:
    print("Resampling data due to class imbalance...\n")
    print(f"Class distribution before resampling: {Counter(y_train)}")
    undersampler = RandomUnderSampler(sampling_strategy='auto', random_state=42)
    X_train, y_train = undersampler.fit_resample(X_train, y_train)
    print(f"Class distribution after resampling: {Counter(y_train)}")


  print("Training the model...\n")
  embeddings = load_embeddings(name)
  model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, 100,weights=[embeddings], trainable=True),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
  optimizer = tf.keras.optimizers.Adam()
  model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

  device = "/GPU:0" if tf.config.list_physical_devices('GPU') else "/CPU:0"
  with tf.device(device):
      model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)

  print("Model training complete!")

  model_filename = f"{name}_model.keras"
  model.save(model_filename)
  print(f"Model saved as {model_filename}")

  return model

## EVALUATE FFNN

In [17]:
def load_model(name):
   model = tf.keras.models.load_model(f"/content/DSE318-NLP-Assignment-Solutions/Assignment2/models/{name}_model.keras")
   print(f"Model {model} loaded successfully!")
   return model

In [18]:
def evaluate(name, data, word2index):
  target_names = [name, f"non_{name}"]
  model = load_model(name)
  X_test,y_true = prepare_data_for_FFNN(data, word2index)
  y_pred_proba = model.predict(X_test)
  y_pred = (y_pred_proba > 0.5).astype(int)
  print(classification_report(y_true, y_pred, target_names=target_names))

## HATE DATASET

In [19]:
hate_train, hate_val = load_and_preprocess_data('hate')

In [20]:
hate_vocab, hate_vocab_size, hate_word2index, hate_index2word = form_vocab(hate_train,isdataframe=True)

Vocabulary of 12934 created


In [21]:
X_hate_train, y_hate_train = create_data(data= hate_train, word2index=hate_word2index, index2word=hate_index2word,window_size = 4 , k=5, isdataframe=True)

Vocabulary of 12934 created
Created the target words and context words pair

For word at index 4:	 teen
Positive samples are:	 ['mey', 'bachchiyo', 'week', 'ke', 'ek', 'saath', 'mey', 'gang']
Negative samples are:	 ['tarfa', 'rajpoot', 'jate', 'awais', 'level']

Created the target words and context words pair using the index for training.
 
teen index:	 4
Postive samples are:	 [1, 5, 3, 6, 2, 7, 1, 8]
Negative samples are:	 [4320, 9733, 1099, 9063, 5247]

Created data for training.


In [22]:
# hate_word2vec_model = create_word2vec(X_hate_train, y_hate_train, hate_vocab_size, 'hate', hate_index2word, hate_word2index, epochs=20, batch_size=32)

Epoch 1/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 0.4502 - loss: 0.6952
Epoch 2/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5834 - loss: 0.6884 
Epoch 3/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6037 - loss: 0.6860 
Epoch 4/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6115 - loss: 0.6802 
Epoch 5/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6226 - loss: 0.6758 
Epoch 6/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6115 - loss: 0.6740 
Epoch 7/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6086 - loss: 0.6762 
Epoch 8/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5864 - loss: 0.6807 
Epoch 9/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━

In [46]:
hate_X_train, hate_y_train = prepare_data_for_FFNN(hate_train,hate_word2index)

Shape of the data is: X: (3660, 30) and y: (3660,)

Data is ready


In [48]:
# hate_ffnn_model = ffnn('hate', hate_vocab_size, hate_X_train, hate_y_train,epochs=20, isclassbalanced=False)

Resampling data due to class imbalance...

Class distribution before resampling: Counter({np.int64(0): 2307, np.int64(1): 1353})
Class distribution after resampling: Counter({np.int64(0): 1353, np.int64(1): 1353})
Training the model...

Epoch 1/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5411 - loss: 0.6894
Epoch 2/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8340 - loss: 0.4899
Epoch 3/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9851 - loss: 0.0648
Epoch 4/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9992 - loss: 0.0109
Epoch 5/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9998 - loss: 0.0041
Epoch 6/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9994 - loss: 0.0029
Epoch 7/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [51]:
evaluate('hate',hate_val, hate_word2index)

Model <Sequential name=sequential_5, built=True> loaded successfully!
Shape of the data is: X: (457, 30) and y: (457,)

Data is ready
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
              precision    recall  f1-score   support

        hate       0.73      0.63      0.68       309
    non_hate       0.40      0.52      0.45       148

    accuracy                           0.60       457
   macro avg       0.57      0.58      0.57       457
weighted avg       0.63      0.60      0.61       457



## SARCASM

In [32]:
sarcasm_train, sarcasm_val = load_and_preprocess_data('sarcasm')

In [33]:
sarcasm_vocab, sarcasm_vocab_size, sarcasm_word2index, sarcasm_index2word = form_vocab(sarcasm_train,isdataframe=True)


Vocabulary of 14559 created


In [34]:
X_sarcasm_train, y_sarcasm_train = create_data(data= sarcasm_train, word2index=sarcasm_word2index, index2word=sarcasm_index2word,window_size = 4 , k=5, isdataframe=True)


Vocabulary of 14559 created
Created the target words and context words pair

For word at index 4:	 black
Positive samples are:	 ['meri', 'display', 'log', 'peh', 'jo', 'chorh', 'mashaallah', 'k']
Negative samples are:	 ['rbhatkal', 'sports', 'noteban', 'cheenen', 'iliyana']

Created the target words and context words pair using the index for training.
 
black index:	 4
Postive samples are:	 [3, 5, 2, 6, 1, 7, 0, 8]
Negative samples are:	 [396, 5502, 4542, 10432, 5812]

Created data for training.


In [35]:
# sarcasm_word2vec_model = create_word2vec(X_sarcasm_train, y_sarcasm_train, sarcasm_vocab_size, 'sarcasm', sarcasm_index2word, sarcasm_word2index, epochs=20, batch_size=32)


Epoch 1/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 146ms/step - accuracy: 0.5352 - loss: 0.6921
Epoch 2/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.5621 - loss: 0.6885  
Epoch 3/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5636 - loss: 0.6871 
Epoch 4/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5798 - loss: 0.6858 
Epoch 5/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5941 - loss: 0.6807 
Epoch 6/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5995 - loss: 0.6797 
Epoch 7/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5436 - loss: 0.6862 
Epoch 8/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5480 - loss: 0.6868 
Epoch 9/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [52]:
sarcasm_X_train, sarcasm_y_train = prepare_data_for_FFNN(sarcasm_train,sarcasm_word2index)


Shape of the data is: X: (4200, 30) and y: (4200,)

Data is ready


In [54]:
(pd.DataFrame(sarcasm_y_train)).value_counts() #imbalance

Unnamed: 0_level_0,count
0,Unnamed: 1_level_1
0,3797
1,403


In [55]:
# sarcasm_model = ffnn('sarcasm', sarcasm_vocab_size, sarcasm_X_train, sarcasm_y_train,epochs=20, isclassbalanced=False)


Resampling data due to class imbalance...

Class distribution before resampling: Counter({np.int64(0): 3797, np.int64(1): 403})
Class distribution after resampling: Counter({np.int64(0): 403, np.int64(1): 403})
Training the model...

Epoch 1/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.6332 - loss: 0.6733
Epoch 2/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7627 - loss: 0.5098
Epoch 3/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9836 - loss: 0.1934
Epoch 4/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0344
Epoch 5/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0095
Epoch 6/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0047
Epoch 7/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [57]:
evaluate('sarcasm',sarcasm_val, sarcasm_word2index)


Model <Sequential name=sequential_6, built=True> loaded successfully!
Shape of the data is: X: (525, 30) and y: (525,)

Data is ready
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
              precision    recall  f1-score   support

     sarcasm       0.99      0.90      0.94       474
 non_sarcasm       0.49      0.88      0.63        51

    accuracy                           0.90       525
   macro avg       0.74      0.89      0.79       525
weighted avg       0.94      0.90      0.91       525



## Humor

In [59]:
humor_train, humor_val = load_and_preprocess_data('humor')

In [60]:
humor_vocab, humor_vocab_size, humor_word2index, humor_index2word = form_vocab(humor_train,isdataframe=True)


Vocabulary of 7179 created


In [61]:
X_humor_train, y_humor_train = create_data(data= humor_train, word2index=humor_word2index, index2word=humor_index2word,window_size = 4 , k=5, isdataframe=True)


Vocabulary of 7179 created
Created the target words and context words pair

For word at index 4:	 rassi
Positive samples are:	 ['like', 'jal', 'is', 'gayee', 'scindia', 'aunty', 'jyotiraditya', 'ki']
Negative samples are:	 ['dunia', 'quality', 'bitiya', 'africans', 'manjultoons']

Created the target words and context words pair using the index for training.
 
rassi index:	 4
Postive samples are:	 [3, 5, 2, 6, 1, 7, 0, 8]
Negative samples are:	 [5304, 3489, 4706, 2642, 3264]

Created data for training.


In [62]:
# humor_word2vec_model = create_word2vec(X_humor_train, y_humor_train, humor_vocab_size, 'humor', humor_index2word, humor_word2index, epochs=20, batch_size=32)


Epoch 1/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 173ms/step - accuracy: 0.4902 - loss: 0.6942
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.4994 - loss: 0.6936
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5438 - loss: 0.6916
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5605 - loss: 0.6908
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5584 - loss: 0.6899 
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5584 - loss: 0.6885 
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5532 - loss: 0.6887 
Epoch 8/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6122 - loss: 0.6812 
Epoch 9/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [65]:
humor_X_train, humor_y_train = prepare_data_for_FFNN(humor_train,humor_word2index)


Shape of the data is: X: (2360, 30) and y: (2360,)

Data is ready


In [68]:
(pd.DataFrame(humor_y_train)).value_counts()  #class is imbalanced

Unnamed: 0_level_0,count
0,Unnamed: 1_level_1
1,1407
0,953


In [66]:
# humor_ffnn_model = ffnn('humor', humor_vocab_size, humor_X_train, humor_y_train,epochs=20, isclassbalanced=False)


Resampling data due to class imbalance...

Class distribution before resampling: Counter({np.int64(1): 1407, np.int64(0): 953})
Class distribution after resampling: Counter({np.int64(0): 953, np.int64(1): 953})
Training the model...

Epoch 1/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.6224 - loss: 0.6570
Epoch 2/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7418 - loss: 0.5388
Epoch 3/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9699 - loss: 0.1858
Epoch 4/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9934 - loss: 0.0279
Epoch 5/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0069
Epoch 6/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9991 - loss: 0.0039
Epoch 7/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [70]:
evaluate('humor',humor_val, humor_word2index)

Model <Sequential name=sequential_8, built=True> loaded successfully!
Shape of the data is: X: (295, 30) and y: (295,)

Data is ready
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step
              precision    recall  f1-score   support

       humor       0.55      0.72      0.62       119
   non_humor       0.76      0.60      0.67       176

    accuracy                           0.65       295
   macro avg       0.65      0.66      0.65       295
weighted avg       0.67      0.65      0.65       295



In [72]:
# hbv = 42421

# print(hbv)