## LSTM Model

In [23]:
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

df = pd.read_csv('C:\\Users\\HP\\OneDrive\\Desktop\\t_dataset.csv', index_col=False)

df['Sarcasm'] = df['Sarcasm'].apply(lambda x: 1 if x.lower() == 'yes' else 0)  # Map 'yes' to 1, 'no' to 0

max_len = 100  # Maximum sequence length
vocab_size = 10000  # Limit on the number of words in vocabulary
embedding_dim = 128  # Dimensionality of word embeddings

texts = df['Tweet'].astype(str)
labels = df['Sarcasm'].astype(int)

tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len)

X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# Define the Bidirectional LSTM model
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(32)))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5)

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

new_text = "This new restaurant is a real gem, NOT."
sequence = tokenizer.texts_to_sequences([new_text])
padded_sequence = pad_sequences(sequence, maxlen=max_len)
prediction = model.predict(padded_sequence)
if prediction > 0.5:
    print("This text is predicted to be sarcastic.")
else:
    print("This text is predicted to be non-sarcastic.")


Epoch 1/20




[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 268ms/step - accuracy: 0.6045 - loss: 0.6500 - val_accuracy: 0.8191 - val_loss: 0.4112
Epoch 2/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 272ms/step - accuracy: 0.8293 - loss: 0.3986 - val_accuracy: 0.8057 - val_loss: 0.4099
Epoch 3/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 248ms/step - accuracy: 0.9254 - loss: 0.2188 - val_accuracy: 0.7638 - val_loss: 0.5189
Epoch 4/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 269ms/step - accuracy: 0.9678 - loss: 0.1095 - val_accuracy: 0.7571 - val_loss: 0.6097
Epoch 5/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 254ms/step - accuracy: 0.9843 - loss: 0.0660 - val_accuracy: 0.7722 - val_loss: 0.6776
Epoch 6/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 250ms/step - accuracy: 0.9822 - loss: 0.0546 - val_accuracy: 0.7370 - val_loss: 0.8967
Epoch 7/20
[1m75/75[0m [32m━━━

## GRU Model

In [25]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout

max_len = 100  
vocab_size = 10000  
embedding_dim = 128  

texts = df['Tweet'] 
labels = df['Sarcasm']  

tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

padded_sequences = pad_sequences(sequences, maxlen=max_len)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# Define the GRU model
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
model.add(GRU(64, return_sequences=True))  
model.add(Dropout(0.2))
model.add(GRU(32))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid')) 

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

new_text = "This new restaurant is a real gem, NOT." 
sequence = tokenizer.texts_to_sequences([new_text])
padded_sequence = pad_sequences(sequence, maxlen=max_len)
prediction = model.predict(padded_sequence)
if prediction > 0.5:
    print("This text is predicted to be sarcastic.")
else:
    print("This text is predicted to be non-sarcastic.")

Epoch 1/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 240ms/step - accuracy: 0.6076 - loss: 0.6484 - val_accuracy: 0.7906 - val_loss: 0.4510
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 183ms/step - accuracy: 0.8507 - loss: 0.3423 - val_accuracy: 0.7655 - val_loss: 0.4973
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 178ms/step - accuracy: 0.9293 - loss: 0.1914 - val_accuracy: 0.7554 - val_loss: 0.5683
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 196ms/step - accuracy: 0.9721 - loss: 0.0915 - val_accuracy: 0.7454 - val_loss: 0.7832
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 199ms/step - accuracy: 0.9921 - loss: 0.0357 - val_accuracy: 0.7219 - val_loss: 1.1292
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 209ms/step - accuracy: 0.9931 - loss: 0.0255 - val_accuracy: 0.7387 - val_loss: 1.0466
Epoch 7/10
[1m75/75[

## CNN Model

In [27]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout

max_len = 100 
vocab_size = 10000 
embedding_dim = 128
filter_sizes = [3, 4, 5] 
num_filters = 64 

texts = df['Tweet']  
labels = df['Sarcasm'] 

tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

padded_sequences = pad_sequences(sequences, maxlen=max_len)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

embedding_layer = Embedding(vocab_size, embedding_dim, input_length=max_len)

# Define the CNN model
model = Sequential()
model.add(embedding_layer)

for filter_size in filter_sizes:
    conv_layer = Conv1D(num_filters, filter_size, activation='tanh', padding='same')
    gated_conv_layer = Conv1D(num_filters, filter_size, activation='sigmoid', padding='same')
    model.add(conv_layer)
    model.add(gated_conv_layer)
    model.add(Dropout(0.2))
model.add(GlobalMaxPooling1D())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid')) 

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

new_text = "This new restaurant is a real gem, NOT."  # Replace with your text
sequence = tokenizer.texts_to_sequences([new_text])
padded_sequence = pad_sequences(sequence, maxlen=max_len)
prediction = model.predict(padded_sequence)
if prediction > 0.5:
    print("This text is predicted to be sarcastic.")
else:
    print("This text is predicted to be non-sarcastic.")

Epoch 1/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 143ms/step - accuracy: 0.5325 - loss: 0.7092 - val_accuracy: 0.5812 - val_loss: 0.6817
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 116ms/step - accuracy: 0.5040 - loss: 0.7060 - val_accuracy: 0.5812 - val_loss: 0.6853
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 73ms/step - accuracy: 0.5543 - loss: 0.6906 - val_accuracy: 0.5812 - val_loss: 0.6827
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 109ms/step - accuracy: 0.5687 - loss: 0.6860 - val_accuracy: 0.5812 - val_loss: 0.6922
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 135ms/step - accuracy: 0.5624 - loss: 0.6888 - val_accuracy: 0.5812 - val_loss: 0.6809
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 109ms/step - accuracy: 0.5857 - loss: 0.6733 - val_accuracy: 0.6884 - val_loss: 0.6004
Epoch 7/10
[1m75/75[0m [

## RNN Model

In [43]:
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical  # For categorical crossentropy

max_len = 100
vocab_size = 10000
embedding_dim = 128
texts = df['Tweet']

labels = df['Sarcasm'].map({'sarcasm': 1, 'non-sarcasm': 0}).fillna(0).astype(int) 

labels = to_categorical(labels, num_classes=2) 

tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len)

X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# Define the RNN model (consider Bidirectional LSTM or hyperparameter tuning)
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
model.add(LSTM(64, return_sequences=True)) 
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))  # Experiment with more epochs

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")


new_text = "This new restaurant is a real gem, NOT."
sequence = tokenizer.texts_to_sequences([new_text])
padded_sequence = pad_sequences(sequence, maxlen=max_len)

prediction = model.predict(padded_sequence)
predicted_class = prediction.argmax(axis=1)[0]  

if predicted_class == 1:
  print("This text is predicted to be sarcastic.")
else:
  print("This text is predicted to be non-sarcastic.")



Epoch 1/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 183ms/step - accuracy: 0.9960 - loss: 0.1657 - val_accuracy: 1.0000 - val_loss: 2.7129e-04
Epoch 2/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 132ms/step - accuracy: 1.0000 - loss: 3.8461e-04 - val_accuracy: 1.0000 - val_loss: 1.2580e-04
Epoch 3/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 178ms/step - accuracy: 1.0000 - loss: 1.9432e-04 - val_accuracy: 1.0000 - val_loss: 8.0815e-05
Epoch 4/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 201ms/step - accuracy: 1.0000 - loss: 1.3744e-04 - val_accuracy: 1.0000 - val_loss: 5.8036e-05
Epoch 5/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 180ms/step - accuracy: 1.0000 - loss: 1.0045e-04 - val_accuracy: 1.0000 - val_loss: 4.2834e-05
Epoch 6/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 178ms/step - accuracy: 1.0000 - loss: 7.1472e-05 - val_accuracy: 1.0000 -