In [1]:
import json
import urllib
import urllib.request
from pathlib import Path

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [2]:
# Fetch the sarcasm headlines dataset once; later runs reuse the local copy so
# Restart & Run All does not depend on the network.
url = 'https://storage.googleapis.com/download.tensorflow.org/data/sarcasm.json'
dataset_path = Path('sarcasm.json')

if not dataset_path.exists():
    # Download to a side file first and rename afterwards, so an interrupted
    # transfer never leaves a truncated 'sarcasm.json' for the loader to read.
    partial_path = dataset_path.with_name(dataset_path.name + '.part')
    urllib.request.urlretrieve(url, str(partial_path))
    partial_path.replace(dataset_path)

('sarcasm.json', <http.client.HTTPMessage at 0x28dce029ac0>)

In [18]:
# Tokenizer / embedding configuration.
vocab_size, embedding_dim, max_length = 1000, 16, 120
trunc_type = padding_type = 'post'
oov_tok = "<OOV>"

# Number of leading examples used for training; the remainder is held out.
training_size = 20000

# Containers for the raw headlines and their labels.
sentences, labels = [], []

In [22]:
# Read the headlines and their sarcasm flags from the downloaded JSON file.
# Both containers are rebuilt from scratch here so the cell can be re-run
# safely: appending to lists created in another cell would duplicate every row
# on a second run, and would fail once `labels` has been turned into an array.
with open("sarcasm.json", "r", encoding="utf-8") as json_file:
    data = json.load(json_file)

sentences = [row['headline'] for row in data]
labels = np.array([int(row['is_sarcastic']) for row in data])
    


In [23]:
# The first `training_size` examples train the model; everything after that
# index is kept for validation.
train_sentences, validation_sentences = sentences[:training_size], sentences[training_size:]
train_labels, validation_labels = labels[:training_size], labels[training_size:]

In [24]:
# Sanity-check the split: one size per line, in the order
# training_size, train sentences/labels, validation sentences/labels.
print(
    training_size,
    len(train_sentences),
    len(train_labels),
    len(validation_sentences),
    len(validation_labels),
    sep="\n",
)

20000
20000
20000
6709
6709


In [37]:
# Fit the vocabulary on the training headlines only; the same tokenizer then
# encodes both splits.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_sentences)
word_index = tokenizer.word_index

# Convert text to integer sequences.
train_sequences = tokenizer.texts_to_sequences(train_sentences)
validation_sequences = tokenizer.texts_to_sequences(validation_sentences)

# Pad / truncate every sequence to max_length using the configured policies.
train_padded = pad_sequences(
    train_sequences,
    maxlen=max_length,
    truncating=trunc_type,
    padding=padding_type,
)
validation_padded = pad_sequences(
    validation_sequences,
    maxlen=max_length,
    truncating=trunc_type,
    padding=padding_type,
)

In [41]:
# Model 1: embedding -> 1D convolution -> global average pooling -> dense head.
# The embedding reads its sizes from the configuration constants rather than
# repeating the literals, so it stays in step with the padded inputs if
# max_length or embedding_dim is changed.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    tf.keras.layers.Conv1D(128, 5, activation='relu'),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=6, activation='relu'),
    # Single sigmoid unit: the label is binary and the loss is binary cross-entropy.
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

num_epochs = 30
history = model.fit(
    x=train_padded,
    y=train_labels,
    validation_data=(validation_padded, validation_labels),
    epochs=num_epochs,
)


Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_13 (Embedding)     (None, 120, 16)           16000     
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 116, 128)          10368     
_________________________________________________________________
global_average_pooling1d_10  (None, 128)               0         
_________________________________________________________________
dense_26 (Dense)             (None, 6)                 774       
_________________________________________________________________
dense_27 (Dense)             (None, 6)                 42        
_________________________________________________________________
dense_28 (Dense)             (None, 1)                 7         
Total params: 27,191
Trainable params: 27,191
Non-trainable params: 0
_________________________________________________

In [50]:
# Model 2: embedding -> bidirectional LSTM -> dense head with dropout.
# The embedding reads its sizes from the configuration constants rather than
# repeating the literals, so it stays in step with the padded inputs if
# max_length or embedding_dim is changed.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=16, activation='relu'),
    # Single sigmoid unit: the label is binary and the loss is binary cross-entropy.
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

num_epochs = 10
history = model.fit(
    x=train_padded,
    y=train_labels,
    validation_data=(validation_padded, validation_labels),
    epochs=num_epochs,
)

Model: "sequential_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_21 (Embedding)     (None, 120, 16)           16000     
_________________________________________________________________
bidirectional_13 (Bidirectio (None, 64)                12544     
_________________________________________________________________
dense_44 (Dense)             (None, 16)                1040      
_________________________________________________________________
dropout (Dropout)            (None, 16)                0         
_________________________________________________________________
dense_45 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_46 (Dense)             (None, 1)                 17        
Total params: 29,873
Trainable params: 29,873
Non-trainable params: 0
_________________________________________________

In [55]:

# Model 3: embedding -> causal 1D convolution -> bidirectional GRU -> dense head.
# The embedding reads its sizes from the configuration constants rather than
# repeating the literals.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    # No input_shape here: this layer is not the first one, so its input shape
    # is whatever the embedding emits. The former input_shape=[None, 1] did not
    # describe that input and had no effect on the built model.
    tf.keras.layers.Conv1D(
        filters=32,
        kernel_size=5,
        strides=1,
        padding="causal",
        activation="relu",
    ),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),
    tf.keras.layers.Dense(units=6, activation='relu'),
    # Single sigmoid unit: the label is binary and the loss is binary cross-entropy.
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

num_epochs = 30
history = model.fit(
    x=train_padded,
    y=train_labels,
    validation_data=(validation_padded, validation_labels),
    epochs=num_epochs,
)

Model: "sequential_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_22 (Embedding)     (None, 120, 16)           16000     
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 120, 32)           2592      
_________________________________________________________________
bidirectional_14 (Bidirectio (None, 64)                12672     
_________________________________________________________________
dense_50 (Dense)             (None, 6)                 390       
_________________________________________________________________
dense_51 (Dense)             (None, 1)                 7         
Total params: 31,661
Trainable params: 31,661
Non-trainable params: 0
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30

KeyboardInterrupt: 