In [10]:
import numpy as np
import json
import pandas as pd

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [42]:
vocab_size = 10000
embedding_dim = 32
max_length = 16
trunc_type = 'post'
oov_tok = "<OOV>"
training_size = 20000

In [16]:
file_path = 'C:\Sarcasm_Headlines_Dataset.json'
sarcasm = pd.read_json(file_path1,lines=True)
sarcasm = sarcasm[['headline','is_sarcastic']]
sarcasm.head(10)

Unnamed: 0,headline,is_sarcastic
0,former versace store clerk sues over secret 'b...,0
1,the 'roseanne' revival catches up to our thorn...,0
2,mom starting to fear son's web series closest ...,1
3,"boehner just wants wife to listen, not come up...",1
4,j.k. rowling wishes snape happy birthday in th...,0
5,advancing the world's women,0
6,the fascinating case for eating lab-grown meat,0
7,"this ceo will send your kids to school, if you...",0
8,top snake handler leaves sinking huckabee camp...,1
9,friday's morning email: inside trump's presser...,0


In [19]:
training_sentences = sarcasm['headline'][0:training_size]
testing_sentences = sarcasm['headline'][training_size:]

training_labels = sarcasm['is_sarcastic'][0:training_size]
testing_labels = sarcasm['is_sarcastic'][training_size:]

In [20]:
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length)

In [21]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 32, 16)            160000    
_________________________________________________________________
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 24)                12312     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 25        
Total params: 172,337
Trainable params: 172,337
Non-trainable params: 0
_________________________________________________________________


In [43]:
num_epochs = 30
model.fit(padded, training_labels, epochs=num_epochs, 
          validation_data=(testing_padded, testing_labels), verbose=2)

Train on 20000 samples, validate on 6709 samples
Epoch 1/30
20000/20000 - 1s - loss: 3.8001e-04 - accuracy: 0.9998 - val_loss: 2.4825 - val_accuracy: 0.8000
Epoch 2/30
20000/20000 - 1s - loss: 3.8006e-04 - accuracy: 0.9998 - val_loss: 2.4955 - val_accuracy: 0.8007
Epoch 3/30
20000/20000 - 1s - loss: 3.7981e-04 - accuracy: 0.9998 - val_loss: 2.5073 - val_accuracy: 0.8006
Epoch 4/30
20000/20000 - 1s - loss: 3.8007e-04 - accuracy: 0.9998 - val_loss: 2.5233 - val_accuracy: 0.8010
Epoch 5/30
20000/20000 - 1s - loss: 3.8020e-04 - accuracy: 0.9998 - val_loss: 2.5366 - val_accuracy: 0.8009
Epoch 6/30
20000/20000 - 1s - loss: 3.8016e-04 - accuracy: 0.9998 - val_loss: 2.5502 - val_accuracy: 0.8004
Epoch 7/30
20000/20000 - 1s - loss: 3.7987e-04 - accuracy: 0.9998 - val_loss: 2.5630 - val_accuracy: 0.8003
Epoch 8/30
20000/20000 - 1s - loss: 3.8032e-04 - accuracy: 0.9998 - val_loss: 2.5798 - val_accuracy: 0.8012
Epoch 9/30
20000/20000 - 1s - loss: 3.7999e-04 - accuracy: 0.9998 - val_loss: 2.5917 - 

<tensorflow.python.keras.callbacks.History at 0x2b1da1b2b08>