In [None]:
import json

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
def isSarcastic(prediction, sentence):
    """Print a "Funny" / "Not Funny" verdict for every sentence.

    Despite its name, the function labels sentences as funny or not funny.
    A score strictly greater than 0.5 is reported as "Funny".

    Args:
        prediction: The model score(s): a scalar, or an array-like holding
            exactly one score per sentence (for example shape ``(n,)`` or
            ``(n, 1)``).
        sentence: A single string, or a sequence of strings aligned with
            ``prediction``.

    Raises:
        ValueError: If the number of scores differs from the number of
            sentences.
    """
    # Flatten to one score per sentence so a single sample and a batch are
    # handled by the same code path (the dtype of the input is kept as is).
    scores = np.asarray(prediction).reshape(-1)
    sentences = [sentence] if isinstance(sentence, str) else list(sentence)

    if len(scores) != len(sentences):
        raise ValueError(
            f"got {len(scores)} prediction(s) for {len(sentences)} sentence(s)"
        )

    for text, score in zip(sentences, scores):
        label = "-> is Funny:" if score > 0.5 else "-> is Not Funny:"
        # The percentage shown is always the raw score * 100, for both labels.
        print("\n", text, label, str(score * 100) + "%")

In [None]:
# Training-accuracy threshold above which training is stopped early.
DESIRED_ACCURACY = 0.922


class myCallback(tf.keras.callbacks.Callback):
    """Stop training once the training accuracy exceeds DESIRED_ACCURACY."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's 'accuracy' metric and request a stop if it is high enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Mapping of metric names to values for the epoch; may be None.
        """
        # `logs=None` replaces the shared mutable `{}` default. A missing
        # 'accuracy' entry is skipped instead of raising TypeError on
        # `None > float`.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > DESIRED_ACCURACY:
            print(f"\nReached {DESIRED_ACCURACY * 100}% accuracy so cancelling training!", )
            self.model.stop_training = True

In [None]:
# --- Tokenizer / embedding settings ---
vocab_size = 10_000        # used as Tokenizer num_words and as the Embedding input size
embedding_dim = 16         # Embedding output size
oov_tok = "<OOV>"          # token handed to the Tokenizer as oov_token

# --- Sequence padding settings ---
max_length = 100           # maxlen passed to pad_sequences
padding_type = 'post'
trunc_type = 'post'

# --- Data split ---
training_size = 20_000     # rows [0, training_size) train, the rest is used for validation

In [None]:
# Load the one-liner dataset; later cells read its 'text' and 'humor' columns.
data = pd.read_csv('/dataset-1liner.csv')

# Number of rows in the loaded frame.
data_size = data.shape[0]

In [None]:
# Show the loaded DataFrame as this cell's output for a quick visual check.
data

In [None]:
# Model inputs come from the 'text' column, targets from the 'humor' column.
sentences, labels = data['text'], data['humor']

In [None]:
# Sequential hold-out split: the first `training_size` rows are used for
# training and every remaining row for validation (no shuffling is done here).
training_sentences, training_labels = sentences[:training_size], labels[:training_size]
testing_sentences, testing_labels = sentences[training_size:], labels[training_size:]

In [None]:
# Fit the tokenizer on the training sentences only, so the validation
# sentences do not contribute to the vocabulary.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Encode the training sentences as integer sequences and pad/truncate them
# to `max_length`.
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(
    training_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

# Same encoding for the validation sentences, reusing the fitted tokenizer.
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(
    testing_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

In [None]:
# Need this block to get it to work with TensorFlow 2.x
import numpy as np

# Rebind the padded inputs and the label columns as NumPy arrays before
# they are handed to model.fit.
training_padded, training_labels = np.array(training_padded), np.array(training_labels)
testing_padded, testing_labels = np.array(testing_padded), np.array(testing_labels)

In [None]:
# Embedding -> average over the sequence axis -> small dense layer -> a single
# sigmoid unit producing one score per input sentence.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        input_length=max_length,
    ),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(units=24, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; 'accuracy' is the
# metric that myCallback reads at the end of each epoch.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
# Train for at most `num_epochs` epochs; the myCallback instance can end
# training earlier by setting model.stop_training.
num_epochs = 30
callbacks = myCallback()
history = model.fit(
    training_padded,
    training_labels,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels),
    verbose=2,
    callbacks=[callbacks],
)

In [None]:
import matplotlib.pyplot as plt


def plot_graphs(history, string):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        history: Object whose ``history`` mapping holds a list of values
            under ``string`` and under ``'val_' + string``.
        string: Name of the metric to plot, e.g. "accuracy" or "loss".
    """
    val_key = 'val_' + string

    # Training curve first, validation curve second, matching the legend order.
    for key in (string, val_key):
        plt.plot(history.history[key])

    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, val_key])
    plt.show()
  
# Draw the training and validation curves for each metric recorded in `history`.
plot_graphs(history, "accuracy")
plot_graphs(history, "loss")

In [None]:
# Hand-written sample inputs for a manual check of the trained model:
# a mix of one-liner jokes and plain, non-humorous sentences.
sentence = [
    "What is black white and red? a penguin with a sunburn", 
    "I want a beer",
    "I want thousands of beers",
    "What’s the best thing about Switzerland? I don’t know, but the flag is a big plus.",
    "Why was six afraid of seven? Because seven eight nine.",
    "My IQ test results came back. They were negative.",
    "What do you call a hippie’s wife? Mississippi.",
    "Why are so many blonde jokes one-liners? So brunettes can remember them.",
    "What do you call a bear with no teeth? A gummy bear.",
    "What do fish say when they hit a concrete wall? Dam!",
    "training time will vary depending on the complexity of the BERT model you have selected.",
    "You can plot the training and validation loss for comparison, as well as the training and validation accuracy",
    "In this plot, the red lines represent the training loss and accuracy, and the blue lines are the validation loss and accuracy",
    "Yesterday I saw a guy spill all his Scrabble letters on the road. I asked him, “What’s the word on the street?",
    "Once my dog ate all the Scrabble tiles. For days he kept leaving little messages around the house.",
    "A woman gets on a bus with her baby. The bus driver says: „That’s the ugliest baby that I’ve ever seen. Ugh!“ The woman goes to the rear of the bus and sits down, fuming. She says to a man next to her: „The driver just insulted me!“ The man says: „You go right up there and tell him off – go ahead, I’ll hold your monkey for you.“",
    "I always tell new hires, Don’t think of me as your boss, think of me as a friend who can fire you.",
    "I would kill for a Nobel Peace Prize.",
    "Amazon has earned a reputation as a disruptor of well-established industries through technological innovation and aggressive reinvestment of profits into capital expenditures.",
    "The NASDAQ Stock Market eventually assumed the majority of major trades that had been executed by the over-the-counter (OTC) system of trading, but there are still many securities traded in this fashion."
]
# Encode the sample sentences with the already-fitted tokenizer and apply the
# same padding settings that were used for the training data.
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(
    sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

# Score the samples and print a verdict for each one.
isSarcastic(model.predict(padded), sentence)