In [1]:
import json
import tensorflow as tf
import numpy as np
import urllib
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
from tensorflow.keras.callbacks import Callback

class MyCallback(Callback):
    """Keras callback that halts training once both accuracies exceed 80%.

    Training is stopped at the end of the first epoch in which both the
    ``accuracy`` and ``val_accuracy`` entries of ``logs`` are above 0.80.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch metrics and request a stop when the target is met.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Mapping of metric names to values for this epoch; may be
                ``None`` or lack either metric, in which case nothing happens.
        """
        # `logs` defaults to None, and 'val_accuracy' is absent when no
        # validation data is supplied; comparing None with a float would
        # raise TypeError, so bail out quietly in those cases.
        logs = logs or {}
        val_accuracy = logs.get('val_accuracy')
        accuracy = logs.get('accuracy')
        if val_accuracy is None or accuracy is None:
            return

        if val_accuracy > 0.80 and accuracy > 0.80:
            print("\nReached 80% validation accuracy, stopping training!")
            self.model.stop_training = True

def solution():
    """Download the sarcasm headlines dataset and train a binary classifier.

    The JSON dataset is fetched to ``sarcasm.json`` in the working directory,
    split into the first ``training_size`` records for training and the rest
    for validation, tokenized, padded, and fed to a small
    embedding + average-pooling network. Training runs for at most 30 epochs
    and may be stopped earlier by ``MyCallback``.

    Returns:
        A tuple ``(model, tokenizer, max_length, padding_type, trunc_type)``:
        the trained Keras model, the tokenizer fitted on the training
        sentences, and the padding settings used for the input sequences.
    """
    # `import urllib` alone does not guarantee that the `request` submodule is
    # loaded; import it explicitly so the download does not depend on another
    # library having imported it first.
    import urllib.request

    data_url = 'https://github.com/dicodingacademy/assets/raw/main/Simulation/machine_learning/sarcasm.json'
    urllib.request.urlretrieve(data_url, 'sarcasm.json')

    vocab_size = 1000
    embedding_dim = 16
    max_length = 120
    trunc_type = 'post'
    padding_type = 'post'
    oov_tok = "<OOV>"
    training_size = 20000

    # Read with an explicit encoding so parsing does not depend on the
    # platform's default locale.
    with open("./sarcasm.json", 'r', encoding='utf-8') as f:
        sarcasm = json.load(f)
    sentences = [item['headline'] for item in sarcasm]
    labels = [item['is_sarcastic'] for item in sarcasm]

    training_sentences = sentences[0:training_size]
    testing_sentences = sentences[training_size:]

    training_labels = labels[0:training_size]
    testing_labels = labels[training_size:]

    # The tokenizer is fitted on the training split only, so validation
    # headlines never influence the vocabulary.
    tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
    tokenizer.fit_on_texts(training_sentences)
    word_index = tokenizer.word_index
    print(f'number of words in word_index: {len(word_index)}')

    training_sequences = tokenizer.texts_to_sequences(training_sentences)
    training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

    testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
    testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

    training_labels = np.array(training_labels)
    testing_labels = np.array(testing_labels)

    # `fit` documents `callbacks` as a list of Callback instances.
    callbacks = [MyCallback()]

    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(24, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(training_padded, training_labels, epochs=30, validation_data=(testing_padded, testing_labels),
              verbose=2, callbacks=callbacks)
    return model, tokenizer, max_length, padding_type, trunc_type

# Train the model and collect the preprocessing settings it was trained with
model, tokenizer, max_length, padding_type, trunc_type = solution()

# Write the trained model to disk
model.save('sarcasm_model.h5')

# Write the fitted tokenizer to disk as a pickle
import pickle
with open('tokenizer.pickle', 'wb') as tokenizer_file:
    tokenizer_file.write(pickle.dumps(tokenizer, protocol=pickle.HIGHEST_PROTOCOL))

# Write the padding settings to disk as JSON
import json
params = dict(max_length=max_length, padding_type=padding_type, trunc_type=trunc_type)
with open('params.json', 'w') as params_file:
    params_file.write(json.dumps(params))


number of words in word_index: 25637
Epoch 1/30
625/625 - 4s - loss: 0.6750 - accuracy: 0.5688 - val_loss: 0.6271 - val_accuracy: 0.6263 - 4s/epoch - 6ms/step
Epoch 2/30
625/625 - 2s - loss: 0.5021 - accuracy: 0.7676 - val_loss: 0.4523 - val_accuracy: 0.7842 - 2s/epoch - 4ms/step
Epoch 3/30

Reached 80% validation accuracy, stopping training!
625/625 - 3s - loss: 0.4061 - accuracy: 0.8162 - val_loss: 0.4150 - val_accuracy: 0.8074 - 3s/epoch - 5ms/step


  saving_api.save_model(


In [4]:
import re

def get_title(url):
    """Turn the last path segment of a URL into a space-separated title.

    Any query string (``?...``) or fragment (``#...``) is discarded before
    the last segment is located, and a single trailing slash is tolerated.
    Dashes in the segment are replaced with spaces.

    Args:
        url: The URL string to extract the title from.

    Returns:
        The last path segment with ``-`` replaced by spaces, or ``None`` when
        the URL contains no ``/``-delimited segment at its end.
    """
    # Cut off the query/fragment first: otherwise it is appended to the title,
    # and a slash inside it (e.g. "?next=/other") would be taken as the slug.
    path_part = re.split(r'[?#]', url, maxsplit=1)[0]

    match = re.search(r'/([^/]+)/?$', path_part)
    if match is None:
        return None
    return match.group(1).replace("-", " ")

# Sample inputs: the same article URL without and with a trailing slash
url1 = "https://babylonbee.com/news/star-wars-fans-upset-the-acolyte-full-of-woke-stuff-instead-of-siblings-kissing"
url2 = "https://babylonbee.com/news/star-wars-fans-upset-the-acolyte-full-of-woke-stuff-instead-of-siblings-kissing/"

# Each URL should print on its own line as:
# star wars fans upset the acolyte full of woke stuff instead of siblings kissing
print(*map(get_title, (url1, url2)), sep="\n")


star wars fans upset the acolyte full of woke stuff instead of siblings kissing
star wars fans upset the acolyte full of woke stuff instead of siblings kissing
