In [2]:
#https://teddylee777.github.io/tensorflow/news-sarcasm
!pip install tensorflow==2.1.0
import json
import tensorflow as tf
import numpy as np
import urllib

from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# `import urllib` by itself does not guarantee that the `request` submodule is
# loaded; import it explicitly so the download does not depend on some other
# package having imported it first.
import urllib.request

# Fetch the sarcasm headlines dataset and store it next to the notebook.
url = 'https://storage.googleapis.com/download.tensorflow.org/data/sarcasm.json'
urllib.request.urlretrieve(url, 'sarcasm.json')

# --- Text preprocessing settings -------------------------------------------
vocab_size = 1000        # passed to the Tokenizer (num_words) and the Embedding layer
max_length = 120         # every padded sequence ends up with this many columns
trunc_type = 'post'      # `truncating` argument of pad_sequences
padding_type = 'post'    # `padding` argument of pad_sequences
oov_tok = "<OOV>"        # token the Tokenizer uses for out-of-vocabulary words

# --- Model / training settings ---------------------------------------------
embedding_dim = 16       # output dimension of the Embedding layer
training_size = 20000    # first N samples are used for training, the rest for validation
batch_size = 256         # mini-batch size given to model.fit
# Read the downloaded dataset. JSON text is UTF-8, so the encoding is given
# explicitly instead of relying on the platform's default locale encoding.
with open('sarcasm.json', 'r', encoding='utf-8') as f:
    datastore = json.load(f)

# Each record contributes its headline text and its is_sarcastic label;
# the two lists stay index-aligned because they iterate the same records.
sentences = [item['headline'] for item in datastore]
labels = [item['is_sarcastic'] for item in datastore]


# Fit the tokenizer on all headlines. num_words restricts encoding to the most
# frequent words (original note: top 1000); other words map to the OOV token.
tokenizer = Tokenizer(oov_token=oov_tok, num_words=vocab_size)
tokenizer.fit_on_texts(sentences)

# Text -> sequences of integer word ids.
sequences = tokenizer.texts_to_sequences(sentences)

# Pad / truncate every sequence so each row has max_length (120) entries.
padded = pad_sequences(
    sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

# Train / validation split: the first training_size rows are for training,
# everything after that is held out for validation.
train_padded = padded[:training_size]
validation_padded = padded[training_size:]

# Labels are sliced the same way and converted to NumPy arrays.
train_labels = np.array(labels[:training_size])
validation_labels = np.array(labels[training_size:])

# Binary classifier: embedding -> two stacked bidirectional LSTMs -> dense head
# ending in a single sigmoid unit.
model = Sequential()
# Map each word id to an embedding_dim (16) dimensional vector.
model.add(Embedding(vocab_size, embedding_dim, input_length=max_length))
# First recurrent layer returns the full sequence so the second LSTM can consume it.
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Bidirectional(LSTM(64)))
model.add(Flatten())
model.add(Dense(16, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Save only the weights, and only when the monitored validation loss improves.
checkpoint_path = 'my_checkpoint.ckpt'
sarcasm_checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)



# Train for 10 epochs, evaluating on the held-out split after each epoch and
# letting the checkpoint callback run alongside training.
hist = model.fit(
    x=train_padded,
    y=train_labels,
    batch_size=batch_size,
    epochs=10,
    validation_data=(validation_padded, validation_labels),
    callbacks=[sarcasm_checkpoint],
)



Collecting tensorflow==2.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/85/d4/c0cd1057b331bc38b65478302114194bd8e1b9c2bbc06e300935c0e93d90/tensorflow-2.1.0-cp36-cp36m-manylinux2010_x86_64.whl (421.8MB)
[K     |████████████████████████████████| 421.8MB 27kB/s 
Collecting tensorboard<2.2.0,>=2.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/d9/41/bbf49b61370e4f4d245d4c6051dfb6db80cec672605c91b1652ac8cc3d38/tensorboard-2.1.1-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.9MB 48.1MB/s 
Collecting tensorflow-estimator<2.2.0,>=2.1.0rc0
[?25l  Downloading https://files.pythonhosted.org/packages/18/90/b77c328a1304437ab1310b463e533fa7689f4bfc41549593056d812fab8e/tensorflow_estimator-2.1.0-py2.py3-none-any.whl (448kB)
[K     |████████████████████████████████| 450kB 40.1MB/s 
Collecting keras-applications>=1.0.8
[?25l  Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/K