<a href="https://colab.research.google.com/github/ktyingith/test/blob/main/keras_imdb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### IMDB 데이터셋 로드하기 ###

In [None]:
from keras.datasets import imdb

# Vocabulary cap handed to imdb.load_data as num_words; it is also the width
# of the multi-hot vectors built later in this notebook.
NUM_WORDS = 10000

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=NUM_WORDS
)

# word_index maps word -> integer index; build the inverse (index -> word)
# so that encoded reviews can be turned back into text.
word_index = imdb.get_word_index()
revers_word_index = {index: word for word, index in word_index.items()}


In [None]:
import numpy as np

def printts(prefix, tensor):
  """Print a one-line summary of *tensor* followed by its first element.

  The summary line contains *prefix* and the tensor's ``dtype``, ``ndim``
  (reported as "rank"), ``size`` and ``shape`` attributes. The second line
  is ``tensor[0]`` formatted with its default string conversion.
  """
  summary = (
      f"{prefix} type:{tensor.dtype} rank:{tensor.ndim} "
      f"size:{tensor.size} shape:{tensor.shape}"
  )
  print(summary)
  print(f"{tensor[0]}")

def vectorize_sequences(sequences, dimension=NUM_WORDS):
  """Multi-hot encode a collection of integer index sequences.

  Args:
    sequences: sized collection whose items are sequences of column indices.
    dimension: number of columns in the output (defaults to NUM_WORDS).

  Returns:
    A ``(len(sequences), dimension)`` array of zeros (NumPy's default float
    dtype) in which row ``i`` holds 1 at every index listed in
    ``sequences[i]``.
  """
  n_rows = len(sequences)
  encoded = np.zeros((n_rows, dimension))
  for row, indices in enumerate(sequences):
    # Fancy-index assignment: sets all listed columns of this row at once;
    # repeated indices simply write 1 again.
    encoded[row, indices] = 1
  return encoded

In [None]:
# Summarize the raw arrays returned by the dataset loader.
printts(prefix="train_data", tensor=train_data)
printts(prefix="train_labels", tensor=train_labels)

# Sanity check: the largest word index in any training review is below NUM_WORDS.
assert max(max(review) for review in train_data) < NUM_WORDS

# Decode the first review back to text. Indices are shifted down by 3 before
# the lookup (NOTE(review): presumably because the loader reserves the first
# few indices for special tokens; confirm against the Keras IMDB docs).
# Indices with no matching word are rendered as "?".
decoded_review = ' '.join(
    revers_word_index.get(i - 3, "?") for i in train_data[0]
)
print(decoded_review)

# Multi-hot encode the reviews and convert the labels to float32.
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
y_train = np.asarray(train_labels).astype(np.float32)
y_test = np.asarray(test_labels).astype(np.float32)

# Hold out the first 10000 training samples for validation; the remainder
# is used for fitting.
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]

In [None]:
# Summarize the encoded training inputs and the float32 labels.
printts(prefix="x_train", tensor=x_train)
printts(prefix="y_train", tensor=y_train)

In [None]:
from keras import models
from keras import layers

# Two 16-unit ReLU hidden layers followed by a single sigmoid output unit.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(NUM_WORDS,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Fit on the partial training split, passing the held-out samples as
# validation data; the returned history is plotted in the following cells.
history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=20,
    batch_size=512,
    validation_data=(x_val, y_val),
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by model.fit.
history_dict = history.history
history_dict.keys()

loss, val_loss = history_dict['loss'], history_dict['val_loss']
# Epoch numbers start at 1 on the x axis.
epochs = range(1, 1 + len(loss))

# Training loss as blue dots ('bo'), validation loss as a solid blue line ('b').
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Clear the current figure before drawing the accuracy curves.
plt.clf()

acc, val_acc = history_dict['acc'], history_dict['val_acc']

# Training accuracy as blue dots ('bo'), validation accuracy as a solid
# blue line ('b').
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Fresh model with wider (32-unit) hidden layers; replaces the earlier `model`.
model = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(NUM_WORDS,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train on the full training set (no validation split) for 3 epochs,
# then evaluate on the test set and print the result.
history = model.fit(
    x_train,
    y_train,
    epochs=3,
    batch_size=512,
)
results = model.evaluate(x_test, y_test)
print(results)

In [None]:
# Run the trained model on the encoded test reviews; as the last expression
# of the cell, the returned predictions are displayed by the notebook.
model.predict(x=x_test)