# DATASET [IMDB](https://keras.io/api/datasets/imdb/)

In [None]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Model hyperparameters
max_features = 10000  # number of words used in the text analysis (vocabulary size)
maxlen = 200  # maximum number of words kept per review
embedding_size = 128  # size of each word-embedding vector

# Load the IMDB dataset, limited to `max_features` words
train_split, test_split = imdb.load_data(num_words=max_features)
train_data, train_labels = train_split
test_data, test_labels = test_split

# Bring every review to the same length so the reviews form a rectangular array
train_data = pad_sequences(sequences=train_data, maxlen=maxlen)
test_data = pad_sequences(sequences=test_data, maxlen=maxlen)

# Define the model: embedding -> 1D convolution -> global max pooling -> sigmoid output
model = Sequential()
# `input_length` is intentionally not passed: the argument is deprecated in
# Keras 3 (it only triggers a warning), and the sequence length is inferred
# from the data when the model is built on the first call to `fit`.
model.add(Embedding(max_features, embedding_size))
model.add(Conv1D(32, 7, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(1, activation='sigmoid'))

# Compile the model for binary classification
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model, holding out 20% of the training data for validation
model.fit(train_data, train_labels, epochs=10, batch_size=128, validation_split=0.2)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Tekst:
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    1   13 1247   14   20   18    6  378    7 2512   33    6    2
 7975 6388   31  251   54   43  267  187    4 1108  610  184   52   11
 3335   21    4   20    9  503 1992   13  657   36  586  353    8 3627
  178   19    2 3380   23  503 1992  102   21   45    6 1141  155    8
  168   46   18   10   10   54   13  296   12   13   16 3595  770   12
  679   46    8   30  128   74   13  873   13   16  685   15   12   16
    6  503 1992   21    4  302   26  184   52  434  128   74  135    2
   39 4599   63   47 2136  302    5   12    9   87    8   67  308    2
   11  142

In [None]:
# Build the reverse lookup table: word id -> word
word_index = imdb.get_word_index()
reverse_word_index = {word_id: word for word, word_id in word_index.items()}

# Pick a random review (already padded to a fixed length) from the test set
sample = np.random.randint(len(test_data))
text = test_data[sample]
label = test_labels[sample]

# With the default `imdb.load_data` settings the ids 0-2 are reserved
# (0 = padding, 1 = start of sequence, 2 = out-of-vocabulary word) and real
# word ids are shifted by 3. Padding and the start marker carry no text, so
# they are skipped instead of flooding the output with '?'; only
# out-of-vocabulary words are rendered as '?'.
decoded_review = ' '.join(
    reverse_word_index.get(i - 3, '?') for i in text if i > 1
)
print(f"Tekst: {decoded_review}")
print(f"Etykieta:{label}")

# Run the model on the sample; `predict` expects a batch, hence the extra dimension
prediction = model.predict(np.array([text]))
print(f"Predykcja: {prediction[0]}")

Tekst: ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? i always liked this movie i have seen it so many times but i always enjoyed it the story is interesting and special but the only thing i have to disagree with is that i don't think max lived in a romanian monastery or what was that p they don't look that way in romania anyway back to the story played pretty well but as someone said before me his english needs to improve br br and there were some funny moments and some ? sad parts too it ? being seen i thought it was sweet that the giant wanted to find his love i recommended to you all it's not the best movie ever but it was nice
Etykieta:1
Predykcja: [0.9997138]
