In [None]:
!pip install keras tensorflow



In [None]:
import numpy as np
from tensorflow.keras import Input
from tensorflow.keras.layers import Embedding, Flatten, Dense # Import layers from tensorflow.keras
from tensorflow.keras.models import Sequential # Import Sequential from tensorflow.keras
from tensorflow.keras.preprocessing.sequence import pad_sequences # Import pad_sequences from tensorflow.keras
from tensorflow.keras.preprocessing.text import Tokenizer # Import Tokenizer from tensorflow.keras
from tensorflow.keras.utils import set_random_seed

# Toy end-to-end example: tokenize two sentences, pad them, train a tiny
# classifier with an Embedding layer, then pull the learned embedding matrix out.

# Seed the Python, NumPy and backend RNGs so weight initialisation -- and with it
# the training log and the extracted embeddings -- is repeatable across runs.
set_random_seed(42)

# Sample texts
texts = [
    "This is a sample text",
    "Keras is great for deep learning"
]

# Step 1: Tokenize the text
max_words = 1000  # maximum number of words to consider
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Step 2: Pad sequences to a fixed length
maxlen = 10  # maximum length of sequences
data = pad_sequences(sequences, maxlen=maxlen)

# For demonstration, we create dummy labels (binary classification)
labels = np.array([0, 1])

# Step 3: Build the model with an Embedding layer
# The sequence length is declared with an explicit Input rather than the
# `input_length` argument of Embedding, which is deprecated in Keras 3 and only
# triggers a warning there. Declaring the input also builds the model up front,
# so model.summary() below can report concrete output shapes and parameter counts.
# A reference to the embedding layer is kept so its weights can be read back
# without relying on its position in model.layers.
embedding_layer = Embedding(input_dim=max_words, output_dim=50)
model = Sequential([
    Input(shape=(maxlen,)),
    embedding_layer,
    Flatten(),
    Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# Check the model architecture
model.summary()

# Step 4: Train the model (using a very small dataset for demonstration)
model.fit(data, labels, epochs=10, batch_size=2)

# Retrieve the learned word embeddings from the Embedding layer:
# its single weight tensor has one row per vocabulary index and one column
# per embedding dimension, i.e. shape (max_words, 50).
embeddings = embedding_layer.get_weights()[0]
print("Shape of embeddings:", embeddings.shape)

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - acc: 0.5000 - loss: 0.7140
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - acc: 0.5000 - loss: 0.6823
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - acc: 1.0000 - loss: 0.6602
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - acc: 1.0000 - loss: 0.6418
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - acc: 1.0000 - loss: 0.6257
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - acc: 1.0000 - loss: 0.6109
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - acc: 1.0000 - loss: 0.5970
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step - acc: 1.0000 - loss: 0.5838
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step - acc: 1.0000 - loss:

In [None]:
# Display the embedding matrix extracted from the toy model in the previous cell.
# NumPy abbreviates large arrays with "..." when converting them to text.
print("Embeddings:", str(embeddings))

Embeddings: [[ 0.0026506   0.03167318 -0.00054819 ... -0.01083126 -0.00691296
  -0.01052535]
 [ 0.04347993 -0.02872512  0.02649365 ...  0.00228765  0.05752612
  -0.0548771 ]
 [ 0.04481399  0.01836159 -0.00115392 ... -0.03196791 -0.00486782
   0.05824351]
 ...
 [-0.01608217 -0.04868507  0.03808543 ... -0.00443822 -0.04808916
  -0.02283508]
 [-0.02203221 -0.01486505  0.03340841 ...  0.01411179  0.00578618
  -0.02618451]
 [-0.00510939 -0.03236629 -0.01720731 ... -0.01993532 -0.04353203
   0.00100516]]


In [None]:
!pip install openai



In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense

# IMDB sentiment classifier: load the integer-encoded reviews, pad them to a
# common length, train an Embedding + average-pooling model and score it on the
# held-out test split.

# Seed the Python, NumPy and backend RNGs so weight initialisation and batch
# shuffling -- and therefore the reported metrics -- are repeatable across runs.
keras.utils.set_random_seed(42)

# ---------------------------
# Step 1: Load the IMDB Dataset
# ---------------------------
# We limit our vocabulary to the top 10,000 most frequent words.
max_features = 10000
maxlen = 500  # Each review will be truncated or padded to 500 words

print("Loading data...")
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print("Training samples:", len(x_train))
print("Test samples:", len(x_test))

# ---------------------------
# Step 2: Pad the Sequences
# ---------------------------
# Pad sequences to ensure each input is of the same length.
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# ---------------------------
# Step 3: Build the Model
# ---------------------------
# We create a simple model with an Embedding layer, a GlobalAveragePooling1D layer, and a Dense output layer.
# The sequence length is declared with an explicit Input rather than the
# `input_length` argument of Embedding, which is deprecated in Keras 3 and only
# triggers a warning there. Declaring the input also builds the model up front,
# so model.summary() below can report concrete output shapes and parameter counts.
model = Sequential([
    keras.Input(shape=(maxlen,)),
    Embedding(input_dim=max_features, output_dim=128),
    GlobalAveragePooling1D(),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

# ---------------------------
# Step 4: Train the Model
# ---------------------------
# validation_split=0.2 holds out 20% of the training samples for validation,
# leaving 20,000 of the 25,000 reviews (625 batches of 32) for the weight updates.
history = model.fit(x_train, y_train,
                    epochs=10,
                    batch_size=32,
                    validation_split=0.2)

# ---------------------------
# Step 5: Evaluate the Model
# ---------------------------
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training samples: 25000
Test samples: 25000




Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 22ms/step - accuracy: 0.5744 - loss: 0.6737 - val_accuracy: 0.7782 - val_loss: 0.5200
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - accuracy: 0.7870 - loss: 0.4786 - val_accuracy: 0.8496 - val_loss: 0.3778
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - accuracy: 0.8587 - loss: 0.3471 - val_accuracy: 0.8276 - val_loss: 0.3762
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 21ms/step - accuracy: 0.8670 - loss: 0.3133 - val_accuracy: 0.8800 - val_loss: 0.3094
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 20ms/step - accuracy: 0.9012 - loss: 0.2586 - val_accuracy: 0.8868 - val_loss: 0.2946
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - accuracy: 0.8972 - loss: 0.2478 - val_accuracy: 0.8212 - val_loss: 0.4048
Epoch 7/10
[1m6