<a href="https://colab.research.google.com/github/ssk2001/AI-Tech-Lab-Experiments/blob/main/Untitled6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences



In [4]:
import tensorflow as tf

# Load IMDb dataset
(train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.imdb.load_data(num_words=10000)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [5]:
# Pre-proessing the data by adjusting factors like length, padding, margins, nulls, etc
max_length = 200

# Pad sequences for consistent input length
train_data = pad_sequences(train_data, maxlen=max_length, padding='post')
test_data = pad_sequences(test_data, maxlen=max_length, padding='post')


In [6]:
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D

# Define the model
model = Sequential([
    Embedding(input_dim=10000, output_dim=16, input_length=max_length),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()




In [7]:
# Training model
history = model.fit(train_data, train_labels, epochs=10, batch_size=512, validation_split=0.2)


Epoch 1/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - accuracy: 0.5415 - loss: 0.6919 - val_accuracy: 0.7122 - val_loss: 0.6814
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.6307 - loss: 0.6775 - val_accuracy: 0.7438 - val_loss: 0.6563
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.6991 - loss: 0.6490 - val_accuracy: 0.7766 - val_loss: 0.6107
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.7468 - loss: 0.6004 - val_accuracy: 0.8080 - val_loss: 0.5534
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.7804 - loss: 0.5473 - val_accuracy: 0.8328 - val_loss: 0.4973
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.8059 - loss: 0.4949 - val_accuracy: 0.8422 - val_loss: 0.4504
Epoch 7/10
[1m40/40[0m [32m━━━━

In [9]:
# Evaluation (Accuracy out of 1 and Loss as close to 0)
loss, accuracy = model.evaluate(test_data, test_labels)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8642 - loss: 0.3495
Test Loss: 0.35157066583633423
Test Accuracy: 0.8624399900436401


In [14]:
# Now we can make predictions

# Define and fit a tokenizer on the IMDb dataset words
vocab_size = 10000
tokenizer = Tokenizer(num_words=vocab_size)
# Get the word index from the IMDb dataset
word_index = tf.keras.datasets.imdb.get_word_index()

# Convert the word index dictionary into a list of words
imdb_words = list(word_index.keys())

# Fit the tokenizer on the list of words
tokenizer.fit_on_texts(imdb_words)

# Example of making predictions on new text samples
sample_reviews = ["This movie was fantastic! I really enjoyed it.", "The movie was boring and too long."]
sample_sequences = tokenizer.texts_to_sequences(sample_reviews)
sample_padded = pad_sequences(sample_sequences, maxlen=max_length, padding='post')

# Make predictions
predictions = model.predict(sample_padded)

# Print predictions (results are probabilities, closer to 1 is positive sentiment, closer to 0 is negative)
for i, review in enumerate(sample_reviews):
    print(f"Review: '{review}'\nPredicted Sentiment: {'Positive' if predictions[i] > 0.5 else 'Negative'}\n")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
Review: 'This movie was fantastic! I really enjoyed it.'
Predicted Sentiment: Negative

Review: 'The movie was boring and too long.'
Predicted Sentiment: Negative

