In [1]:
from keras.datasets import imdb

In [2]:
# Load the data, keeping only 10,000 of the most frequently occuring words
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = 10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


**Vectorize input data**

In [3]:
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))    # Creates an all zero matrix of shape (len(sequences),10K)
    for i,sequence in enumerate(sequences):
        results[i,sequence] = 1                        # Sets specific indices of results[i] to 1s
    return results

# Vectorize training Data
X_train = vectorize_sequences(train_data)

# Vectorize testing Data
X_test = vectorize_sequences(test_data)

In [4]:
# vectorize labels
y_train = np.asarray(train_labels).astype('float32')
y_test  = np.asarray(test_labels).astype('float32')

**Model defination**


In [5]:
from keras import models
from keras import layers

model = models.Sequential(name="IMDBSentimentClassifier")
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

**Compiling the model**

In [6]:
model.compile(optimizer='adam',loss='mse',metrics=['accuracy'])

In [7]:
import time

# Measure training time
start_time = time.time()

In [8]:
model.summary()

Model: "IMDBSentimentClassifier"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 16)                160016    
                                                                 
 dense_1 (Dense)             (None, 16)                272       
                                                                 
 dense_2 (Dense)             (None, 1)                 17        
                                                                 
Total params: 160305 (626.19 KB)
Trainable params: 160305 (626.19 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


**Training our model**

In [9]:
history = model.fit(X_train,y_train,validation_split=0.2,epochs=20,verbose=1,batch_size=512)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [10]:
end_time = time.time()
training_time = end_time - start_time
print("Training time:", training_time, "seconds")

Training time: 35.84884786605835 seconds


**Evaluate the model**

In [11]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)

Test accuracy: 0.8582800030708313


In [12]:
# Function to predict sentiment for user input
def predict_sentiment(review):
    # Tokenize and preprocess the user input review
    word_index = imdb.get_word_index()
    review_sequence = [word_index[word] if word in word_index and word_index[word] < 10000 else 0 for word in review.split()]
    review_sequence = [word for word in review_sequence if word != 0]
    review_sequence = vectorize_sequences([review_sequence])

    # Predict sentiment using the trained model
    sentiment_prob = model.predict(review_sequence)[0][0]

    # Decode sentiment prediction
    if sentiment_prob >= 0.5:
        return 'Positive', sentiment_prob
    else:
        return 'Negative', sentiment_prob

In [13]:
# Get user input
user_review = input("Enter your movie review: ")

Enter your movie review: it is good


In [14]:
# Predict sentiment for user input
sentiment, prob = predict_sentiment(user_review)
print(f'Sentiment: {sentiment}, Probability: {prob}')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
Sentiment: Positive, Probability: 0.516062319278717


**GUI**

In [15]:
pip install gradio

Collecting gradio
  Downloading gradio-4.27.0-py3-none-any.whl (17.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.2-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.9/91.9 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==0.15.1 (from gradio)
  Downloading gradio_client-0.15.1-py3-none-any.whl (313 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.6/313.6 kB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━

In [16]:
import gradio as gr

In [17]:
# Create a Gradio interface
iface = gr.Interface(fn=predict_sentiment, inputs="text", outputs="text", title="IMDB Sentiment Analysis")

In [18]:
# Launch the interface
iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://1c61714bbea6206e8a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


