In [1]:
import numpy as np
import pickle
from sklearn.metrics import accuracy_score, precision_score, recall_score
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Embedding, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Vocabulary size: passed to create_model as the Embedding layer's input_dim.
word_count = 8000
# Fixed sequence length: used as the Embedding input_length and as the
# pad_sequences maxlen when a user review is prepared for prediction.
max_review_length = 500 
# Width of each learned word vector (Embedding output_dim).
embedding_dim = 30

#--------------loading pre-processed train and test data--------------
def load_data():
    """Read the pre-processed review and label arrays from the working directory.

    The label arrays are reduced along axis 1 with argmax, so each row
    collapses to the index of its largest entry (assumes the stored labels
    are 2-D, presumably one-hot encoded -- TODO confirm against the
    pre-processing step).

    Returns:
        tuple: (x_train, x_test, y_train, y_test)
    """
    review_files = ('x_train_reviews.npy', 'x_test_reviews.npy')
    label_files = ('y_train_labels.npy', 'y_test_labels.npy')

    x_train, x_test = (np.load(path) for path in review_files)

    #collapsing each label row to a single class index
    y_train, y_test = (np.load(path).argmax(axis=1) for path in label_files)

    return x_train, x_test, y_train, y_test

#--------------cnn model architecture--------------
def create_model(word_count, max_review_length, embedding_dim):
    """Assemble and compile the 1-D convolutional sentiment classifier.

    Args:
        word_count: vocabulary size handed to the Embedding layer (input_dim).
        max_review_length: length of each input sequence (input_length).
        embedding_dim: size of the embedding vectors (output_dim).

    Returns:
        A compiled Sequential model ending in a single sigmoid unit, trained
        with binary cross-entropy and the Adam optimizer, tracking accuracy.
    """
    stack = [
        Embedding(input_dim=word_count, output_dim=embedding_dim, input_length=max_review_length),
        Conv1D(filters=30, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(10, activation='relu'),
        #single sigmoid unit: output is thresholded at 0.5 by the callers
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(stack)

    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return network

#--------------training cnn model--------------
def train_and_evaluate(model, x_train, y_train, x_test, y_test, epochs=8, batch_size=128):
    """Fit the model, then print accuracy, precision and recall on the test set.

    Args:
        model: compiled model exposing ``fit`` and ``predict``.
        x_train, y_train: training inputs and labels.
        x_test, y_test: held-out inputs and labels.
        epochs: number of training epochs.
        batch_size: mini-batch size used by ``fit``.

    Returns:
        The same ``model`` object, after training.
    """
    # NOTE(review): the test split is also passed as validation data, so the
    # per-epoch val_* numbers and the final metrics come from the same samples.
    # It is only used for monitoring here (no early stopping / checkpointing).
    model.fit(
        x_train, y_train,
        validation_data=(x_test, y_test),
        epochs=epochs,
        batch_size=batch_size
    )

    #predicting the labels for the test set
    y_pred_prob = model.predict(x_test)
    #flatten the prediction column so y_pred has the same 1-D shape as y_test
    y_pred = (np.ravel(y_pred_prob) >= 0.5).astype(int)

    #calculating accuracy, precision, and recall
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')

    return model


#load the arrays, build a fresh network, train/evaluate it, then persist it
x_train, x_test, y_train, y_test = load_data()
model = train_and_evaluate(
    create_model(word_count, max_review_length, embedding_dim),
    x_train, y_train, x_test, y_test,
)
#the inference cell reloads the model from this file
model.save('CNN_sentiment_analysis_model.h5')


Epoch 1/8




[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - accuracy: 0.6051 - loss: 0.6247 - val_accuracy: 0.8086 - val_loss: 0.4351
Epoch 2/8
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 34ms/step - accuracy: 0.8894 - loss: 0.2719 - val_accuracy: 0.8876 - val_loss: 0.2704
Epoch 3/8
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 43ms/step - accuracy: 0.9396 - loss: 0.1666 - val_accuracy: 0.8840 - val_loss: 0.2878
Epoch 4/8
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 57ms/step - accuracy: 0.9598 - loss: 0.1210 - val_accuracy: 0.8729 - val_loss: 0.3319
Epoch 5/8
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 39ms/step - accuracy: 0.9685 - loss: 0.0955 - val_accuracy: 0.8727 - val_loss: 0.3733
Epoch 6/8
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 39ms/step - accuracy: 0.9789 - loss: 0.0697 - val_accuracy: 0.8700 - val_loss: 0.4147
Epoch 7/8
[1m196/196[0m [32m━━━━━



Accuracy: 0.8607
Precision: 0.8900
Recall: 0.8232


In [22]:
#reload the persisted network from disk (same file name the training cell saves to),
#rebinding the module-level `model` that analyze_review reads
model = load_model('CNN_sentiment_analysis_model.h5')

#--------------analyzing sentiment of user review--------------
#tokenizers already read from disk, keyed by file path, so repeated calls skip the unpickling
_TOKENIZER_CACHE = {}


def _load_tokenizer(path='tokenizer.pkl'):
    """Return the pickled tokenizer stored at ``path``, reading the file only once."""
    if path not in _TOKENIZER_CACHE:
        with open(path, 'rb') as f:
            # NOTE(security): pickle.load can execute arbitrary code while
            # deserializing -- only point this at a tokenizer file you trust.
            _TOKENIZER_CACHE[path] = pickle.load(f)
    return _TOKENIZER_CACHE[path]


def analyze_review(review):
    """Classify a single review text as 'positive' or 'negative'.

    The text is converted to a token sequence with the pickled tokenizer,
    padded to ``max_review_length`` and scored by the module-level ``model``.
    The tokenizer is read from 'tokenizer.pkl' on the first call and reused
    afterwards; re-run this cell to pick up a replaced tokenizer file.

    Args:
        review: raw review text.

    Returns:
        'positive' when the predicted score is >= 0.5, otherwise 'negative'.
    """
    tokenizer = _load_tokenizer()

    sequences = tokenizer.texts_to_sequences([review])
    padded_sequences = pad_sequences(sequences, maxlen=max_review_length)

    #predicting user review label (one sample in, first value of the first output row)
    prediction = model.predict(padded_sequences)[0][0]

    label = 'positive' if prediction >= 0.5 else 'negative'
    return label

#interactive loop: keep classifying reviews until the user asks to stop
while True:
    try:
        #strip so that ' exit ' and whitespace-only lines are recognised
        input_prompt = input("Enter a review (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        #stdin was closed or the kernel was interrupted: leave without a traceback
        print("Exited sentiment analysis.")
        break
    if input_prompt.lower() == 'exit':
        print("Exited sentiment analysis.")
        break
    if not input_prompt:
        #nothing to classify; an empty review would be scored on padding alone
        continue
    predicted_label = analyze_review(input_prompt)
    print(f'The review is {predicted_label}')




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
The review is positive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
The review is positive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
The review is negative
Exited sentiment analysis.
