In [1]:
#pip install keras-tuner


In [2]:
#! pip install tensorflow scikit-learn pandas numpy pickle

# Import libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
import pickle





In [2]:
# Load the dataset
df = pd.read_csv('/content/tripadvisor_hotel_reviews.csv')
print(df.head())

                                              Review  Rating
0  nice hotel expensive parking got good deal sta...       4
1  ok nothing special charge diamond member hilto...       2
2  nice rooms not 4* experience hotel monaco seat...       3
3  unique, great stay, wonderful time hotel monac...       5
4  great stay great stay, went seahawk game aweso...       5


In [3]:
# Preprocess the dataset
df = df[['Review', 'Rating']]
df['sentiment'] = df['Rating'].apply(lambda x: 'positive' if x > 3 else 'negative' if x < 3 else 'neutral')
df = df[['Review', 'sentiment']]
df = df.sample(frac=1).reset_index(drop=True)

In [4]:
# Tokenize and pad the review sequences
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(df['Review'])
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(df['Review'])
padded_sequences = pad_sequences(sequences, maxlen=100, truncating='post')

In [5]:
# Convert the sentiment labels to one-hot encoding
sentiment_labels = pd.get_dummies(df['sentiment']).values

In [6]:
# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(padded_sequences, sentiment_labels, test_size=0.2)

In [15]:
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
from keras.optimizers import Adam
from keras.regularizers import l2
from keras_tuner import HyperParameters

def build_model(hp):
    model = Sequential()
    model.add(Embedding(input_dim=5000,
                        output_dim=hp.Int('embedding_output_dim', min_value=50, max_value=200, step=50),
                        input_length=100))
    model.add(Conv1D(filters=hp.Int('filters', min_value=32, max_value=128, step=32),
                     kernel_size=hp.Choice('kernel_size', values=[3, 5, 7]),
                     activation='relu', kernel_regularizer=l2(0.01)))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(units=hp.Int('dense_units_1', min_value=32, max_value=128, step=32),
                    activation='relu', kernel_regularizer=l2(0.01)))
    model.add(Dropout(rate=hp.Float('dropout_rate_1', min_value=0.2, max_value=0.5, step=0.1)))
    model.add(Dense(units=hp.Int('dense_units_2', min_value=32, max_value=128, step=32),
                    activation='relu', kernel_regularizer=l2(0.01)))
    model.add(Dropout(rate=hp.Float('dropout_rate_2', min_value=0.2, max_value=0.5, step=0.1)))
    model.add(Dense(3, activation='softmax'))

    model.compile(optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [16]:
from keras_tuner import RandomSearch
from keras.callbacks import EarlyStopping
import numpy as np
from sklearn.metrics import accuracy_score

# Define the tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=2,
    directory='my_dir',
    project_name='sentiment_analysis'
)

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Search for the best hyperparameters
tuner.search(x_train, y_train, epochs=10, validation_data=(x_test, y_test), callbacks=[early_stopping])

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate the best model
y_pred = np.argmax(best_model.predict(x_test), axis=-1)
print("Best Model Accuracy:", accuracy_score(np.argmax(y_test, axis=-1), y_pred))


Trial 10 Complete [00h 07m 56s]
val_accuracy: 0.8506953120231628

Best val_accuracy So Far: 0.8511832356452942
Total elapsed time: 00h 59m 27s
[1m  1/129[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15s[0m 119ms/step

  saveable.load_own_variables(weights_store.get(inner_path))


[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step
Best Model Accuracy: 0.85240302512808


In [45]:
# Print classification report
y_true = np.argmax(y_test, axis=-1)
print("Classification Report:")
print(classification_report(y_true, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.77      0.74       661
           1       0.00      0.00      0.00       387
           2       0.88      0.98      0.93      3051

    accuracy                           0.85      4099
   macro avg       0.53      0.58      0.56      4099
weighted avg       0.77      0.85      0.81      4099



In [22]:
# Save the trained model
# Save the trained model in the native Keras format
model.save('sentiment_analysis_model.h5')
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
# Load the saved model and tokenizer
import keras

model = keras.models.load_model('sentiment_analysis_model.h5')
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)




In [5]:
# Define a function to predict the sentiment of input text
def predict_sentiment(text):
    # Tokenize and pad the input text
    text_sequence = tokenizer.texts_to_sequences([text])
    text_sequence = pad_sequences(text_sequence, maxlen=100)

    # Make a prediction using the trained model
    predicted_rating = model.predict(text_sequence)[0]

    positive_prob = predicted_rating[2]
    negative_prob = predicted_rating[0]
    sentiment_score = positive_prob - negative_prob
    print(f"Sentiment Score: {sentiment_score}")

    if np.argmax(predicted_rating) == 0:
        return 'Negative'
    elif np.argmax(predicted_rating) == 1:
        return 'Neutral'
    else:
        return 'Positive'


In [6]:
# Example usage
text_input = "I absolutely loved my stay at that hotel. The staff was amazing and the room was fantastic!"
predicted_sentiment = predict_sentiment(text_input)
print(predicted_sentiment)

Sentiment Score: 0.9996973276138306
Positive


In [7]:
# Example usage
text_input = "I hate that product. Will not buy it again"
predicted_sentiment = predict_sentiment(text_input)
print(predicted_sentiment)

Sentiment Score: -0.8291456699371338
Negative


In [11]:
# Example usage
text_input = "Overall, it was an average experience"
predicted_sentiment = predict_sentiment(text_input)
print(predicted_sentiment)

Sentiment Score: -0.024971187114715576
Neutral
