In [16]:
# Import necessary libraries
!pip install keras-tuner --upgrade
import numpy as np
from keras.src.layers import Bidirectional  # Import Bidirectional layer from Keras
from keras_tuner import HyperModel, BayesianOptimization, RandomSearch  # Import HyperModel and BayesianOptimization from Keras Tuner
from tensorflow.keras.models import Sequential  # Import Sequential model from TensorFlow Keras
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Conv1D, GlobalMaxPooling1D  # Import various layers from TensorFlow Keras
from tensorflow.keras.preprocessing.text import Tokenizer  # Import Tokenizer for text preprocessing
from tensorflow.keras.preprocessing.sequence import pad_sequences  # Import pad_sequences for padding sequences
from tensorflow.keras.optimizers import Adam  # Import Adam optimizer
from tensorflow.keras.losses import SparseCategoricalCrossentropy  # Import SparseCategoricalCrossentropy loss
from sklearn.utils.class_weight import compute_class_weight  # Import compute_class_weight for calculating class weights
from sklearn.model_selection import train_test_split  # Import train_test_split for splitting data
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau  # Import callbacks for model training
from tensorflow.keras.losses import SparseCategoricalCrossentropy  # Import SparseCategoricalCrossentropy loss from TensorFlow Keras
from keras_tuner.engine.hypermodel import HyperModel  # Import HyperModel from Keras Tuner
from nltk.corpus import stopwords  # Import stopwords from NLTK corpus
import re  # Import re module for regular expressions
import nltk  # Import NLTK library for natural language processing



In [17]:
# Download NLTK stopwords
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [18]:
data = [
    ("I love this product!", "positive"),
    ("Terrible experience, never buying again.", "negative"),
    ("The iPhone 14 battery lasts much longer than my previous phone.", "positive"),
    ("The customer support at Amazon is terrible.", "negative"),
    ("Netflix has some good shows but it's getting expensive.", "neutral"),
    ("Zomato delivery was fast and the food was still hot!", "positive"),
    ("I regret booking through MakeMyTrip. The hotel was awful.", "negative"),
    ("Spotify recommendations are hit or miss.", "neutral"),
    ("Swiggy's UI is smooth and easy to use.", "positive"),
    ("The bank's mobile app crashes too often.", "negative"),
    ("Using Flipkart is okay but the packaging can be better.", "neutral"),
    ("Ola's ride tracking feature is really helpful.", "positive"),
    ("The Redmi phone heats up quickly when gaming.", "negative"),
    ("JioFiber speed is good most of the time.", "neutral"),
    ("It's okay, nothing special.", "neutral"),
    ("I'm so disappointed with the service.", "negative"),
    ("I regret buying this.", "negative"),
    ("I don't really have an opinion.", "neutral"),
    ("Absolutely wonderful experience!", "positive"),
    ("Awful customer support.", "negative"),
    ("It's just average, nothing remarkable.", "neutral"),
    ("Couldn't be more dissatisfied.", "negative"),
    ("It's alright, I guess.", "neutral"),
    ("The product exceeded my expectations!", "positive"),
    ("Horrible quality, waste of money.", "negative"),
    ("Best service I've ever received.", "positive"),
    ("I'm really angry with the company.", "negative"),
    ("Not bad, but not great either.", "neutral"),
    ("I'm really impressed with the service.", "positive"),
    ("Worst experience ever!", "negative"),
    ("Nothing much to say.", "neutral"),
    ("Eh, it’s okay.", "neutral")
] # Contains text samples and their corresponding sentiment labels

In [19]:
# Preprocessing function to clean and tokenize text data
def preprocess(text):
    text = text.lower()  # Convert text to lowercase
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = " ".join([word for word in text.split() if word not in stopwords.words('english')])  # Remove stopwords
    return text

In [20]:
# Apply preprocessing to the dataset
corpus = [preprocess(text) for text, _ in data]

In [21]:
# Tokenization and padding
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')  # Initialize Tokenizer with a vocabulary size limit
tokenizer.fit_on_texts(corpus)  # Fit Tokenizer on preprocessed text data
word_index = tokenizer.word_index  # Get word index from Tokenizer
sequences = tokenizer.texts_to_sequences(corpus)  # Convert text to sequences of indices
padded_sequences = pad_sequences(sequences, padding='post')  # Pad sequences to the same length

In [22]:
# Convert sentiment labels to numerical values
sentiment_mapping = {"positive": 0, "negative": 1, "neutral": 2}  # Mapping of sentiment labels to numerical values
labels = np.array([sentiment_mapping[sentiment] for _, sentiment in data])  # Convert sentiment labels to numerical values

In [23]:
# Split the data into train and validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)  # Split data into training and validation sets


In [24]:
# Calculate class weights for imbalanced dataset
class_counts = np.bincount(train_labels)  # Count the occurrences of each class in the training labels
total_samples = sum(class_counts)  # Calculate the total number of samples
class_weights = {cls: total_samples / count for cls, count in enumerate(class_counts)}  # Calculate class weights


In [25]:
# Hypermodel
class SentimentHyperModel(HyperModel):
    def build(self, hp):
        model = Sequential()
        model.add(Embedding(len(word_index) + 1, hp.Int('embedding_dim', 64, 256, step=32)))
        model.add(Bidirectional(LSTM(hp.Int('lstm_units', 64, 128, step=32), return_sequences=True)))
        model.add(GlobalMaxPooling1D())
        model.add(Dense(hp.Int('dense_units', 64, 256, step=32), activation='relu'))
        model.add(Dropout(hp.Float('dense_dropout', 0.2, 0.5, step=0.1)))
        model.add(Dense(3, activation='softmax'))
        model.compile(
            loss=SparseCategoricalCrossentropy(),
            optimizer=Adam(hp.Float('learning_rate', 1e-4, 1e-2, sampling='LOG')),
            metrics=['accuracy']
        )
        return model

In [26]:
# Instantiate the custom hypermodel
hypermodel = SentimentHyperModel()

In [27]:
# Use Bayesian optimization for hyperparameter tuning
tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials=50,
    directory='my_dir',
    project_name='sentiment_analysis'
)

Reloading Tuner from my_dir/sentiment_analysis/tuner0.json


In [28]:
# Perform the hyperparameter search using training data
tuner.search(train_texts, train_labels, epochs=10, validation_data=(val_texts, val_labels),
             class_weight=class_weights, callbacks=[
        EarlyStopping(patience=3, restore_best_weights=True),
        ReduceLROnPlateau(factor=0.2, patience=2)
    ])

In [29]:
# Load and use the best hyperparameters found by the tuner
best_hps = tuner.get_best_hyperparameters(1)[0]

In [30]:
# Build the model using the best hyperparameters
best_model = hypermodel.build(best_hps)
print(best_model)

<Sequential name=sequential_1, built=False>


In [31]:
# Train the best model with your training data
best_model.fit(train_texts, train_labels, epochs=100, validation_data=(val_texts, val_labels),
               class_weight=class_weights)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9s/step - accuracy: 0.4000 - loss: 3.2855 - val_accuracy: 0.5714 - val_loss: 1.0941
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283ms/step - accuracy: 0.5200 - loss: 3.2846 - val_accuracy: 0.5714 - val_loss: 1.0937
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 339ms/step - accuracy: 0.5600 - loss: 3.2745 - val_accuracy: 0.5714 - val_loss: 1.0932
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms/step - accuracy: 0.6400 - loss: 3.2721 - val_accuracy: 0.4286 - val_loss: 1.0932
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 370ms/step - accuracy: 0.6000 - loss: 3.2652 - val_accuracy: 0.4286 - val_loss: 1.0928
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step - accuracy: 0.8000 - loss: 3.2546 - val_accuracy: 0.4286 - val_loss: 1.0925
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x788258624bd0>

In [32]:
maxlen = max(len(seq) for seq in tokenizer.texts_to_sequences(corpus))


In [33]:
# Function to predict sentiment using the trained best model
def predict_sentiment_with_best_model(user_input, model):
    preprocessed_input = preprocess(user_input)
    sequence = tokenizer.texts_to_sequences([preprocessed_input])
    padded_sequence = pad_sequences(sequence, padding='post', maxlen=maxlen)
    sentiment_probabilities = model.predict(padded_sequence)[0]
    print("Probabilities:", sentiment_probabilities)
    predicted_sentiment = np.argmax(sentiment_probabilities)
    return list(sentiment_mapping.keys())[list(sentiment_mapping.values()).index(predicted_sentiment)]

In [34]:
# Use best hyperparameters to build the model again
best_model = hypermodel.build(best_hps)


In [37]:
# User interaction loop for sentiment prediction
while True:
    user_input = input("Enter your message (or 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break
    sentiment = predict_sentiment_with_best_model(user_input, best_model)
    print("Predicted sentiment:", sentiment)

Enter your message (or 'exit' to quit): its a good product though
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Probabilities: [0.33066016 0.33236837 0.33697152]
Predicted sentiment: neutral
Enter your message (or 'exit' to quit): exit


In [39]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [40]:
!git config --global user.email "rakshanakrishnan42@gmail.com"
!git config --global user.name "rakshanakrish"


In [41]:
!git clone https://github.com/rakshanakrish/Micro-IT-Internship

Cloning into 'Micro-IT-Internship'...


In [43]:
!pwd
!ls



/content
drive  Micro-IT-Internship  my_dir  sample_data
