In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [2]:

# Load GloVe Embeddings
def load_glove_embeddings(embedding_dim, tokenizer, vocab_size, glove_file_path=None):
    """Build an embedding matrix for the tokenizer's vocabulary from a GloVe text file.

    Each non-blank line of the file is expected to be a word followed by its
    whitespace-separated vector components.

    Args:
        embedding_dim: Number of components per word vector. Must match the
            vectors stored in the file.
        tokenizer: Object exposing ``word_index``, a mapping of word -> integer
            row index.
        vocab_size: Number of rows in the returned matrix. Words whose index is
            ``>= vocab_size`` are ignored.
        glove_file_path: Path of the GloVe text file. When omitted, defaults to
            ``glove/glove.6B.<embedding_dim>d.txt`` so the file always matches
            the requested dimensionality.

    Returns:
        A ``(vocab_size, embedding_dim)`` NumPy array. Rows for words that are
        not present in the file (and any index no word maps to) stay all-zero.

    Raises:
        ValueError: If a vector needed for the vocabulary does not have
            ``embedding_dim`` components.
        OSError: If the GloVe file cannot be opened.
    """
    if glove_file_path is None:
        glove_file_path = f'glove/glove.6B.{embedding_dim}d.txt'

    # Only these words can ever land in the matrix, so nothing else needs to be
    # parsed or kept in memory.
    wanted_rows = {
        word: row for word, row in tokenizer.word_index.items() if row < vocab_size
    }

    embedding_matrix = np.zeros((vocab_size, embedding_dim))
    with open(glove_file_path, encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            values = line.split()
            if not values:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue

            row = wanted_rows.get(values[0])
            if row is None:
                continue

            coefs = np.asarray(values[1:], dtype='float32')
            if coefs.shape[0] != embedding_dim:
                raise ValueError(
                    f"{glove_file_path}:{line_number}: expected {embedding_dim} "
                    f"vector components for {values[0]!r}, found {coefs.shape[0]}"
                )
            embedding_matrix[row] = coefs

    return embedding_matrix

In [3]:
# Define the CNN model with pre-trained embeddings
def cnn_model(vocab_size, embedding_dim, max_length, num_filters, embedding_matrix,
              num_classes=3, kernel_sizes=(3, 4, 5), dropout_rate=0.5):
    """Build and compile a multi-kernel 1D CNN text classifier on frozen embeddings.

    The input is passed through a non-trainable embedding layer, then through
    one Conv1D + GlobalMaxPooling1D branch per kernel size. The pooled branches
    are concatenated, regularised with dropout and fed to a softmax layer.

    Args:
        vocab_size: Number of rows of the embedding table.
        embedding_dim: Number of columns of the embedding table.
        max_length: Length of each input sequence.
        num_filters: Number of filters in every convolution branch.
        embedding_matrix: Pre-trained weights of shape
            ``(vocab_size, embedding_dim)``.
        num_classes: Size of the softmax output (default 3: sad, neutral, happy).
        kernel_sizes: Convolution window sizes, one branch per entry.
        dropout_rate: Dropout rate applied before the output layer.

    Returns:
        A compiled Keras ``Model`` (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).

    Raises:
        ValueError: If ``embedding_matrix`` does not have shape
            ``(vocab_size, embedding_dim)`` or ``kernel_sizes`` is empty.
    """
    expected_shape = (vocab_size, embedding_dim)
    actual_shape = tuple(np.shape(embedding_matrix))
    if actual_shape != expected_shape:
        raise ValueError(
            f"embedding_matrix has shape {actual_shape}, expected {expected_shape}"
        )

    kernel_sizes = tuple(kernel_sizes)
    if not kernel_sizes:
        raise ValueError("kernel_sizes must contain at least one kernel size")

    inputs = Input(shape=(max_length,))
    # Seed the frozen embedding table through a Constant initializer rather than
    # the legacy `weights=` constructor keyword.
    embedding = Embedding(
        vocab_size,
        embedding_dim,
        embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
        trainable=False,
    )(inputs)

    conv_blocks = []
    for kernel_size in kernel_sizes:
        conv = Conv1D(filters=num_filters, kernel_size=kernel_size, activation='relu')(embedding)
        conv_blocks.append(GlobalMaxPooling1D()(conv))

    # With a single branch there is nothing to concatenate.
    if len(conv_blocks) > 1:
        features = tf.keras.layers.concatenate(conv_blocks)
    else:
        features = conv_blocks[0]

    dropout = Dropout(dropout_rate)(features)
    outputs = Dense(num_classes, activation='softmax')(dropout)
    model = Model(inputs=inputs, outputs=outputs)
    # Sparse loss: labels are integer class ids, not one-hot vectors.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [30]:
# Prepare the tokenizer
def prepare_tokenizer(texts, vocab_size, max_length):
    """Create a Keras ``Tokenizer`` and fit it on the given texts.

    Args:
        texts: Texts passed to ``Tokenizer.fit_on_texts``.
        vocab_size: Forwarded to the tokenizer as ``num_words``.
        max_length: Accepted for call-site compatibility; not used here.

    Returns:
        The fitted tokenizer, configured with ``"<OOV>"`` as its
        out-of-vocabulary token.
    """
    fitted_tokenizer = Tokenizer(oov_token="<OOV>", num_words=vocab_size)
    fitted_tokenizer.fit_on_texts(texts)
    return fitted_tokenizer

# Predict sentiment of a statement
def predict_sentiment(statement, tokenizer, model, max_length):
    """Return the sentiment label the model scores highest for one statement.

    Args:
        statement: Text to classify; it is wrapped in a one-element list
            before being tokenized.
        tokenizer: Object providing ``texts_to_sequences``.
        model: Object providing ``predict``.
        max_length: Sequence length used for post-padding / post-truncation.

    Returns:
        One of ``'Sad'``, ``'Neutral'`` or ``'Happy'``, chosen by the argmax
        over the (flattened) prediction array.
    """
    # Index i of this list is the label reported for class id i.
    sentiment_labels = ['Sad', 'Neutral', 'Happy']

    encoded = tokenizer.texts_to_sequences([statement])
    model_input = pad_sequences(
        encoded,
        maxlen=max_length,
        padding='post',
        truncating='post',
    )
    scores = model.predict(model_input)
    best_class = np.argmax(scores)
    return sentiment_labels[best_class]

In [31]:
# Sample training data (normally you'd have more data)
texts = [
    "I am very happy today!", "I feel so sad.", "This is a neutral statement.",
    "I am so excited about this!", "I am disappointed.", "It is okay."
]
labels = [2, 0, 1, 2, 0, 1]  # Happy=2, Sad=0, Neutral=1
assert len(texts) == len(labels), "every text needs exactly one label"

# Seed Python, NumPy and TensorFlow before any weights are created so that a
# Restart & Run All produces comparable training results.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Tokenizer and model setup
vocab_size = 10000    # tokenizer num_words and number of embedding rows
embedding_dim = 100   # must match the dimensionality of the GloVe file
max_length = 100      # padded / truncated sequence length
num_filters = 64      # filters per convolution branch
tokenizer = prepare_tokenizer(texts, vocab_size, max_length)
embedding_matrix = load_glove_embeddings(embedding_dim, tokenizer, vocab_size)
model = cnn_model(vocab_size, embedding_dim, max_length, num_filters, embedding_matrix)

In [32]:
# Encode the training texts as integer sequences of uniform length
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_length,
    padding='post',
    truncating='post',
)

# Fit the classifier on the padded sequences and their integer labels
model.fit(x=padded_sequences, y=np.array(labels), epochs=5)

# Classify a statement that was not part of the training data
statement = "i am very happy"
predicted_sentiment = predict_sentiment(
    statement=statement,
    tokenizer=tokenizer,
    model=model,
    max_length=max_length,
)
print(f"Predicted Sentiment: {predicted_sentiment}")


Epoch 1/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 369ms/step - accuracy: 0.3333 - loss: 1.2783
Epoch 2/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8333 - loss: 0.6405
Epoch 3/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.5000 - loss: 0.8889
Epoch 4/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.5000 - loss: 0.5695
Epoch 5/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - accuracy: 1.0000 - loss: 0.2776
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
Predicted Sentiment: Happy
