In [1]:
import re
import string

import nltk
import numpy as np
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import set_random_seed
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split

# Toy corpus: five positive and five negative hand-written example sentences
positive_sentences = [
    "I love this product.",
    "This is an amazing place.",
    "I feel great today.",
    "I am so excited about the concert.",
    "He is my best friend.",
]
negative_sentences = [
    "I do not like this at all.",
    "This is a terrible mistake.",
    "I am tired of this nonsense.",
    "I can't stand the rain.",
    "He is my worst enemy.",
]



In [2]:
def preprocess_text(sen):
    """Normalise a raw sentence into a space-separated string of word tokens.

    Steps, in order: replace HTML-like tags with a space, tokenize with
    NLTK's ``word_tokenize``, lower-case every token, delete ASCII
    punctuation characters, then keep only purely alphabetic tokens that
    are longer than two characters.

    Stop words are NOT removed, so words such as "not" and "this" are kept.

    Args:
        sen: Raw input sentence (str).

    Returns:
        The surviving tokens joined by single spaces; an empty string when
        no token passes the filters.
    """
    sen = re.sub('<.*?>', ' ', sen)  # replace HTML tags with a space

    tokens = word_tokenize(sen)

    # Lower-case each token, then delete any punctuation characters in it.
    table = str.maketrans('', '', string.punctuation)
    stripped = [w.lower().translate(table) for w in tokens]

    # Keep alphabetic tokens only and drop very short ones (2 chars or fewer).
    words = [w for w in stripped if w.isalpha() and len(w) > 2]

    return ' '.join(words)

In [3]:
# Run every sentence of both corpora through the shared cleaning routine
processed_positive_sentence = list(map(preprocess_text, positive_sentences))
processed_negative_sentence = list(map(preprocess_text, negative_sentences))

# Show the cleaned corpora, one list per line
print(processed_positive_sentence, processed_negative_sentence, sep='\n')

['love this product', 'this amazing place', 'feel great today', 'excited about the concert', 'best friend']
['not like this all', 'this terrible mistake', 'tired this nonsense', 'stand the rain', 'worst enemy']


In [4]:
def load_glove_embeddings(path):
    """Read a GloVe-style text file into a ``{word: vector}`` dictionary.

    Every non-blank line is split on whitespace: the first field is taken
    as the token and the remaining fields as its vector components. Blank
    or whitespace-only lines are skipped.

    Args:
        path: Location of the embeddings text file (read as UTF-8).

    Returns:
        dict mapping each token (str) to a 1-D ``float32`` NumPy array.
        If a token appears more than once, the last occurrence wins.
    """
    embeddings = {}
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            values = line.split()
            if not values:
                # Blank line (e.g. a trailing newline): nothing to parse.
                continue
            word, *components = values
            embeddings[word] = np.asarray(components, dtype='float32')
    return embeddings

# Load the pre-trained 300-dimensional GloVe vectors
embeddings = load_glove_embeddings('glove.6B.300d.txt')

# Words missing from the GloVe vocabulary map to a zero vector instead of
# raising KeyError; this is the same fallback the prediction step applies.
oov_vector = np.zeros(300, dtype='float32')

# Convert each cleaned sentence into the list of its word vectors
positive_embeddings = [[embeddings.get(word, oov_vector) for word in sentence.split()] for sentence in processed_positive_sentence]
negative_embeddings = [[embeddings.get(word, oov_vector) for word in sentence.split()] for sentence in processed_negative_sentence]

In [11]:
# Seed the Python, NumPy and backend RNGs so that weight initialisation and
# batch shuffling are repeatable across Restart & Run All.
set_random_seed(42)

# Average the word embeddings for each sentence
positive_vectors = [np.mean(sentence, axis=0) for sentence in positive_embeddings]
negative_vectors = [np.mean(sentence, axis=0) for sentence in negative_embeddings]

# Stack features and labels into arrays once (1 = positive, 0 = negative)
X = np.array(positive_vectors + negative_vectors)
y = np.array([1] * len(positive_vectors) + [0] * len(negative_vectors))

# Split the data into training and testing sets. With so few samples the
# split is stratified so that both classes appear in the held-out set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Define the model: logistic regression on the averaged 300-d sentence vector
model = Sequential()
model.add(Dense(1, input_dim=300, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])

# Train the model.
# NOTE: the held-out split doubles as validation data here; it is only used
# for monitoring, so do not tune hyper-parameters against it.
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100}%")

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step - accuracy: 0.5000 - loss: 0.6903 - val_accuracy: 0.5000 - val_loss: 0.6769
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5000 - loss: 0.6808 - val_accuracy: 0.5000 - val_loss: 0.6772
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5000 - loss: 0.6714 - val_accuracy: 0.5000 - val_loss: 0.6774
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5000 - loss: 0.6623 - val_accuracy: 0.5000 - val_loss: 0.6777
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5000 - loss: 0.6533 - val_accuracy: 0.5000 - val_loss: 0.6780
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5000 - loss: 0.6445 - val_accuracy: 0.5000 - val_loss: 0.6782
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━

In [14]:
# Sentence to classify
sentence = "excellent weather"
# sentence = "worst weather"
processed_sentence = preprocess_text(sentence)

# Preprocessing can remove every token (short words, digits, punctuation).
# Averaging an empty list would yield NaN and a wrongly shaped batch, so
# fail early with a clear message instead.
tokens = processed_sentence.split()
if not tokens:
    raise ValueError(f"No usable words left in {sentence!r} after preprocessing")

# Average the GloVe vectors of the tokens; unknown words contribute a zero
# vector. Tokens are already lower-cased by preprocess_text.
sentence_vector = np.mean([embeddings.get(word, np.zeros((300,))) for word in tokens], axis=0)

# Create a batch of one sample
X_predict = np.array([sentence_vector])

# Use the model to predict the sentiment of the sentence
prediction = model.predict(X_predict)

# Print the prediction (sigmoid output; the model was trained with label 1 = positive)
print(f"Sentence prediction: {prediction[0][0]}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Sentence prediction: 0.6256235241889954
