<a href="https://colab.research.google.com/github/ridhokurniawan-u/Sentiment-Model/blob/main/Sentiment_TrainingModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install keras_nlp

Collecting keras_nlp
  Downloading keras_nlp-0.7.0-py3-none-any.whl (415 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m415.4/415.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras-core (from keras_nlp)
  Downloading keras_core-0.1.7-py3-none-any.whl (950 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-text (from keras_nlp)
  Downloading tensorflow_text-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
Collecting namex (from keras-core->keras_nlp)
  Downloading namex-0.0.7-py3-none-any.whl (5.8 kB)
Installing collected packages: namex, keras-core, tensorflow-text, keras_nlp
Successfully installed keras-core-0.1.7 keras_nlp-0.7.0 namex-0.0.7 tensorflow-text-2.15.0


In [8]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive

# Mount Google Drive
drive.mount('/content/gdrive')

# Load the IMDb dataset
max_features = 10000  # Only consider the top 10,000 words in the dataset
maxlen = 100  # Cut reviews after 100 words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pad sequences to have consistent length for input to the model
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# Build the model
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(x_train, y_train, epochs=15, batch_size=32, validation_split=0.2)

# Evaluate the model on test data
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test Accuracy: {test_acc}')

# Save the model to Google Drive
model_save_path = '/content/gdrive/My Drive/Sentiment_Model.h5'
model.save(model_save_path)


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test Accuracy: 0.8262400031089783


In [9]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive

# Mount Google Drive
drive.mount('/content/gdrive')

# Load the IMDb dataset (for test data)
max_features = 10000  # Only consider the top 10,000 words in the dataset
maxlen = 100  # Cut reviews after 100 words
(_, _), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pad sequences to have consistent length for input to the model
x_test = pad_sequences(x_test, maxlen=maxlen)

# Load the saved model
model_save_path = '/content/gdrive/My Drive/Sentiment_Model.h5'
model = tf.keras.models.load_model(model_save_path)

# Evaluate the model on test data
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test Accuracy: {test_acc}')


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Test Accuracy: 0.8262400031089783


In [11]:
import json
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import text_to_word_sequence
from tensorflow.keras.datasets import imdb
from google.colab import drive

# Function to load the saved model
def load_model(model_path):
    model = tf.keras.models.load_model(model_path)
    return model

# Function to preprocess the text data
def preprocess_text(text, word_index, maxlen=100):
    tokens = text_to_word_sequence(text)
    tokens = [word_index.get(word, 0) for word in tokens]
    padded = pad_sequences([tokens], maxlen=maxlen)
    return padded

# Function to predict sentiment
def predict_sentiment(review, model, word_index):
    processed_review = preprocess_text(review, word_index)
    prediction = model.predict(processed_review)
    return prediction[0][0]

# Function to classify sentiment
def classify_sentiment(prediction):
    return "Positive" if prediction > 0.5 else "Negative"

# Function to process reviews and write results to a file
def analyze_reviews(json_path, model, word_index, output_path):
    with open(json_path, 'r') as file:
        data = json.load(file)

    with open(output_path, 'w') as file:
        for item in data:
            review = item['quotes']
            prediction = predict_sentiment(review, model, word_index)
            sentiment = classify_sentiment(prediction)
            file.write(f'Review: {review}\nSentiment: {sentiment} ({prediction * 100:.2f}% confidence)\n\n')

# Mount Google Drive to access the model and the JSON file
drive.mount('/content/gdrive')

# Paths
model_path = '/content/gdrive/MyDrive/Sentiment_Model.h5'
json_path = '/content/gdrive/MyDrive/Sentiment_Model/reformatted_quotes_AbsolutelyAnything.json'
output_path = '/content/gdrive/MyDrive/Sentiment_Model/sentiment_results_AbsolutelyAnything.txt'

# Load the model and IMDb word index
model = load_model(model_path)
word_index = imdb.get_word_index()

# Analyze the reviews
analyze_reviews(json_path, model, word_index, output_path)

print("Sentiment analysis complete. Results saved to sentiment_results.txt.")


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Sentiment analysis complete. Results saved to sentiment_results.txt.
