In [None]:
# Libraries
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
# Read the preprocessed training set from disk into a DataFrame
train_csv_path = '../data/processed/train_data.csv'
train_data = pd.read_csv(train_csv_path)

In [None]:
# Prepare data for training
max_words = 5000  # vocabulary cap: passed to the Tokenizer as num_words and to the Embedding as its input size
max_len = 200     # fixed sequence length that every review is padded or truncated to

# Seed the NumPy and TensorFlow global generators so weight initialisation and
# dropout masks repeat from run to run. The train/validation split is seeded
# separately through its own random_state.
# NOTE(review): bit-for-bit determinism may additionally depend on the
# TensorFlow version and on GPU kernels - confirm if exact repeatability matters.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Tokenization
# A review that is empty in the CSV is read back by pandas as NaN (a float),
# which the tokenizer cannot lower-case or split. Coerce every entry to text so
# such rows become empty sequences instead of crashing the cell; rows are kept
# (not dropped) so the features stay aligned with the labels below.
reviews = train_data['review'].fillna('').astype(str).values

# Build the word index from the reviews, then encode each review as a list of
# word indices and pad/truncate all of them to max_len.
tokenizer = Tokenizer(num_words=max_words, lower=True)
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)
X = pad_sequences(sequences, maxlen=max_len)

# One-hot encode the labels (one column per distinct sentiment value, in sorted
# order). get_dummies returns bool columns on pandas >= 2.0 and uint8 on older
# releases, so cast explicitly to give the targets a stable float dtype.
Y = pd.get_dummies(train_data['sentiment']).values.astype('float32')

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# shuffle, and therefore the split, identical between runs.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build the classifier as a single layer stack:
# embedding -> spatial dropout -> LSTM -> 2-way softmax.
model = Sequential([
    # Map each of the max_words token indices to a 100-dimensional vector.
    Embedding(max_words, 100, input_length=X.shape[1]),
    # Drop whole embedding channels rather than individual elements.
    SpatialDropout1D(0.2),
    # 100 recurrent units, with dropout on both the inputs and the recurrent state.
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    # One probability per one-hot label column.
    Dense(2, activation='softmax'),
])

# Configure training: cross-entropy over the one-hot targets, Adam optimiser,
# and accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit for 5 epochs in mini-batches of 64, evaluating on the held-out split after
# every epoch. verbose=2 logs one summary line per epoch instead of a progress bar.
history = model.fit(
    X_train,
    Y_train,
    epochs=5,
    batch_size=64,
    validation_data=(X_val, Y_val),
    verbose=2,
)

In [None]:
# Save the trained model
# Create the output directory first so a missing folder cannot throw away a
# finished training run at the very last step.
os.makedirs('../models', exist_ok=True)
model.save('../models/sentiment_model.h5')

# Persist the fitted tokenizer next to the model. The network only understands
# the word indices this tokenizer assigned, so inference code needs exactly the
# same vocabulary to encode new reviews.
with open('../models/tokenizer.json', 'w', encoding='utf-8') as tokenizer_file:
    tokenizer_file.write(tokenizer.to_json())