In [None]:
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Assume you have a DataFrame 'df' with columns 'text' and 'sentiment'
# 'text' contains the scraped and cleaned text of posts
# 'sentiment' contains the corresponding sentiment labels (positive, negative, neutral)

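# Load your scraped dataset. This step is required: uncomment the line below and
# point it at your CSV file. If `df` is not defined, the code that follows fails
# with a NameError.
# df = pd.read_csv('your_dataset.csv')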

# Preprocess the text data
def preprocess_text(text):
    """Return a cleaned, lower-cased plain-text version of a single post.

    Args:
        text: Raw post content, possibly containing HTML markup. ``None`` and
            float NaN (the usual representation of an empty cell in a pandas
            column) are accepted; other non-string values are converted with
            ``str()``.

    Returns:
        The text with HTML markup removed and letters lower-cased, or an
        empty string when the input is a missing value.
    """
    # Missing cells have no string methods; treat them as empty documents
    # instead of crashing the whole ``Series.apply`` call.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)

    # Strip markup BEFORE lower-casing: HTML entity names are case-sensitive
    # (e.g. ``&Dagger;`` and ``&dagger;`` are different characters), so
    # lower-casing the raw markup first can decode the wrong character.
    text = BeautifulSoup(text, 'html.parser').get_text()
    text = text.lower()
    # Additional cleaning steps...

    return text

# Clean every post in place so all later steps see normalised text.
df['text'] = df['text'].apply(preprocess_text)

# Map the string sentiment labels to integer class ids. The fitted encoder is
# kept so predictions can be turned back into label names later.
label_encoder = LabelEncoder()
label_encoder.fit(df['sentiment'])
df['sentiment_encoded'] = label_encoder.transform(df['sentiment'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['sentiment_encoded'],
    test_size=0.2,
    random_state=42,
)

# Turn the text into fixed-length integer sequences for the network.
max_words = 10000  # vocabulary size limit passed to the tokenizer
max_len = 100      # every sequence is padded/truncated to this many tokens

# The vocabulary is learned from the training split only.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

# Apply the same two steps (text -> ids, ids -> padded matrix) to both splits.
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(split) for split in (X_train, X_test)
)
X_train_pad, X_test_pad = (
    pad_sequences(seqs, maxlen=max_len) for seqs in (X_train_seq, X_test_seq)
)

# Build the LSTM model
# Size the output layer from the fitted label encoder instead of hard-coding 3,
# so the network matches however many sentiment labels the dataset contains.
num_classes = len(label_encoder.classes_)
model = Sequential()
model.add(Embedding(max_words, 128, input_length=max_len))
model.add(LSTM(64))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
# The sparse loss is used because the targets are integer class ids produced by
# LabelEncoder, not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
# validation_split reserves 20% of the training data for per-epoch validation.
model.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate the model
# evaluate() returns [loss, accuracy] for the single metric compiled above.
accuracy = model.evaluate(X_test_pad, y_test)[1]
print(f'Model Accuracy on Test Set: {accuracy * 100:.2f}%')


NameError: ignored