## We import the required libraries

In [1]:
import pandas as pd
import numpy as np
import re
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

## We load the dataset

In [6]:
# Read the training and validation splits from CSV files located in the
# current working directory.
df_train, df_valid = (
    pd.read_csv(csv_path) for csv_path in ("sent_train.csv", "sent_valid.csv")
)

## Preprocessing

In [7]:
# Human-readable sentiment name for each "LABEL_<n>" identifier.
sentiments = dict(
    LABEL_0="Bearish",
    LABEL_1="Bullish",
    LABEL_2="Neutral",
)

In [8]:
def clean_text(text):
    """Normalize a raw text string for tokenization.

    The text is lower-cased, every ``http...`` token (up to the next
    whitespace) is removed, and then every character that is neither an
    ASCII letter nor whitespace is deleted. Leading and trailing whitespace
    is stripped; interior whitespace is left untouched.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    lowered = text.lower()
    # URLs are removed first so their letters do not survive the next step.
    without_urls = re.sub(r'http\S+', '', lowered)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', without_urls)
    return letters_only.strip()

In [9]:
# Clean the 'text' column of both splits in place. Values are cast to str
# first so clean_text always receives a string.
df_train['text'], df_valid['text'] = (
    frame['text'].astype(str).apply(clean_text)
    for frame in (df_train, df_valid)
)

## Tokenization

In [11]:
# Build the vocabulary from the training text only; the validation split is
# converted with the same fitted tokenizer. Words not in the vocabulary are
# represented by the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>", num_words=5000)
tokenizer.fit_on_texts(df_train['text'])

# Convert each text into a list of integer token ids.
X_train, X_valid = (
    tokenizer.texts_to_sequences(frame['text'])
    for frame in (df_train, df_valid)
)

## Padding

In [12]:
# Bring every sequence to exactly `max_length` token ids; shorter sequences
# are padded at the end ('post').
max_length = 50
X_train, X_valid = (
    pad_sequences(sequences, maxlen=max_length, padding='post')
    for sequences in (X_train, X_valid)
)

## Label encoding

In [13]:
# Learn the label -> integer mapping from the training labels only, then
# apply that same mapping to both splits.
label_encoder = LabelEncoder().fit(df_train['label'])
y_train, y_valid = (
    label_encoder.transform(frame['label'])
    for frame in (df_train, df_valid)
)

## Labels to one-hot

In [14]:
# Expand the integer class ids into one-hot vectors of width 3.
y_train, y_valid = (
    to_categorical(encoded, num_classes=3)
    for encoded in (y_train, y_valid)
)

## LSTM building

In [15]:
# Sequence classifier: token ids -> embeddings -> LSTM -> 3-way softmax.
model = Sequential([
    # Declare the input shape explicitly. Embedding's `input_length` argument
    # is deprecated in Keras 3, so the sequence length is given here instead;
    # this form also works on Keras 2.
    tf.keras.Input(shape=(max_length,)),
    # The embedding table size follows the tokenizer's vocabulary cap so the
    # two values cannot drift apart.
    Embedding(input_dim=tokenizer.num_words, output_dim=128),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    # One output unit per sentiment class.
    Dense(3, activation='softmax'),
])



## Compilation

In [16]:
# Categorical cross-entropy matches the one-hot encoded targets; accuracy is
# tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)