# Fase 3b: Deep Learning Models (BiLSTM)

En este notebook, usamos `Word2Vec` para entrenar embeddings y `BiLSTM` para la clasificación.

In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split

# Make the project's ../src directory importable so `dl_models` resolves.
# Guard the append: this cell is re-run often (autoreload workflow), and an
# unconditional append would add a duplicate sys.path entry on every run.
SRC_DIR = os.path.abspath("../src")
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
from dl_models import DLManager

## 1. Load Data

In [2]:
# Read the processed corpus and discard rows missing either the text or the label.
data_path = Path("../data/processed_corpus_balanced.csv")
df = pd.read_csv(data_path).dropna(subset=['clean_text', 'sentiment_score'])

# Features: cleaned text coerced to str. Target: the sentiment score column.
X, y = df['clean_text'].astype(str), df['sentiment_score']

# Stratified hold-out split (20% test) with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
print(f"Train: {len(X_train)}, Test: {len(X_test)}")

Train: 94186, Test: 23547


## 2. Train Word2Vec & BiLSTM

In [3]:
# Build the manager with the hyperparameters used for this run.
dl = DLManager(
    vector_size=100,
    max_len=100,
    hidden_dim=128,
)

# Step 1: fit the Word2Vec embeddings on the training split only
# (the test split is never passed to the embedding trainer).
print("Training Word2Vec...")
dl.train_w2v(X_train)

# Step 2: carve a stratified 10% validation set out of the training split,
# then train the classifier on the remaining 90%.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=42,
    stratify=y_train,
)

print("Training BiLSTM...")
model = dl.train_model(
    X_tr, y_tr,
    X_val, y_val,
    epochs=5,
    batch_size=64,
)


Using device: cpu
Training Word2Vec...
Training Word2Vec...
Word2Vec trained.
Training BiLSTM...


Epoch 1/5:   3%|‚ñé         | 41/1325 [00:11<05:49,  3.68it/s]


KeyboardInterrupt: 

## 3. Evaluation

In [None]:
# Evaluate on the held-out test split created in section 1.
# Requires `dl` from the training cell to have finished training; if that cell
# was interrupted, re-run it to completion before executing this one.
# NOTE(review): the return type of `evaluate` is not visible here — presumably a
# printable report; confirm in dl_models.
report = dl.evaluate(X_test, y_test)
print(report)