# __Convolutional Neural Network (CNN)__

In [46]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import (
    confusion_matrix,
    classification_report
)
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical

# Custom libraries
import sys
sys.path.append('..')
from functions.models import *

### Load data

In [41]:
# Feature tables for the three pre-made splits (tab-separated files).
X_train, X_test, X_val = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in ('../data/X_train.tsv', '../data/X_test.tsv', '../data/X_val.tsv')
)

# Matching label tables, read in the same train / test / validation order.
y_train, y_test, y_val = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in ('../data/y_train.tsv', '../data/y_test.tsv', '../data/y_val.tsv')
)

# Sanity check: preview the first rows of the training features and labels.
print(X_train.head(), y_train.head(), sep='\n')

                                             comment
0  everyone think he laugh screwing people instea...
1                                               fuck
2                               make feel threatened
3                              dirty southern wanker
4  omg good enough help u playoff dumbass bronco ...
   label
0     27
1      2
2     14
3      3
4     26


### Tokenization, Padding and Sequencing

In [44]:
# Define the tokenizer; "<OOV>" is the placeholder token Keras substitutes for
# out-of-vocabulary words when converting text to sequences.
tokenizer = Tokenizer(oov_token="<OOV>")
# Obtain the padded train and validation sequences (X_val is passed here, not X_test),
# plus -- judging by the returned names -- the longest sequence length, the vocabulary
# size and the tokenizer itself. The "comment" argument names the text column.
# NOTE(review): `tokenization` is presumably provided by the star import from
# functions.models; confirm its exact contract there.
train_padded, val_padded, max_seq_len, vocab_size, tokenizer = tokenization(tokenizer, X_train, X_val, "comment")

### Model Building

In [47]:
# List of emotion categories
cat_labels = [
    'admiration', 'amusement', 'anger', 'annoyance',
    'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust',
    'embarrassment', 'excitement', 'fear', 'gratitude',
    'grief', 'joy', 'love', 'nervousness',
    'optimism', 'pride', 'realization', 'relief',
    'remorse', 'sadness', 'surprise', 'neutral',
]
# The number of output classes follows directly from the category list.
num_labels = len(cat_labels)

# One-hot encode the integer labels of the train and validation splits so they
# are in a format suitable for training.
y_train_categorical, y_val_categorical = (
    to_categorical(split_labels, num_classes=num_labels)
    for split_labels in (y_train, y_val)
)

In [None]:
# Create the model as a single declarative stack of layers.
model = Sequential([
    # Embedding layer: 50-dimensional vector per vocabulary entry.
    Embedding(input_dim=vocab_size, output_dim=50, input_length=max_seq_len),

    # Convolutional block: 16 filters of width 3, then pooling and dropout.
    Conv1D(filters=16, kernel_size=3, strides=1, padding="same", activation="relu"),
    MaxPooling1D(pool_size=2),
    Dropout(0.5),

    # Flatten and classifier.
    Flatten(),
    Dropout(0.5),
    Dense(32, activation="relu"),

    # Output layer: softmax over the emotion classes (multi-class classification).
    Dense(num_labels, activation="softmax"),
])

# Compile the model.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Print the model summary.
model.summary()

### Model Training

In [48]:
# Make sure the checkpoint directory exists before training starts. Not every
# Keras version creates it, and a missing folder would make the first
# checkpoint save fail after a full epoch of work.
checkpoint_path = '../trained_models/cnn.h5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Define EarlyStopping and ModelCheckpoint.
# EarlyStopping: stop after 5 epochs without a val_loss improvement and roll the
# model back to the best weights seen.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, restore_best_weights=True)
# ModelCheckpoint: write the model to disk only when val_loss improves.
mc = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', mode='min', verbose=1, save_best_only=True)

# Train the model, validating on the held-out validation split after each epoch.
history = model.fit(
    train_padded, y_train_categorical,
    epochs=30,
    batch_size=64,
    shuffle=True,
    validation_data=(val_padded, y_val_categorical),
    callbacks=[es, mc]
)

Epoch 1/30
Epoch 1: val_loss improved from inf to 2.71873, saving model to ../trained_models\cnn.h5
Epoch 2/30
Epoch 2: val_loss improved from 2.71873 to 2.58660, saving model to ../trained_models\cnn.h5
Epoch 3/30
Epoch 3: val_loss improved from 2.58660 to 2.54288, saving model to ../trained_models\cnn.h5
Epoch 4/30
Epoch 4: val_loss improved from 2.54288 to 2.50418, saving model to ../trained_models\cnn.h5
Epoch 5/30
Epoch 5: val_loss improved from 2.50418 to 2.47828, saving model to ../trained_models\cnn.h5
Epoch 6/30
Epoch 6: val_loss did not improve from 2.47828
Epoch 7/30
Epoch 7: val_loss improved from 2.47828 to 2.45229, saving model to ../trained_models\cnn.h5
Epoch 8/30
Epoch 8: val_loss did not improve from 2.45229
Epoch 9/30
Epoch 9: val_loss did not improve from 2.45229
Epoch 10/30
Epoch 10: val_loss did not improve from 2.45229
Epoch 11/30
Epoch 11: val_loss did not improve from 2.45229
Epoch 12/30

Epoch 12: val_loss did not improve from 2.45229
Epoch 12: early stopping


### Model Validation

### Model Evaluation