In [33]:
import pandas as pd
import nltk
import re
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Normalization

# Index
* [DNN: Predicting Category Type](#DNN:-Predicting-Category-Type)
    * [Encoding the Target](#Encoding-the-Target)
    * [Split](#Split)
    * [Train the Model](#Train-the-Model)

# DNN: Predicting Category Type
[Index](#Index)

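References:
* [TensorFlow tutorial: basic regression with Keras (`regression`)](https://www.tensorflow.org/tutorials/keras/regression)
* [`tf.keras.layers.Normalization` API reference (`layers/Normalization`)](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Normalization)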

## Encoding the Target
[Index](#Index)

In [34]:
# Load the preprocessed dataset; every later cell reads from `dataframe`.
processed_csv = '/tf/ML-project/data/processedDataframe.csv'
dataframe = pd.read_csv(processed_csv)

In [35]:
# Turn the textual "Category" target into integer class ids with LabelEncoder.
encoder = LabelEncoder()
category_column = dataframe["Category"]
y_encoded = encoder.fit_transform(category_column)
# `y_encoded` now holds one numeric label per row of `dataframe`.

In [36]:
# Preview the first two rows of every column except the target.
dataframe.drop("Category", axis="columns").head(n=2)

Unnamed: 0,Nutrient Data Bank Number,Data.Alpha Carotene,Data.Ash,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,Data.Choline,Data.Fiber,Data.Kilocalories,...,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - IU,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K
0,1001,0,2.11,158,0,0.06,215,19,0.0,717,...,24,576,0.09,2499,684,0.17,0.003,0.0,2.32,7.0
1,1002,0,2.11,158,0,0.06,219,19,0.0,717,...,26,827,0.05,2499,684,0.13,0.003,0.0,2.32,7.0


## Split
[Index](#Index)

In [37]:
# Feature matrix: every column except the target.
# "Unnamed: 0" is the row index that was written into the CSV when it was
# saved; it is not a measurement and must not be fed to the model as a feature.
# errors="ignore" keeps the cell working if the CSV is re-exported without it.
# NOTE(review): "Nutrient Data Bank Number" also looks like an identifier
# rather than a measurement — confirm whether it should remain a feature.
X = (
    dataframe
    .drop(columns=["Category"])
    .drop(columns=["Unnamed: 0"], errors="ignore")
)
y = y_encoded

# Hold out 33% of the rows for the final evaluation; the fixed random_state
# makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [38]:
# Number of feature columns that will be fed to the network.
X.shape[1]

41

## Train the Model
[Index](#Index)

In [49]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation (and therefore the training run) is repeatable.
tf.keras.utils.set_random_seed(42)

# Number of input features and of output classes.
n_features = len(X.columns)
n_classes = pd.Series(y_encoded).nunique()

# The Normalization layer must be adapted before training: adapt() computes the
# per-feature mean and variance, which the layer then applies during both
# training and inference. Only the training split is used, so no statistics
# from the test rows reach the model.
# The layer is kept in a named variable instead of being looked up through
# model.layers[0], which would silently break if the layer order changed.
normalizer = Normalization()
normalizer.adapt(X_train.to_numpy())

# Build the classifier. The input shape is declared with an explicit Input
# object; passing `input_shape=` to the first layer makes Keras emit a warning.
model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    normalizer,                              # feature standardisation
    Dense(128, activation='relu'),           # first hidden layer
    Dense(256, activation='relu'),           # second hidden layer
    Dense(128, activation='relu'),           # third hidden layer
    Dense(n_classes, activation='softmax'),  # one probability per class
])

# Integer-encoded targets pair with the sparse categorical cross-entropy loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print the layer-by-layer summary of the model.
model.summary()


  super().__init__(**kwargs)


In [50]:
# Train the model: 30 epochs in mini-batches of 32, with 20% of the training
# data set aside for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=30,
)


Epoch 1/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.1304 - loss: 5.5294 - val_accuracy: 0.2767 - val_loss: 4.2464
Epoch 2/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.2963 - loss: 3.6616 - val_accuracy: 0.3612 - val_loss: 3.6740
Epoch 3/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.3667 - loss: 2.9555 - val_accuracy: 0.4276 - val_loss: 3.4637
Epoch 4/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.4568 - loss: 2.4360 - val_accuracy: 0.4728 - val_loss: 3.2553
Epoch 5/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5105 - loss: 2.0903 - val_accuracy: 0.5050 - val_loss: 3.3320
Epoch 6/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.5538 - loss: 1.7496 - val_accuracy: 0.5523 - val_loss: 3.3478
Epoch 7/30
[1m125/125[0

In [51]:
# Score the trained model on the held-out test split.
# The result unpacks into the loss followed by the accuracy metric set in compile().
test_metrics = model.evaluate(X_test, y_test)
test_loss, test_accuracy = test_metrics

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6067 - loss: 4.4612
Test Loss: 4.348629474639893
Test Accuracy: 0.6195341348648071
