<a href="https://colab.research.google.com/github/micaelCZ/Paper_Repositorio/blob/main/EscenarioB_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import Precision, Recall
from keras.callbacks import EarlyStopping
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from tensorflow_addons.metrics import F1Score

In [3]:
# Load the data
url = 'https://raw.githubusercontent.com/micaelCZ/Paper_Repositorio/main/dataset/datasetPreprocesado/Escenario2.csv'
df = pd.read_csv(url)
df = pd.read_csv(url,low_memory=False)
dataframe = pd.read_csv(url,low_memory=False)

In [4]:
# Normalise the data
def dfNormalize(df):
    for feature_name in df.columns:
        df.loc[:,feature_name]= pd.to_numeric(df.loc[:,feature_name], errors='coerce').fillna(0)
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()   
        if (max_value - min_value) > 0:
            df.loc[:,feature_name] = (df.loc[:,feature_name] - min_value) / (max_value - min_value)
        else:
            df.loc[:,feature_name] = (df.loc[:,feature_name]- min_value)    
    return df

In [5]:
# Split the data into train and test sets
y = df['label']
label_map = {'AUDIO': 0, 'BROWSING': 1, 'CHAT': 2, 'FILE-TRANSFER' : 3, 'MAIL' : 4, 'P2P' : 5, 'VIDEO' : 6, 'VOIP' : 7}
change_labels = lambda x: label_map[x] if x in label_map else -1
y = df['label'].apply(change_labels)

In [6]:
X = df.iloc[:, 4:-1]
X = dfNormalize(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [7]:
# Reshape the data for Conv1D
X_train = X_train.values.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.values.reshape((X_test.shape[0], X_test.shape[1], 1))


In [8]:
# One-hot encode the labels
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)


In [9]:
def create_model(optimizer='adam', dropout_rate=0.5):
    model = keras.models.Sequential([
        keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
        keras.layers.MaxPool1D(pool_size=2),
        keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu'),
        keras.layers.MaxPool1D(pool_size=2),
        keras.layers.Flatten(),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dropout(dropout_rate),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(dropout_rate),
        keras.layers.Dense(8, activation='softmax')
    ])
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', 
                  metrics=['accuracy', Precision(), Recall(), F1Score(num_classes=8, average='macro')])
    return model

In [10]:
# Create the KerasClassifier for GridSearchCV
model = KerasClassifier(build_fn=create_model, verbose=0)





  model = KerasClassifier(build_fn=create_model, verbose=0)


In [11]:
# Define the hyperparameters to search
param_grid = {
    'optimizer': ['adam', 'sgd'],
    'dropout_rate': [0.5, 0.6, 0.7, 0.8, 0.9], # Aumentar el rango de valores de dropout_rate
    'epochs': [30, 40, 50] # Aumentar el número de épocas de entrenamiento
}

In [12]:
# Get the best hyperparameters and fit the model with them
best_params = grid_search.best_params_
best_optimizer = best_params['optimizer']
best_dropout_rate = best_params['dropout_rate']
best_epochs = best_params['epochs']
best_model = create_model(optimizer=best_optimizer, dropout_rate=best_dropout_rate)
best_model.fit(X_train, y_train, epochs=best_epochs, batch_size=32, verbose=1, validation_split=0.1,
              callbacks=[EarlyStopping(patience=5)])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50


Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50


<keras.callbacks.History at 0x1be6d7c3cd0>

In [13]:

# Evaluate the model on test data
test_loss, test_acc, test_precision, test_recall, test_f1score = best_model.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_acc:.4f}')
print(f'Test Precision: {test_precision:.4f}')
print(f'Test Recall: {test_recall:.4f}')
print(f'Test F1 Score: {test_f1score:.4f}')


Test Loss: 0.6877
Test Accuracy: 0.7595
Test Precision: 0.8175
Test Recall: 0.6849
Test F1 Score: 0.5718
