<a href="https://colab.research.google.com/github/micaelCZ/Paper_Repositorio/blob/main/PAPER_Escenario_A_SVM_KERNELS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#  Escenario A KERNEL=SIGMOID


https://raw.githubusercontent.com/micaelCZ/Paper_Repositorio/main/dataset/datasetPreprocesado/Escenario1.csv


In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.impute import SimpleImputer
from sklearn.metrics import precision_recall_fscore_support
from sklearn.svm import SVC

In [2]:
# Cargar el Dataset
datapath = 'https://raw.githubusercontent.com/micaelCZ/Paper_Repositorio/main/dataset/datasetPreprocesado/Escenario1.csv'
dataframe = pd.read_csv(datapath, low_memory=False, sep=';')


In [3]:
dataframe.head()

Unnamed: 0,Source IP,Source Port,Destination IP,Destination Port,Protocol,Flow Duration,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,...,Bwd IAT Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,label
0,10.0.2.15,53913,216.58.208.46,80,6,435,0,45.977.011.494.253,435,0,...,0,0,0,0,0,0,0,0,0,nonTOR
1,10.0.2.15,53913,216.58.208.46,80,6,259,0,77.220.077.220.077,259,0,...,0,0,0,0,0,0,0,0,0,nonTOR
2,10.0.2.15,53913,216.58.208.46,80,6,891,0,22.446.689.113.356,891,0,...,0,0,0,0,0,0,0,0,0,nonTOR
3,10.0.2.15,53913,216.58.208.46,80,6,1074,0,18.621.973.929.237,1074,0,...,0,0,0,0,0,0,0,0,0,nonTOR
4,10.0.2.15,53913,216.58.208.46,80,6,315,0,63.492.063.492.064,315,0,...,0,0,0,0,0,0,0,0,0,nonTOR


In [4]:
dataframe.shape

(7000, 29)

In [5]:
def dfNormalize(df):
    for feature_name in df.columns:
        df.loc[:,feature_name]= pd.to_numeric(df.loc[:,feature_name], errors='coerce').fillna(0)
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()
        if (max_value - min_value) > 0:
            df.loc[:,feature_name] = (df.loc[:,feature_name] - min_value) / (max_value - min_value)
        else:
            df.loc[:,feature_name] = (df.loc[:,feature_name]- min_value)
    return df

In [6]:
dataframe = dataframe.reindex(np.random.permutation(dataframe.index)).copy()


In [7]:
keys = dataframe.keys()
data_to_process = dataframe[keys[4:len(keys) - 1]].copy()
x_normalised = dfNormalize(data_to_process)
print(x_normalised.describe())

        Protocol   Flow Duration   Flow Bytes/s   Flow Packets/s  \
count     7000.0     7000.000000    7000.000000      7000.000000   
mean         0.0        0.400219       0.000143         0.000167   
std          0.0        0.445049       0.011952         0.012051   
min          0.0        0.000000       0.000000         0.000000   
25%          0.0        0.020134       0.000000         0.000000   
50%          0.0        0.044090       0.000000         0.000000   
75%          0.0        0.982961       0.000000         0.000000   
max          0.0        1.000000       1.000000         1.000000   

        Flow IAT Mean   Flow IAT Std   Flow IAT Max   Flow IAT Min  \
count     7000.000000         7000.0    7000.000000    7000.000000   
mean         0.017407            0.0       0.114853       0.010376   
std          0.055836            0.0       0.211398       0.043627   
min          0.000000            0.0       0.000000       0.000000   
25%          0.000000            0.0 

In [8]:
change_labels = lambda x: 1 if x == 'nonTOR' else 0

In [9]:
y_normalised = dataframe['label'].apply(change_labels)


In [10]:
# Imputar los valores faltantes con SimpleImputer
imputer = SimpleImputer(strategy='median')
X_imputed = imputer.fit_transform(x_normalised)

In [11]:

# Escalar los datos utilizando StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

In [15]:
# Entrenar un modelo SVM utilizando SVC de sklearn y cross validation
svm = SVC(kernel='sigmoid', gamma='auto', coef0=6, C=1.0, random_state=42)
scores = cross_val_score(svm, X_scaled, y_normalised, cv=5, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


Accuracy: 0.51 (+/- 0.00)


In [16]:
# Evaluar el modelo utilizando las métricas de precisión, recall y f1-score
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_normalised, test_size=0.3, random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='micro')
print('Precision:', precision*100)
print('Recall:', recall*100)
print('F1-score:', f1_score*100)

Precision: 49.28571428571429
Recall: 49.28571428571429
F1-score: 49.28571428571429


#  **Escenario A KERNEL=LINEAR**







In [80]:
from sklearn.model_selection import cross_val_score

svm = SVC(kernel='linear', C=0.000001, random_state=42)
scores = cross_val_score(svm, X_scaled, y_normalised, cv=5, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


Accuracy: 0.57 (+/- 0.01)


In [63]:
# Evaluar el modelo utilizando las métricas de precisión, recall y f1-score
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_normalised, test_size=0.3, random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='micro')
print('Precision:', precision*100)
print('Recall:', recall*100)
print('F1-score:', f1_score*100)

Precision: 49.80952380952381
Recall: 49.80952380952381
F1-score: 49.80952380952381


#  **Escenario A KERNEL= RBF**

In [84]:
from sklearn.model_selection import cross_val_score

svm = SVC(kernel='rbf', gamma='auto', C=0.00001, random_state=42)
scores = cross_val_score(svm, X_scaled, y_normalised, cv=5, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


Accuracy: 0.88 (+/- 0.02)


In [65]:
# Evaluar el modelo utilizando las métricas de precisión, recall y f1-score
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_normalised, test_size=0.3, random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='micro')
print('Precision:', precision*100)
print('Recall:', recall*100)
print('F1-score:', f1_score*100)

Precision: 49.80952380952381
Recall: 49.80952380952381
F1-score: 49.80952380952381


#  **Escenario A KERNEL= POLINOMYAL**

In [87]:
# Entrenar un modelo SVM utilizando SVC de sklearn y cross validation
svm = SVC(kernel='poly', degree=3, gamma='auto', coef0=6, C=0.00001, random_state=42)
scores = cross_val_score(svm, X_scaled, y_normalised, cv=5, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


Accuracy: 0.57 (+/- 0.01)


In [67]:
# Evaluar el modelo utilizando las métricas de precisión, recall y f1-score
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_normalised, test_size=0.3, random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='micro')
print('Precision:', precision*100)
print('Recall:', recall*100)
print('F1-score:', f1_score*100)

Precision: 56.095238095238095
Recall: 56.095238095238095
F1-score: 56.095238095238095
