In [None]:
!pip install pandas scikit-learn tensorflow



In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed, to_categorical

In [None]:
# Location of the cybersecurity_attacks CSV, served as a raw file from GitHub.
url = "https://raw.githubusercontent.com/incribo-inc/cybersecurity_attacks/main/cybersecurity_attacks.csv"

# Download and parse the CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

# Print the frame's text representation as a quick look at what was loaded.
print(df)

                 Timestamp Source IP Address Destination IP Address  \
0      2023-05-30 06:33:58     103.216.15.12           84.9.164.252   
1      2020-08-26 07:08:30    78.199.217.198         66.191.137.154   
2      2022-11-13 08:23:25      63.79.210.48          198.219.82.17   
3      2023-07-02 10:38:46     163.42.196.10        101.228.192.255   
4      2023-07-16 13:11:07     71.166.185.76        189.243.174.238   
...                    ...               ...                    ...   
39995  2023-05-26 14:08:42      26.36.109.26         121.100.75.240   
39996  2023-03-27 00:38:27      17.21.163.81         196.108.134.78   
39997  2022-03-31 01:45:49     162.35.217.57            98.107.0.15   
39998  2023-09-22 18:32:38    208.72.233.205         173.79.112.252   
39999  2023-10-10 11:59:52     14.102.21.108           109.198.45.7   

       Source Port  Destination Port Protocol  Packet Length Packet Type  \
0            31225             17616     ICMP            503        Dat

In [None]:
# Numeric columns: fill missing values with the median of each column.
numeric_columns = df.select_dtypes(include=[np.number]).columns
numeric_medians = df[numeric_columns].median()
df[numeric_columns] = df[numeric_columns].fillna(numeric_medians)

# Object-dtype (categorical/text) columns: label missing values as "Unknown".
categorical_columns = df.select_dtypes(include=['object']).columns
df[categorical_columns] = df[categorical_columns].fillna("Unknown")

# Show the first rows after imputation.
print(df.head())

# Separate the target column from the feature columns.
y = df["Severity Level"]
X = df.drop(columns=["Severity Level"])


             Timestamp Source IP Address Destination IP Address  Source Port  \
0  2023-05-30 06:33:58     103.216.15.12           84.9.164.252        31225   
1  2020-08-26 07:08:30    78.199.217.198         66.191.137.154        17245   
2  2022-11-13 08:23:25      63.79.210.48          198.219.82.17        16811   
3  2023-07-02 10:38:46     163.42.196.10        101.228.192.255        20018   
4  2023-07-16 13:11:07     71.166.185.76        189.243.174.238         6131   

   Destination Port Protocol  Packet Length Packet Type Traffic Type  \
0             17616     ICMP            503        Data         HTTP   
1             48166     ICMP           1174        Data         HTTP   
2             53600      UDP            306     Control         HTTP   
3             32534      UDP            385        Data         HTTP   
4             26646      TCP           1462        Data          DNS   

                                        Payload Data  ... Action Taken  \
0  Qui natus

In [None]:
# Keep only the numeric feature columns; the remaining columns are not used
# as model inputs in this cell.
numeric_columns = X.select_dtypes(include=[np.number]).columns
X_numeric = X[numeric_columns]

# Turn the target into integer class codes unless it is already numeric.
# Comparing a dtype against the abstract `np.number` class is deprecated and
# does not actually test "is numeric", so the pandas dtype helper is used.
if not pd.api.types.is_numeric_dtype(y):
    y = y.astype("category").cat.codes
# One-hot encode the class codes for the softmax / categorical cross-entropy setup.
y_categorical = to_categorical(y)

# Split BEFORE scaling so that the test rows do not contribute to the scaler's
# mean/variance (fitting on the full data leaks test statistics into training).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_numeric, y_categorical, test_size=0.3, random_state=42
)

# Standardize features: statistics are learned from the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix on the same (train-fitted) scale, kept so that any other
# cell reading `X_scaled` still finds it defined.
X_scaled = scaler.transform(X_numeric)

  if y.dtype != np.number:


In [None]:
# Function that builds and compiles the model
def build_model(layers, neurons, dropout_rate):
    """Build and compile a fully connected softmax classifier.

    The network is a stack of `layers` hidden blocks, each a ReLU `Dense`
    layer followed by a `Dropout` layer, topped by a softmax output layer.
    The input width is read from the notebook-level `X_train` and the number
    of output classes from the notebook-level `y_train`, so both must be
    defined before this function is called.

    Args:
        layers: Number of hidden Dense+Dropout blocks to create (at least 1).
        neurons: Units for each hidden layer; the first `layers` entries are used.
        dropout_rate: Dropout rate for each hidden layer; the first `layers`
            entries are used.

    Returns:
        A compiled `Sequential` model using Adam (learning rate 0.001),
        categorical cross-entropy loss and the accuracy metric.

    Raises:
        ValueError: If `layers` is smaller than 1, or if `neurons` or
            `dropout_rate` has fewer than `layers` entries.
    """
    # Fail loudly instead of silently building a smaller network than requested
    # (zip-style truncation would otherwise hide a mismatched configuration).
    if layers < 1:
        raise ValueError(f"layers must be at least 1, got {layers}")
    if len(neurons) < layers or len(dropout_rate) < layers:
        raise ValueError(
            f"neurons and dropout_rate need at least {layers} entries each, "
            f"got {len(neurons)} and {len(dropout_rate)}"
        )

    model = Sequential()
    # Declare the input shape with an explicit Input layer; passing
    # `input_shape` to the first Dense layer makes Keras emit a warning.
    model.add(Input(shape=(X_train.shape[1],)))

    # One Dense + Dropout pair per requested hidden layer.
    for units, rate in zip(neurons[:layers], dropout_rate[:layers]):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(rate))

    # Output layer: one unit per one-hot encoded class.
    model.add(Dense(y_train.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Candidate network topologies to train and compare. Each spec is
# (units per hidden layer, dropout rate per hidden layer); the layer count is
# the number of hidden layers listed.
_topology_specs = [
    ([64], [0.2]),
    ([64, 32], [0.2, 0.3]),
    ([128, 64, 32], [0.3, 0.3, 0.4]),
]
topologies = [
    {"layers": len(units), "neurons": units, "dropout_rate": rates}
    for units, rates in _topology_specs
]

In [None]:
# Train and evaluate every candidate topology, collecting one summary row
# (model label, test accuracy, test loss) per model.
results = []
for idx, topology in enumerate(topologies, 1):
    print(f"Treinando modelo {idx} com topologia: {topology}")
    # Re-seed the Python, NumPy and TensorFlow generators before each build so
    # weight initialisation, dropout masks and batch shuffling are repeatable
    # from run to run and every topology starts from the same seed.
    set_random_seed(42)
    model = build_model(**topology)
    # The held-out split is passed as validation data to report per-epoch
    # validation metrics alongside the training metrics.
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        epochs=50, batch_size=32, verbose=1)
    # Evaluate the trained model on the held-out split.
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    results.append({"Model": f"Modelo {idx}", "Accuracy": test_acc, "Loss": test_loss})
    print(f"Modelo {idx} - Acurácia: {test_acc:.4f} - Loss: {test_loss:.4f}")


Treinando modelo 1 com topologia: {'layers': 1, 'neurons': [64], 'dropout_rate': [0.2]}
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.3312 - loss: 1.1102 - val_accuracy: 0.3314 - val_loss: 1.1017
Epoch 2/50
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.3352 - loss: 1.1024 - val_accuracy: 0.3271 - val_loss: 1.1010
Epoch 3/50
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.3365 - loss: 1.1011 - val_accuracy: 0.3348 - val_loss: 1.0993
Epoch 4/50
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.3395 - loss: 1.0996 - val_accuracy: 0.3372 - val_loss: 1.0992
Epoch 5/50
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.3378 - loss: 1.0995 - val_accuracy: 0.3261 - val_loss: 1.1000
Epoch 6/50
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.3358 - loss: 1.0988 - val_accuracy: 0.3278 - val_loss: 1.0997
Epoch 7/50
[1m875/875[0m [32m━━━━━━━

In [None]:
# Gather the per-model metric rows into one table and print it under a heading.
results_df = pd.DataFrame(results)
print("\nResultados comparativos:", results_df, sep="\n")


Resultados comparativos:
      Model  Accuracy      Loss
0  Modelo 1  0.329083  1.100001
1  Modelo 2  0.320083  1.100175
2  Modelo 3  0.328417  1.098920
