<a href="https://colab.research.google.com/github/jvitorc/TCC/blob/main/ExplorandoRedesNeurais.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### João Vitor Cardoso <2020>

# **Explorando o uso de Redes Neurais para Detecção de Intrusão**



  Usando a base [CSE-CIC-IDS2018](https://www.unb.ca/cic/datasets/ids-2018.html) para detectar, com redes neurais, intrusões do tipo ataque DoS.

  

# Baixando a Base de Dados

In [None]:
# Download, unpack and install the AWS CLI bundle (provides the `aws` command)
!curl "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip" -o "awscli-bundle.zip"
!unzip awscli-bundle.zip
!sudo ./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws

In [None]:
# Download the CSV with the DoS-attack traffic from the public S3 bucket into ./CSE-CIC-IDS2018/
!aws s3 cp --no-sign-request --region sa-east-1 "s3://cse-cic-ids2018/Processed Traffic Data for ML Algorithms/Friday-16-02-2018_TrafficForML_CICFlowMeter.csv" "./CSE-CIC-IDS2018/"

# Pré-processamento dos dados

In [None]:
# Importando bibliotecas
import pandas as pd
import numpy as np

In [None]:
# Load the downloaded CSV into a DataFrame
dataset = pd.read_csv('CSE-CIC-IDS2018/Friday-16-02-2018_TrafficForML_CICFlowMeter.csv')

# Start a log of (step name, row count) pairs to track how each cleaning step changes the data size
size_history = [( "original", dataset.shape[0])]

  interactivity=interactivity, compiler=compiler, result=result)


In [None]:
# Drop rows whose 'Protocol' cell holds the literal text 'Protocol',
# i.e. column-name rows repeated inside the data
dataset = dataset[dataset['Protocol'] != 'Protocol']

# Record the row count after this step
size_history.append(( "removendo nomes das colunas", dataset.shape[0]))

In [None]:
# Remove the 'Timestamp' column from the feature frame (the removed column is kept in `timestamp`)
timestamp = dataset.pop('Timestamp')

In [None]:
# Split the targets off: remove the 'Label' column from the feature frame
targets = dataset.pop('Label')

# Convert the targets to a pandas Categorical so each distinct label gets an integer code
targets = pd.Categorical(targets)

In [None]:
# Convert every remaining column to a numeric dtype
dataset = dataset.apply(pd.to_numeric)

In [None]:
# Descriptive statistics of the features, transposed so each row is a feature
# and each column a statistic ('mean', 'std', ...)
features_stats = dataset.describe().transpose()

In [None]:
# Z-score normalisation helper
def norm(x):
  """Standardise `x` column-wise with the mean and std stored in `features_stats`."""
  centered = x - features_stats['mean']
  return centered / features_stats['std']

# Normalise the data, then turn the NaN values into 0
# (NaN shows up where the division is undefined, e.g. a column whose std is zero)
normed_dataset = norm(dataset).replace(np.nan, 0)

In [None]:
# Multiclass labels: the integer category code of each target
multiclass_label = targets.codes

In [None]:
size_history

[('original', 1048575), ('removendo nomes das colunas', 1048574)]

In [None]:
# Binary labels: 1.0 where the category code is 1 or 2, 0.0 for every other code
binary_label = ((targets.codes == 1) | (targets.codes == 2)).astype(float)

# Rede Neural (Modelos)

In [None]:
# Importando bibliotecas
import tensorflow as tf
from tensorflow import  keras

In [None]:
# Binary model
def model_156():
  """Build and compile a binary classifier: 78 inputs -> Dense(156, relu) -> Dense(1, sigmoid)."""
  hidden_layer = keras.layers.Dense(156, activation='relu', input_shape=[78])
  output_layer = keras.layers.Dense(1, activation='sigmoid')

  model = keras.Sequential([hidden_layer, output_layer])
  model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])

  return model

In [None]:
def model_78():
  """Build and compile a binary classifier: 78 inputs -> Dense(78, relu) -> Dense(1, sigmoid)."""
  hidden_layer = keras.layers.Dense(78, activation='relu', input_shape=[78])
  output_layer = keras.layers.Dense(1, activation='sigmoid')

  model = keras.Sequential([hidden_layer, output_layer])
  model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])

  return model

In [None]:
def model_39():
  """Build and compile a binary classifier: 78 inputs -> Dense(39, relu) -> Dense(1, sigmoid)."""
  hidden_layer = keras.layers.Dense(39, activation='relu', input_shape=[78])
  output_layer = keras.layers.Dense(1, activation='sigmoid')

  model = keras.Sequential([hidden_layer, output_layer])
  model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])

  return model

In [None]:
def model_15():
  """Build and compile a binary classifier: 78 inputs -> Dense(15, relu) -> Dense(1, sigmoid)."""
  hidden_layer = keras.layers.Dense(15, activation='relu', input_shape=[78])
  output_layer = keras.layers.Dense(1, activation='sigmoid')

  model = keras.Sequential([hidden_layer, output_layer])
  model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])

  return model

In [None]:
def model_10():
  """Build and compile a binary classifier: 78 inputs -> Dense(10, relu) -> Dense(1, sigmoid)."""
  hidden_layer = keras.layers.Dense(10, activation='relu', input_shape=[78])
  output_layer = keras.layers.Dense(1, activation='sigmoid')

  model = keras.Sequential([hidden_layer, output_layer])
  model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])

  return model

In [None]:
def model_5():
  """Build and compile a binary classifier: 78 inputs -> Dense(5, relu) -> Dense(1, sigmoid)."""
  hidden_layer = keras.layers.Dense(5, activation='relu', input_shape=[78])
  output_layer = keras.layers.Dense(1, activation='sigmoid')

  model = keras.Sequential([hidden_layer, output_layer])
  model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])

  return model

# Funções de treinamento

In [None]:
# Importando biblioteca
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import math

## Modelo binário

In [None]:
def _safe_ratio(numerator, denominator):
  """Return numerator / denominator, or NaN when the denominator is zero."""
  if denominator == 0:
    return float('nan')
  return numerator / denominator


def _binary_metrics(tn, fp, fn, tp):
  """Derive precision, recall, TNR, F1 and MCC from binary confusion-matrix counts."""
  precision = _safe_ratio(tp, tp + fp)
  recall = _safe_ratio(tp, tp + fn)
  TNR = _safe_ratio(tn, tn + fp)
  f1_score = 2 * _safe_ratio(precision * recall, precision + recall)

  # Matthews correlation coefficient; reported as 0 when any marginal total is
  # zero, because the denominator vanishes and the coefficient is undefined.
  a, b, c, d = (tp + fp), (fn + tn), (fp + tn), (tp + fn)
  MCC = 0
  if 0 not in [a, b, c, d]:
    MCC = (tp * tn - fp * fn) / math.sqrt(a * b * c * d)

  return {
      'precision': precision,
      'recall': recall,
      'TNR': TNR,
      'f1_score': f1_score,
      'MCC': MCC
  }


def binary_training(X_train, X_test, y_train, y_test, f_model):
  """Train a fresh binary model on one split and evaluate it on the held-out part.

  Parameters
  ----------
  X_train, X_test : feature arrays, passed unchanged to `fit` / `evaluate` / `predict`.
  y_train, y_test : binary labels (0/1) matching the feature arrays.
  f_model : zero-argument callable returning a compiled model. The model is
      expected to output one probability per sample (single sigmoid unit, as
      the `model_*` builders in this notebook do) and to be compiled with
      exactly one metric so that `evaluate` returns `(loss, accuracy)`.

  Returns
  -------
  dict with keys:
      'model'       - the trained model,
      'conf_matrix' - 2x2 confusion matrix (rows: true class, columns: predicted),
      'history'     - the object returned by `model.fit`,
      'evaluate'    - {'loss', 'acc'} measured on the test split,
      'statistic'   - {'precision', 'recall', 'TNR', 'f1_score', 'MCC'}; ratios
                      with a zero denominator are NaN, MCC is 0 in that case.
  """
  # Instantiate the model; summary() prints by itself (wrapping it in print()
  # would additionally print its None return value).
  model = f_model()
  model.summary()

  # Training
  history = model.fit(X_train, y_train, epochs=10)

  # Evaluation on the held-out split
  loss, acc = model.evaluate(X_test, y_test)

  # Class predictions. `Sequential.predict_classes` was removed from Keras, so
  # threshold the predicted probabilities at 0.5, which is what it did for a
  # single sigmoid output.
  predictions = (model.predict(X_test) > 0.5).astype('int32').ravel()

  # Confusion matrix, computed once. `labels=[0, 1]` forces a 2x2 result even
  # when a fold (or the predictions) contains a single class, so the four-way
  # unpacking below cannot fail.
  conf_matrix = confusion_matrix(y_test, predictions, labels=[0, 1])

  # Plain Python ints: avoids fixed-width integer overflow in the MCC product
  # and NumPy divide-by-zero warnings.
  tn, fp, fn, tp = (int(count) for count in conf_matrix.ravel())

  return {
      'model': model,
      'conf_matrix': conf_matrix,
      'history': history,
      'evaluate': {'loss': loss, 'acc': acc},
      'statistic': _binary_metrics(tn, fp, fn, tp)
  }

## Cross Validation

In [None]:
from sklearn.model_selection import KFold

In [None]:
def cross_validation(data, label, f_model, f, n=10):
  """Run `f` once per fold of an `n`-fold KFold split and collect the results.

  `data` and `label` are indexed with the integer index arrays produced by
  `KFold.split`. For every fold, `f` is called as
  `f(X_train, X_test, y_train, y_test, f_model)`; the list of its return
  values, in fold order, is returned.
  """
  splitter = KFold(n_splits=n)
  results = []

  for fold_number, (train_idx, test_idx) in enumerate(splitter.split(data), start=1):
    X_train, X_test = data[train_idx], data[test_idx]
    y_train, y_test = label[train_idx], label[test_idx]

    print(f"Folder {fold_number}\n")
    fold_result = f(X_train, X_test, y_train, y_test, f_model)
    results.append(fold_result)

  return results

In [None]:
def save_statistic(info, filepath):
  """Write the per-fold 'statistic' dicts of `info` to `<filepath>statistic.csv`.

  `info` is a list of dicts that each carry a 'statistic' mapping. The CSV
  always starts with the columns precision, recall, TNR, f1_score; any
  additional keys found in the mappings follow as extra columns. An empty
  `info` yields a header-only file. `filepath` is used as a plain string prefix.
  """
  base_columns = ['precision', 'recall', 'TNR', 'f1_score']

  # Build the frame in one go: DataFrame.append was removed in pandas 2.0 and
  # appending row by row copies the whole frame on every iteration.
  st = pd.DataFrame([model['statistic'] for model in info])
  extra_columns = [column for column in st.columns if column not in base_columns]
  st = st.reindex(columns=base_columns + extra_columns)

  filename = filepath+'statistic.csv'
  st.to_csv(filename, encoding='utf-8', index=False)


In [None]:
def save_evaluate(info, filepath):
  """Write the per-fold 'evaluate' dicts of `info` to `<filepath>evaluate.csv`.

  `info` is a list of dicts that each carry an 'evaluate' mapping. The CSV
  always starts with the columns acc, loss; any additional keys found in the
  mappings follow as extra columns. An empty `info` yields a header-only file.
  `filepath` is used as a plain string prefix.
  """
  base_columns = ['acc', 'loss']

  # Build the frame in one go: DataFrame.append was removed in pandas 2.0 and
  # appending row by row copies the whole frame on every iteration.
  et = pd.DataFrame([model['evaluate'] for model in info])
  extra_columns = [column for column in et.columns if column not in base_columns]
  et = et.reindex(columns=base_columns + extra_columns)

  filename = filepath+'evaluate.csv'
  et.to_csv(filename,encoding='utf-8', index=False)


In [None]:
def save_confusion_matrix(info, filepath):
  """Dump each fold's confusion matrix as a small labelled table.

  Every entry of `info` must carry a 'conf_matrix' object exposing `tolist()`;
  only its top-left 2x2 cells are written. The text is saved to
  `<filepath>confusion_matrix.csv`.
  """
  chunks = ['CONFUSION MATRIX\n']

  for fold_number, entry in enumerate(info, start=1):
    rows = entry['conf_matrix'].tolist()
    chunks.append(f'FOLDER {fold_number}\n')
    chunks.append(',BEGIN,INTRUSION\n')
    chunks.append(f'BEGIN,{rows[0][0]},{rows[0][1]}\n')
    chunks.append(f'INTRUSION,{rows[1][0]},{rows[1][1]}\n\n')

  report = ''.join(chunks)
  filename = filepath+'confusion_matrix.csv'
  with open(filename, 'w') as f:
    f.write(report)

In [None]:
def save_history(info, filepath):
  """Save the training curves of every fold side by side in `<filepath>history.csv`.

  Each entry of `info` must carry a 'history' object whose `.history` attribute
  maps a metric name to its sequence of values. Every metric becomes a column
  named `FOLDER<k>-<metric>`, with folds numbered from 1.
  """
  curves = [entry['history'].history for entry in info]

  df = pd.DataFrame()
  for fold_number, metrics in enumerate(curves, start=1):
    for metric_name, series in metrics.items():
      column = f'FOLDER{fold_number}-{metric_name}'
      df[column] = series

  filename = filepath+'history.csv'
  df.to_csv(filename,encoding='utf-8', index=False)

In [None]:
def save_model(info, filepath):
  """Save the trained model of every fold as `<filepath>model-<k>`, k starting at 1.

  Each entry of `info` must carry a 'model' object exposing `save(filename)`.
  """
  # Pair each model with its 1-based fold number. The previous code indexed the
  # list with the 1-based number, which skipped the first model and raised
  # IndexError on the last one.
  for fold_number, entry in enumerate(info, start=1):
    filename = filepath + f'model-{fold_number}'
    entry['model'].save(filename)

In [None]:
def salve(info, filepath='training/'):
  """Persist every artefact of a cross-validation run under the `filepath` prefix.

  Writes, in order: statistic.csv, evaluate.csv, confusion_matrix.csv,
  history.csv and one saved model per fold. `filepath` is a string prefix; when
  it has a directory part (e.g. the default 'training/') that directory is
  created if missing, so the call no longer depends on a prior `mkdir`.
  """
  import os  # local import keeps this cell runnable on its own

  target_dir = os.path.dirname(filepath)
  if target_dir:
    os.makedirs(target_dir, exist_ok=True)

  save_statistic(info, filepath)
  save_evaluate(info, filepath)
  save_confusion_matrix(info, filepath)
  save_history(info, filepath)
  save_model(info, filepath)

# Treinamento

## Dataset-Modelo-156

In [None]:
info = cross_validation(dataset.values, binary_label, model_156, binary_training)

In [None]:
!mkdir dataset_model_156

In [None]:
salve(info, 'dataset_model_156/')

In [None]:
!zip -r dataset_model_156.zip dataset_model_156/

## Dataset-Modelo-78

In [None]:
info = cross_validation(dataset.values, binary_label, model_78, binary_training)

In [None]:
!mkdir dataset_model_78

In [None]:
salve(info, 'dataset_model_78/')

In [None]:
!zip -r dataset_model_78.zip dataset_model_78/

## Dataset-Modelo-39

In [None]:
info = cross_validation(dataset.values, binary_label, model_39, binary_training)

In [None]:
!mkdir dataset_model_39

In [None]:
salve(info, 'dataset_model_39/')

In [None]:
!zip -r dataset_model_39.zip dataset_model_39/

## Dataset-Modelo-15

In [None]:
info = cross_validation(dataset.values, binary_label, model_15, binary_training)

In [None]:
!mkdir dataset_model_15

In [None]:
salve(info, 'dataset_model_15/')

In [None]:
!zip -r dataset_model_15.zip dataset_model_15/

## Dataset-Modelo-10

In [None]:
info = cross_validation(dataset.values, binary_label, model_10, binary_training)

In [None]:
!mkdir dataset_model_10

In [None]:
salve(info, 'dataset_model_10/')

In [None]:
!zip -r dataset_model_10.zip dataset_model_10/

## Dataset-Modelo-5

In [None]:
info = cross_validation(dataset.values, binary_label, model_5, binary_training)

In [None]:
!mkdir dataset_model_5

In [None]:
salve(info, 'dataset_model_5/')

In [None]:
!zip -r dataset_model_5.zip dataset_model_5/

# Estratégia OneVsAll

In [None]:
# Importar estratégia
from sklearn.multiclass import OneVsRestClassifier
# Importar MLP
from sklearn.neural_network import MLPClassifier

In [None]:
# Multi-layer perceptron trained with the L-BFGS solver and a fixed random seed.
# NOTE(review): hidden_layer_sizes=(100, 1) describes two hidden layers, the second
# with a single unit - confirm this is intended rather than (100,).
mlpClassifier = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(100, 1), random_state=1)

In [None]:
# Fit the MLP on the feature matrix against the multiclass label codes.
# NOTE(review): the classifier is fitted directly, without the OneVsRestClassifier
# imported for this section - confirm whether it should be wrapped.
mlpClassifier.fit(dataset.values,multiclass_label)