# Malicious Domain Dataset - Multi-layer Perceptron Classifier

In [16]:
# Load preprocessed dataset

import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold

datasource = "preprocessed.csv"
data = pd.read_csv(datasource)

In [28]:
# Simple model - no cross validation

clf = MLPClassifier()

X = data.drop(columns=["Class"])
y = data["Class"]
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=8000, random_state=42)

y_train.value_counts()

clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.9895525538201773

## Resampling

As the preprocessed dataset is unbalanced, we resample to create a balanced set for training and testing.

In [27]:
from sklearn.utils import resample

sample_size = min(data.Class.value_counts())

malicious = resample(data.loc[data["Class"] == 0], replace=False, n_samples=sample_size, random_state=42)
benign = resample(data.loc[data["Class"] == 1], replace=False, n_samples=sample_size, random_state=42)

balanced = pd.concat([malicious, benign])

# Test model

clf = MLPClassifier()

X = balanced.drop(columns=["Class"])
y = balanced["Class"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.9764089121887287

## Cross-validation