# Machine Learning

# MLP Classifier

In [None]:
#MLP Classifier

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import GridSearchCV

# Load one CSV per event; each dataset is modelled independently below.
WannaCrypt = pd.read_csv('WannaCrypt.csv')
Nimda = pd.read_csv('Nimda.csv')
Slammer = pd.read_csv('Slammer.csv')
Moscow_blackout = pd.read_csv('Moscow_blackout.csv')
Code_Red_I = pd.read_csv('Code_Red_I.csv')

Datasets = [WannaCrypt, Nimda, Slammer, Moscow_blackout, Code_Red_I]
names = ['WannaCrypt', 'Nimda', 'Slammer', 'Moscow_blackout', 'Code_Red_I']

# Columns removed from the feature matrix: H+M, H, M, S, MED1..MED11,
# MAL1..MAL9 and the target column itself.
excluded_columns = (
    ['H+M', 'H', 'M', 'S']
    + ['MED' + str(k) for k in range(1, 12)]
    + ['MAL' + str(k) for k in range(1, 10)]
    + ['Classification']
)

# Grid explored for every dataset: 4 activations x 3 solvers.
parameter_space = {
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
}

for name, frame in zip(names, Datasets):
    # Features are whatever remains after dropping the excluded columns;
    # the target is the 'Classification' column.
    X = frame.drop(excluded_columns, axis=1)
    y = frame['Classification']

    # Hold out 25% of the rows for testing; the fixed seed (42) makes the
    # split repeatable between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42)

    # Standardise with statistics learned from the training split only,
    # then apply that same transformation to both splits.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # 3-fold grid search over the MLP hyper-parameters, using all cores.
    clf = GridSearchCV(MLPClassifier(max_iter=2000), parameter_space,
                       n_jobs=-1, cv=3)

    # fit() returns the fitted search object, so `classifier` is `clf`.
    classifier = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print('Best parameters found for', name, 'Dataset :\n', clf.best_params_)

    # Text evaluation on the held-out split.
    print("Confusion Matrix is ")
    print(confusion_matrix(y_test, y_pred))
    print("\n")
    print(classification_report(y_test, y_pred))
    print("\n")

    # Two plots per dataset: raw counts, then rows normalised by true class.
    titles_options = [
        (f"Confusion matrix, without normalization for {name} Dataset", None),
        (f"Normalized confusion matrix for {name} Dataset", 'true'),
    ]
    for title, normalize in titles_options:
        disp = plot_confusion_matrix(classifier, X_test, y_test,
                                     cmap=plt.cm.Blues,
                                     normalize=normalize)
        disp.ax_.set_title(title)

        print(title)
        print(disp.confusion_matrix)

    plt.show()
    print(':\n')

# Decision Tree Classifier

In [None]:
#Decision Tree Classifier
import pandas as pd
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn.metrics import confusion_matrix, classification_report, plot_confusion_matrix
from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier 
from sklearn import tree
from sklearn.model_selection import GridSearchCV

# Load the five datasets, one CSV per event.
WannaCrypt = pd.read_csv('WannaCrypt.csv')
Nimda = pd.read_csv('Nimda.csv')
Slammer = pd.read_csv('Slammer.csv')
Moscow_blackout = pd.read_csv('Moscow_blackout.csv')
Code_Red_I = pd.read_csv('Code_Red_I.csv')

Datasets = [WannaCrypt, Nimda, Slammer, Moscow_blackout, Code_Red_I]
names = ['WannaCrypt', 'Nimda', 'Slammer', 'Moscow_blackout', 'Code_Red_I']

# Columns removed from the feature matrix: H+M, H, M, S, MED1..MED11,
# MAL1..MAL9 and the target column itself.
excluded_columns = (
    ['H+M', 'H', 'M', 'S']
    + ['MED' + str(k) for k in range(1, 12)]
    + ['MAL' + str(k) for k in range(1, 10)]
    + ['Classification']
)

# Only the split criterion is tuned.
parameter_space = {'criterion': ['gini', 'entropy']}

for name, frame in zip(names, Datasets):
    # Independent variables (features) and dependent variable (target).
    X = frame.drop(excluded_columns, axis=1)
    y = frame['Classification']

    # 70% training / 30% test, with a fixed seed for a repeatable split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1)

    # 3-fold grid search around a seeded decision tree classifier.
    clf = GridSearchCV(DecisionTreeClassifier(random_state=1234),
                       parameter_space, n_jobs=-1, cv=3)
    clf = clf.fit(X_train, y_train)

    # Predict the response for the test split.
    y_pred = clf.predict(X_test)

    print('Best parameters found for', name, 'Dataset :\n', clf.best_params_)

    # Accuracy on the held-out split.
    print("\n")
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy for 70% training set and 30% test set :", accuracy)

    # Confusion matrix and per-class report.
    print("Confusion Matrix is")
    print(confusion_matrix(y_test, y_pred))
    print("\n")
    print(classification_report(y_test, y_pred))
    print("\n")

    # Two plots per dataset: raw counts, then rows normalised by true class.
    titles_options = [
        (f"Confusion matrix, without normalization for {name} Dataset", None),
        (f"Normalized confusion matrix for {name} Dataset", 'true'),
    ]
    for title, normalize in titles_options:
        disp = plot_confusion_matrix(clf, X_test, y_test,
                                     cmap=plt.cm.Blues,
                                     normalize=normalize)
        disp.ax_.set_title(title)

        print(title)
        print(disp.confusion_matrix)

    plt.show()
    print(':\n')





# KNN classifier

In [None]:
# KNN classifier 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from sklearn.model_selection import GridSearchCV
 
%matplotlib inline
 
# Import the data sets, one CSV per event.
WannaCrypt = pd.read_csv('WannaCrypt.csv')
Nimda = pd.read_csv('Nimda.csv')
Slammer = pd.read_csv('Slammer.csv')
Moscow_blackout = pd.read_csv('Moscow_blackout.csv')
Code_Red_I = pd.read_csv('Code_Red_I.csv')

# Built before the loop so the loop header can read them: this cell then runs
# on a fresh kernel instead of depending on an earlier cell having defined
# `Datasets`.
Datasets = [WannaCrypt, Nimda, Slammer, Moscow_blackout, Code_Red_I]
names = ['WannaCrypt', 'Nimda', 'Slammer', 'Moscow_blackout', 'Code_Red_I']

# Columns removed from the feature matrix, including the target column.
dropped_columns = ['H+M', 'H', 'M', 'S', 'MED1', 'MED2', 'MED3', 'MED4', 'MED5', 'MED6', 'MED7',
                   'MED8', 'MED9', 'MED10', 'MED11', 'MAL1', 'MAL2', 'MAL3', 'MAL4', 'MAL5', 'MAL6',
                   'MAL7', 'MAL8', 'MAL9', 'Classification']

# Grid explored for every dataset: k = 1..99 with both weighting schemes.
parameter_space = {
    'n_neighbors': list(range(1, 100)),
    'weights': ['uniform', 'distance'],
}

for name, frame in zip(names, Datasets):
    X = frame.drop(dropped_columns, axis=1)
    y = frame['Classification']

    # Split first, with a fixed seed so the reported metrics are repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    # Standardise AFTER splitting and fit the scaler on the training split
    # only; fitting it on the full dataset would leak test-set statistics
    # into training.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Train the model (3-fold grid search) and make predictions.
    model = GridSearchCV(KNeighborsClassifier(), parameter_space, n_jobs=-1, cv=3)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print('Best parameters found for ', name, ' Dataset :\n', model.best_params_)

    # Performance measurement on the held-out split.
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

    # NOTE: plot_confusion_matrix was removed in scikit-learn 1.2;
    # ConfusionMatrixDisplay.from_estimator is its replacement if this
    # notebook is moved to a newer scikit-learn.
    titles_options = [("Confusion matrix, without normalization for " + name + " Dataset", None),
                      ("Normalized confusion matrix for " + name + " Dataset", 'true')]
    for title, normalize in titles_options:
        disp = plot_confusion_matrix(model, X_test, y_test,
                                     cmap=plt.cm.Blues,
                                     normalize=normalize)
        disp.ax_.set_title(title)

        print(title)
        print(disp.confusion_matrix)

    plt.show()
    print(':\n')

# Support Vector Machines Classifier

In [None]:
#Support Vector Machines Classifier
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from sklearn.model_selection import GridSearchCV
 

# Import the datasets, one CSV per event.
WannaCrypt = pd.read_csv('WannaCrypt.csv')
Nimda = pd.read_csv('Nimda.csv')
Slammer = pd.read_csv('Slammer.csv')
Moscow_blackout = pd.read_csv('Moscow_blackout.csv')
Code_Red_I = pd.read_csv('Code_Red_I.csv')

# Built before the loop so the loop header can read them: this cell then runs
# on a fresh kernel instead of depending on an earlier cell having defined
# `Datasets`.
Datasets = [WannaCrypt, Nimda, Slammer, Moscow_blackout, Code_Red_I]
names = ['WannaCrypt', 'Nimda', 'Slammer', 'Moscow_blackout', 'Code_Red_I']

# Columns removed from the feature matrix, including the target column.
dropped_columns = ['H+M', 'H', 'M', 'S', 'MED1', 'MED2', 'MED3', 'MED4', 'MED5', 'MED6', 'MED7',
                   'MED8', 'MED9', 'MED10', 'MED11', 'MAL1', 'MAL2', 'MAL3', 'MAL4', 'MAL5', 'MAL6',
                   'MAL7', 'MAL8', 'MAL9', 'Classification']

# Only the kernel is tuned.
parameter_space = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
}

for name, frame in zip(names, Datasets):
    X = frame.drop(dropped_columns, axis=1)
    y = frame['Classification']

    # Split first, with a fixed seed so the reported metrics are repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    # Standardise AFTER splitting and fit the scaler on the training split
    # only; fitting it on the full dataset would leak test-set statistics
    # into training.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Train the model (3-fold grid search) and make predictions.
    model = GridSearchCV(SVC(), parameter_space, n_jobs=-1, cv=3)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print('Best parameters found for ', name, ' Dataset :\n', model.best_params_)

    # Performance measurement on the held-out split.
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

    # NOTE: plot_confusion_matrix was removed in scikit-learn 1.2;
    # ConfusionMatrixDisplay.from_estimator is its replacement if this
    # notebook is moved to a newer scikit-learn.
    titles_options = [("Confusion matrix, without normalization for " + name + " Dataset", None),
                      ("Normalized confusion matrix for " + name + " Dataset", 'true')]
    for title, normalize in titles_options:
        disp = plot_confusion_matrix(model, X_test, y_test,
                                     cmap=plt.cm.Blues,
                                     normalize=normalize)
        disp.ax_.set_title(title)

        print(title)
        print(disp.confusion_matrix)

    plt.show()
    print(':\n')


# Random Forests Classifier

In [None]:
# Random Forests Classifier
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from sklearn.model_selection import GridSearchCV
 
# Import the datasets, one CSV per event.
WannaCrypt = pd.read_csv('WannaCrypt.csv')
Nimda = pd.read_csv('Nimda.csv')
Slammer = pd.read_csv('Slammer.csv')
Moscow_blackout = pd.read_csv('Moscow_blackout.csv')
Code_Red_I = pd.read_csv('Code_Red_I.csv')

# Built before the loop so the loop header can read them: this cell then runs
# on a fresh kernel instead of depending on an earlier cell having defined
# `Datasets`.
Datasets = [WannaCrypt, Nimda, Slammer, Moscow_blackout, Code_Red_I]
names = ['WannaCrypt', 'Nimda', 'Slammer', 'Moscow_blackout', 'Code_Red_I']

# Columns removed from the feature matrix, including the target column.
dropped_columns = ['H+M', 'H', 'M', 'S', 'MED1', 'MED2', 'MED3', 'MED4', 'MED5', 'MED6', 'MED7',
                   'MED8', 'MED9', 'MED10', 'MED11', 'MAL1', 'MAL2', 'MAL3', 'MAL4', 'MAL5', 'MAL6',
                   'MAL7', 'MAL8', 'MAL9', 'Classification']

# Grid explored for every dataset: 50..150 trees with both split criteria.
parameter_space = {
    'n_estimators': list(range(50, 151)),
    'criterion': ['gini', 'entropy'],
}

for name, frame in zip(names, Datasets):
    # Split the data set into training data and test data (no scaling is
    # applied in this cell).
    X = frame.drop(dropped_columns, axis=1)
    y = frame['Classification']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    # Train the model (3-fold grid search) and make predictions. The forest is
    # seeded like the split so the selected parameters and the reported
    # metrics are repeatable between runs.
    model = GridSearchCV(RandomForestClassifier(random_state=42),
                         parameter_space, n_jobs=-1, cv=3)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print('Best parameters found for ', name, ' Dataset :\n', model.best_params_)

    # Performance measurement on the held-out split.
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

    # NOTE: plot_confusion_matrix was removed in scikit-learn 1.2;
    # ConfusionMatrixDisplay.from_estimator is its replacement if this
    # notebook is moved to a newer scikit-learn.
    titles_options = [("Confusion matrix, without normalization for " + name + " Dataset", None),
                      ("Normalized confusion matrix for " + name + " Dataset", 'true')]
    for title, normalize in titles_options:
        disp = plot_confusion_matrix(model, X_test, y_test,
                                     cmap=plt.cm.Blues,
                                     normalize=normalize)
        disp.ax_.set_title(title)

        print(title)
        print(disp.confusion_matrix)

    plt.show()
    print(':\n')


# Naive Bayes Classifier

In [None]:
#Naive Bayes Classifier
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from sklearn.model_selection import GridSearchCV
 

# Load the five datasets, one CSV per event.
WannaCrypt = pd.read_csv('WannaCrypt.csv')
Nimda = pd.read_csv('Nimda.csv')
Slammer = pd.read_csv('Slammer.csv')
Moscow_blackout = pd.read_csv('Moscow_blackout.csv')
Code_Red_I = pd.read_csv('Code_Red_I.csv')

# 100 log-spaced values from 2**0 to 2**10; not referenced anywhere else in
# this cell.
c_SVC = np.logspace(start=0, stop=10, num=100, base=2, dtype='float64')

Datasets = [WannaCrypt, Nimda, Slammer, Moscow_blackout, Code_Red_I]
names = ['WannaCrypt', 'Nimda', 'Slammer', 'Moscow_blackout', 'Code_Red_I']

# Columns removed from the feature matrix: H+M, H, M, S, MED1..MED11,
# MAL1..MAL9 and the target column itself.
excluded_columns = (
    ['H+M', 'H', 'M', 'S']
    + ['MED' + str(k) for k in range(1, 12)]
    + ['MAL' + str(k) for k in range(1, 10)]
    + ['Classification']
)

for name, frame in zip(names, Datasets):
    # Features and target for this dataset.
    X = frame.drop(excluded_columns, axis=1)
    y = frame['Classification']

    # 70% training / 30% test, with a fixed seed for a repeatable split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    # Fit a multinomial Naive Bayes model with default settings and predict
    # the held-out split.
    model = MultinomialNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Text evaluation: per-class report first, then the confusion matrix.
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

    # Two plots per dataset: raw counts, then rows normalised by true class.
    titles_options = [
        (f"Confusion matrix, without normalization for {name} Dataset", None),
        (f"Normalized confusion matrix for {name} Dataset", 'true'),
    ]
    for title, normalize in titles_options:
        disp = plot_confusion_matrix(model, X_test, y_test,
                                     cmap=plt.cm.Blues,
                                     normalize=normalize)
        disp.ax_.set_title(title)

        print(title)
        print(disp.confusion_matrix)

    plt.show()
    print(':\n')
