## Importing All The Necessary Stuff

In [None]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Loading The Data to prepare a Model

In [None]:
# Read the CSV that the rest of the notebook works on.
# A previously used alternative input:
#   D:\Stuff\CyberSec\archive\03-02-2018.csv
df = pd.read_csv(
    r"D:\Stuff\CyberSec\MachineLearningCSV\MachineLearningCVE\Wednesday-workingHours.pcap_ISCX.csv"
)

# Optional cleaning steps, currently disabled:
#   df = df.dropna()                          # drop rows with missing values
#   df = df.loc[:, (df != 0).any(axis=0)]     # drop columns that are all 0

# Column names in file order; other cells look names up by position here.
columns = df.columns.tolist()

### Display the Imported Data

In [None]:
# Preview the first five rows of the loaded data
df.head(n=5)

### Printing all the Features available in our Dataset

In [None]:
# List every column together with its 1-based feature number
for feature_no, feature_name in enumerate(df.columns, start=1):
    print(f"Feature {feature_no}:\t\"{feature_name}\"")

## Preparing the Classification Model using a Multi-layer Perceptron (MLP) Classifier

### Training and Obtaining accuracy for a **`range`** of features **`individually`**

In [None]:
# Train and score one MLP per feature for every column in a 1-based,
# inclusive range of column numbers (startC..endC).
startC = 1
endC = 10

# The label is always the last column, so extract it once for all features.
y = df.iloc[:, -1].values

for start_col in range(startC - 1, endC):
    end_col = start_col + 1

    # Slicing past the last column silently yields an empty frame; report the
    # bad position explicitly instead of failing later on a name lookup.
    if start_col >= len(columns):
        print(f"Column number {end_col} is out of range "
              f"(the data has {len(columns)} columns)")
        continue

    feature_name = columns[start_col]
    try:
        # Keep the 2-D shape (n_samples, 1) that scikit-learn expects
        X = df.iloc[:, start_col:end_col].values

        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        # Train the multi-layer perceptron classifier
        clf = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=300, alpha=0.0001,
                            solver='adam', random_state=42, tol=0.0001)
        clf.fit(X_train, y_train)

        # Test the classifier
        accuracy = clf.score(X_test, y_test)
    except (ValueError, TypeError) as err:
        # Only data problems (non-numeric values, NaN/inf, ...) are treated as
        # "not a feature"; interrupts and genuine bugs still propagate.
        print(f"'{feature_name}' isn't a feature column ({err})")
    else:
        print(f"Accuracy for column '{feature_name}' : {accuracy}")


### Training and Obtaining accuracy for a **`group`** of features **`together`**

In [None]:
# Train a single MLP on a contiguous block of features.
# bCol and eCol are 1-based, inclusive column numbers; the label is the
# last column of the frame.
bCol = 4
eCol = 10

selected_cols = df.iloc[:, bCol - 1:eCol]
X = selected_cols.values
y = df.iloc[:, -1].values

# Hold out 20% of the rows for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the multi-layer perceptron classifier
clf = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    max_iter=300,
    alpha=0.0001,
    solver='adam',
    random_state=42,
    tol=0.0001,
)
clf.fit(X_train, y_train)

# Mean accuracy on the held-out rows
accuracy = accuracy_score(y_test, clf.predict(X_test))

print(
    "Accuracy for the following features combined",
    selected_cols.columns.tolist(),
    "is: ",
    accuracy,
)


### Obtaining **`group`** accuracy with **`Feature names`** as Input

In [None]:
# Train a single MLP on a set of features chosen by name.
# NOTE(review): each name must match an entry of df.columns exactly
# (an unknown name raises KeyError) - verify against the loaded CSV.
selected_cols = ['Fwd Pkts/s','Bwd Pkts/s']

# Features are the named columns; the label is the last column
X = df[selected_cols].values
y = df.iloc[:, -1].values

# Hold out 20% of the rows for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the multi-layer perceptron classifier
clf = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    max_iter=300,
    alpha=0.0001,
    solver='adam',
    random_state=42,
    tol=0.0001,
)
clf.fit(X_train, y_train)

# Mean accuracy on the held-out rows
accuracy = accuracy_score(y_test, clf.predict(X_test))

print(
    "Accuracy for the following features combined",
    selected_cols,
    "is: ",
    accuracy,
)


### Obtaining **`individual`** accuracy with **`Feature names`** as Input

In [None]:
# Train and score one MLP per feature, with the features given by name.
col_names = ['Fwd Pkts/s','Bwd Pkts/s']

# The label is always the last column, so extract it once for all features.
y = df.iloc[:, -1].values

for col_name in col_names:
    try:
        # Double brackets keep the 2-D shape (n_samples, 1); an unknown
        # column name raises KeyError here.
        X = df[[col_name]].values

        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        # Train the multi-layer perceptron classifier
        clf = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=300, alpha=0.0001,
                            solver='adam', random_state=42, tol=0.0001)
        clf.fit(X_train, y_train)

        # Test the classifier
        accuracy = clf.score(X_test, y_test)
    except (KeyError, ValueError, TypeError) as err:
        # Only a missing column or unusable data (non-numeric values,
        # NaN/inf, ...) counts as "not a feature"; interrupts and genuine
        # bugs still propagate.
        print(f"'{col_name}' isn't a feature column ({err})")
    else:
        print(f"Accuracy for column '{col_name}' : {accuracy}")


### Obtaining **`individual`** accuracy with **`Feature numbers`** as Input

In [None]:
# Train and score one MLP per feature, with the features given as 1-based
# column numbers.
cols = [1,2,3,4,9,13,17,22,27,31,32,33,34,35,36,41,67,68,71,75] # Example list of column indices to test

# The label is always the last column, so extract it once for all features.
y = df.iloc[:, -1].values

for col_number in cols:
    # Convert the 1-based column number to a 0-based half-open slice
    start_col = col_number - 1
    end_col = col_number

    # A number outside 1..len(columns) would silently produce an empty slice
    # (or wrap around for 0), so reject it explicitly.
    if not 1 <= col_number <= len(columns):
        print(f"Column number {col_number} is out of range "
              f"(the data has {len(columns)} columns)")
        continue

    feature_name = columns[start_col]
    try:
        # Keep the 2-D shape (n_samples, 1) that scikit-learn expects
        X = df.iloc[:, start_col:end_col].values

        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        # Train the multi-layer perceptron classifier
        clf = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=300, alpha=0.0001,
                            solver='adam', random_state=42, tol=0.0001)
        clf.fit(X_train, y_train)

        # Test the classifier
        accuracy = clf.score(X_test, y_test)
    except (ValueError, TypeError) as err:
        # Only data problems (non-numeric values, NaN/inf, ...) are treated as
        # "not a feature"; interrupts and genuine bugs still propagate.
        print(f"'{feature_name}' isn't a feature column ({err})")
    else:
        print(f"Accuracy for column '{feature_name}' : {accuracy}")


### Obtaining **`group`** accuracy with **`Feature numbers`** as Input

In [None]:
# specify column indexes to select
selected_cols_idx = [1,2,3,4,9,13,17,22,27,31,32,33,34,35,36,41,67,68,71,75]

selected_cols_idx = [x - 1 for x in selected_cols_idx]

# select columns by index using iloc
X = df.iloc[:, selected_cols_idx].values
y = df.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Train the decision tree classifier
clf = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=300, alpha=0.0001,
                    solver='adam', random_state=42, tol=0.0001)
clf.fit(X_train, y_train)

# Test the classifier
accuracy = clf.score(X_test, y_test)

# Get the names of the selected columns
selected_cols = list(df.columns[selected_cols_idx])

print("Accuracy for the following features combined", selected_cols, "is:", accuracy)
