# Challenge: 

# Imports

In [4]:
import torch;
from torch import nn;
import numpy;
import matplotlib.pyplot as plt;
import pandas as pd;
import sklearn;

from ForwardModelNN import FeedForwardNN;

device: str = "cuda"  if torch.cuda.is_available() else "cpu";

# Installing Dependencies

In [1]:
%pip install pandas
%pip install numpy

Defaulting to user installation because normal site-packages is not writeable


# Data

## Loading the Data

In [98]:
df = pd.read_csv('dataset/participant_data.csv');
column_names = df.columns;
for col in column_names: print("Feature: " + col);
df

Feature: _time
Feature: agentID
Feature: Altitude
Feature: Latitude
Feature: Longitude
Feature: http_result
Feature: icmp_result
Feature: tcp_result
Feature: udp_result


Unnamed: 0,_time,agentID,Altitude,Latitude,Longitude,http_result,icmp_result,tcp_result,udp_result
0,2024-09-20 17:19:05+00:00,752813491.0,20.292809,45.496175,-73.561853,29.991,49.161,30.106,58.090
1,2024-09-20 17:19:03+00:00,752813491.0,20.292809,45.496175,-73.561853,29.955,59.476,30.031,69.706
2,2024-09-20 17:19:01+00:00,752813491.0,20.292809,45.496175,-73.561853,33.209,31.992,29.947,42.514
3,2024-09-20 17:18:59+00:00,752813491.0,20.292809,45.496175,-73.561853,30.057,64.112,30.036,74.137
4,2024-09-20 17:18:57+00:00,752813491.0,20.292809,45.496175,-73.561853,39.228,43.609,30.135,53.984
...,...,...,...,...,...,...,...,...,...
2593,,,,,,,,,
2594,,,,,,,,,
2595,,,,,,,,,
2596,,,,,,,,,


In [99]:
import numpy as np;
import random;
origData = df.to_numpy();
randSample = origData[random.randint(0,len(origData)-1)]; # for inspection

print("Type of Array with original data: " + str(type(origData)));
print("Type of a record in the original data: " + str(type(randSample)));
print("Number of records in the original dataset: " + str(len(origData)));
print("Number of features in a record: " + str(randSample.size));
print("Dimensionality of a record: " + str(randSample.ndim));
print("Dimensionality of the entire data array: " + str(origData.ndim));
print("Shape of the Array: " + str(origData.shape));
print("Shape of a record: " + str(randSample.shape));

Type of Array with original data: <class 'numpy.ndarray'>
Type of a record in the original data: <class 'numpy.ndarray'>
Number of records in the original dataset: 2598
Number of features in a record: 9
Dimensionality of a record: 1
Dimensionality of the entire data array: 2
Shape of the Array: (2598, 9)
Shape of a record: (9,)


We saw NaN values in the table. That is because of empty cells in the table. To prove our intuition, let's inspect whether there are NaN in specific columns.

In [100]:
df.isna().any()

_time          True
agentID        True
Altitude       True
Latitude       True
Longitude      True
http_result    True
icmp_result    True
tcp_result     True
udp_result     True
dtype: bool

## Data Cleansing

We need to replace all the NaN values with 0.

In [101]:
df = df.fillna(0)
df

Unnamed: 0,_time,agentID,Altitude,Latitude,Longitude,http_result,icmp_result,tcp_result,udp_result
0,2024-09-20 17:19:05+00:00,752813491.0,20.292809,45.496175,-73.561853,29.991,49.161,30.106,58.090
1,2024-09-20 17:19:03+00:00,752813491.0,20.292809,45.496175,-73.561853,29.955,59.476,30.031,69.706
2,2024-09-20 17:19:01+00:00,752813491.0,20.292809,45.496175,-73.561853,33.209,31.992,29.947,42.514
3,2024-09-20 17:18:59+00:00,752813491.0,20.292809,45.496175,-73.561853,30.057,64.112,30.036,74.137
4,2024-09-20 17:18:57+00:00,752813491.0,20.292809,45.496175,-73.561853,39.228,43.609,30.135,53.984
...,...,...,...,...,...,...,...,...,...
2593,0,0.0,0.000000,0.000000,0.000000,0.000,0.000,0.000,0.000
2594,0,0.0,0.000000,0.000000,0.000000,0.000,0.000,0.000,0.000
2595,0,0.0,0.000000,0.000000,0.000000,0.000,0.000,0.000,0.000
2596,0,0.0,0.000000,0.000000,0.000000,0.000,0.000,0.000,0.000


In [102]:
# ensure there is no more NaN values in the table
df.isna().any()

_time          False
agentID        False
Altitude       False
Latitude       False
Longitude      False
http_result    False
icmp_result    False
tcp_result     False
udp_result     False
dtype: bool

Now there's no more NaN values in table.

In [106]:
df

Unnamed: 0,_time,agentID,Altitude,Latitude,Longitude,http_result,icmp_result,tcp_result,udp_result
0,2024-09-20 17:19:05+00:00,752813491.0,20.292809,45.496175,-73.561853,29.991,49.161,30.106,58.090
1,2024-09-20 17:19:03+00:00,752813491.0,20.292809,45.496175,-73.561853,29.955,59.476,30.031,69.706
2,2024-09-20 17:19:01+00:00,752813491.0,20.292809,45.496175,-73.561853,33.209,31.992,29.947,42.514
3,2024-09-20 17:18:59+00:00,752813491.0,20.292809,45.496175,-73.561853,30.057,64.112,30.036,74.137
4,2024-09-20 17:18:57+00:00,752813491.0,20.292809,45.496175,-73.561853,39.228,43.609,30.135,53.984
...,...,...,...,...,...,...,...,...,...
2593,0,0.0,0.000000,0.000000,0.000000,0.000,0.000,0.000,0.000
2594,0,0.0,0.000000,0.000000,0.000000,0.000,0.000,0.000,0.000
2595,0,0.0,0.000000,0.000000,0.000000,0.000,0.000,0.000,0.000
2596,0,0.0,0.000000,0.000000,0.000000,0.000,0.000,0.000,0.000


In [105]:
import random;
origData = df.to_numpy();
randSample = origData[random.randint(0,len(origData)-1)]; # for inspection

print("Type of Array with original data: " + str(type(origData)));
print("Type of a record in the original data: " + str(type(randSample)));
print("Number of records in the original dataset: " + str(len(origData)));
print("Number of features in a record: " + str(randSample.size));
print("Dimensionality of a record: " + str(randSample.ndim));
print("Dimensionality of the entire data array: " + str(origData.ndim));
print("Shape of the Array: " + str(origData.shape));
print("Shape of a record: " + str(randSample.shape));

Type of Array with original data: <class 'numpy.ndarray'>
Type of a record in the original data: <class 'numpy.ndarray'>
Number of records in the original dataset: 2598
Number of features in a record: 9
Dimensionality of a record: 1
Dimensionality of the entire data array: 2
Shape of the Array: (2598, 9)
Shape of a record: (9,)


## Data Preprocessing

In [None]:
# Check the ranges of data


## Visualizing the Data

## Splitting the Datasets 

# Machine Learning Modelling

## Creating the Model

In [None]:
# Instantiate and move the model to the correct device
model = FeedForwardNN(input_shape=3, hidden_units=128, output_shape=1).to(device)


## Fitting the Model with Training Data

## Verify if Fitting is performed correctly

# Perform Predictions with the Model

# Evaluate the Performance of the Model

# Conclusion and Further Discussion