# Challenge: 

# Imports

In [231]:
import torch;
from torch import nn;
import numpy;
import matplotlib.pyplot as plt;
import pandas as pd;
import sklearn;

from ForwardModelNN import FeedForwardNN;

device: str = "cuda"  if torch.cuda.is_available() else "cpu";

# Installing Dependencies

In [232]:
%pip install pandas
%pip install numpy

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


# Data

## Loading the Data

In [233]:
df = pd.read_csv('dataset/participant_data_cleaned_zero.csv');
column_names = df.columns;
for col in column_names: print("Feature: " + col);

df['_time'] = pd.to_datetime(df['_time'])
df

Feature: _time
Feature: Altitude
Feature: Latitude
Feature: Longitude
Feature: http_result
Feature: icmp_result
Feature: tcp_result
Feature: udp_result


Unnamed: 0,_time,Altitude,Latitude,Longitude,http_result,icmp_result,tcp_result,udp_result
0,2024-09-20 17:19:03+00:00,20.292809,45.496175,-73.561853,29.955,59.476,30.031,69.706
1,2024-09-20 17:19:01+00:00,20.292809,45.496175,-73.561853,33.209,31.992,29.947,42.514
2,2024-09-20 17:18:59+00:00,20.292809,45.496175,-73.561853,30.057,64.112,30.036,74.137
3,2024-09-20 17:18:57+00:00,20.292809,45.496175,-73.561853,39.228,43.609,30.135,53.984
4,2024-09-20 17:18:55+00:00,20.292809,45.496175,-73.561853,34.133,35.945,30.996,44.054
...,...,...,...,...,...,...,...,...
1929,2024-09-16 17:13:27+00:00,18.823519,45.496026,-73.561997,12.473,11.588,12.961,14.024
1930,2024-09-16 17:13:25+00:00,18.823519,45.496026,-73.561997,22.238,12.144,12.141,20.624
1931,2024-09-16 17:13:23+00:00,18.823519,45.496026,-73.561997,14.019,17.161,14.217,24.523
1932,2024-09-16 17:13:21+00:00,18.823519,45.496026,-73.561997,11.645,12.584,13.018,17.155


In [234]:
import numpy as np;
import random;
origData = df.to_numpy();
randSample = origData[random.randint(0,len(origData)-1)]; # for inspection

print("Type of Array with original data: " + str(type(origData)));
print("Type of a record in the original data: " + str(type(randSample)));
print("Number of records in the original dataset: " + str(len(origData)));
print("Number of features in a record: " + str(randSample.size));
print("Dimensionality of a record: " + str(randSample.ndim));
print("Dimensionality of the entire data array: " + str(origData.ndim));
print("Shape of the Array: " + str(origData.shape));
print("Shape of a record: " + str(randSample.shape));

Type of Array with original data: <class 'numpy.ndarray'>
Type of a record in the original data: <class 'numpy.ndarray'>
Number of records in the original dataset: 1934
Number of features in a record: 8
Dimensionality of a record: 1
Dimensionality of the entire data array: 2
Shape of the Array: (1934, 8)
Shape of a record: (8,)


We saw NaN values in the table. That is because of empty cells in the table. To prove our intuition, let's inspect whether there are NaN in specific columns.

In [235]:
df.isna().any()

_time          False
Altitude       False
Latitude       False
Longitude      False
http_result    False
icmp_result    False
tcp_result     False
udp_result     False
dtype: bool

## Data Cleansing

We need to replace all the NaN values with 0.

In [236]:
df = df.fillna(0)
df

Unnamed: 0,_time,Altitude,Latitude,Longitude,http_result,icmp_result,tcp_result,udp_result
0,2024-09-20 17:19:03+00:00,20.292809,45.496175,-73.561853,29.955,59.476,30.031,69.706
1,2024-09-20 17:19:01+00:00,20.292809,45.496175,-73.561853,33.209,31.992,29.947,42.514
2,2024-09-20 17:18:59+00:00,20.292809,45.496175,-73.561853,30.057,64.112,30.036,74.137
3,2024-09-20 17:18:57+00:00,20.292809,45.496175,-73.561853,39.228,43.609,30.135,53.984
4,2024-09-20 17:18:55+00:00,20.292809,45.496175,-73.561853,34.133,35.945,30.996,44.054
...,...,...,...,...,...,...,...,...
1929,2024-09-16 17:13:27+00:00,18.823519,45.496026,-73.561997,12.473,11.588,12.961,14.024
1930,2024-09-16 17:13:25+00:00,18.823519,45.496026,-73.561997,22.238,12.144,12.141,20.624
1931,2024-09-16 17:13:23+00:00,18.823519,45.496026,-73.561997,14.019,17.161,14.217,24.523
1932,2024-09-16 17:13:21+00:00,18.823519,45.496026,-73.561997,11.645,12.584,13.018,17.155


In [237]:
# ensure there is no more NaN values in the table
df.isna().any()

_time          False
Altitude       False
Latitude       False
Longitude      False
http_result    False
icmp_result    False
tcp_result     False
udp_result     False
dtype: bool

Now there's no more NaN values in table.

In [238]:
df

Unnamed: 0,_time,Altitude,Latitude,Longitude,http_result,icmp_result,tcp_result,udp_result
0,2024-09-20 17:19:03+00:00,20.292809,45.496175,-73.561853,29.955,59.476,30.031,69.706
1,2024-09-20 17:19:01+00:00,20.292809,45.496175,-73.561853,33.209,31.992,29.947,42.514
2,2024-09-20 17:18:59+00:00,20.292809,45.496175,-73.561853,30.057,64.112,30.036,74.137
3,2024-09-20 17:18:57+00:00,20.292809,45.496175,-73.561853,39.228,43.609,30.135,53.984
4,2024-09-20 17:18:55+00:00,20.292809,45.496175,-73.561853,34.133,35.945,30.996,44.054
...,...,...,...,...,...,...,...,...
1929,2024-09-16 17:13:27+00:00,18.823519,45.496026,-73.561997,12.473,11.588,12.961,14.024
1930,2024-09-16 17:13:25+00:00,18.823519,45.496026,-73.561997,22.238,12.144,12.141,20.624
1931,2024-09-16 17:13:23+00:00,18.823519,45.496026,-73.561997,14.019,17.161,14.217,24.523
1932,2024-09-16 17:13:21+00:00,18.823519,45.496026,-73.561997,11.645,12.584,13.018,17.155


In [239]:
import random;
origData = df.to_numpy();
randSample = origData[random.randint(0,len(origData)-1)]; # for inspection

print("Type of Array with original data: " + str(type(origData)));
print("Type of a record in the original data: " + str(type(randSample)));
print("Number of records in the original dataset: " + str(len(origData)));
print("Number of features in a record: " + str(randSample.size));
print("Dimensionality of a record: " + str(randSample.ndim));
print("Dimensionality of the entire data array: " + str(origData.ndim));
print("Shape of the Array: " + str(origData.shape));
print("Shape of a record: " + str(randSample.shape));

Type of Array with original data: <class 'numpy.ndarray'>
Type of a record in the original data: <class 'numpy.ndarray'>
Number of records in the original dataset: 1934
Number of features in a record: 8
Dimensionality of a record: 1
Dimensionality of the entire data array: 2
Shape of the Array: (1934, 8)
Shape of a record: (8,)


## Visualizing the Data

## Splitting the Datasets 

In [240]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# From web
def create_window(target, feature, window=1, offset=0):
    feature_new, target_new = [], []
    feature_np = feature.to_numpy()
    target_np = target.to_numpy()

    for i in range(window, target.shape[0] - offset):
        feature_list = feature_np[i - window:i]
        feature_new.append(feature_list.reshape(window, feature_np.shape[1]))
        
        target_new.append(target_np[i + offset])  # Keep target shape intact

    return np.array(feature_new), np.array(target_new)

scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df.drop(columns='_time'))
df_scaled = pd.DataFrame(df_scaled, columns=df.drop(columns='_time').columns)

window = 30
feature_columns = ["Latitude", "Longitude", "Altitude", 'http_result', 'icmp_result', 'tcp_result', 'udp_result']
target_columns = ["http_result", "tcp_result", "udp_result", "icmp_result"]

feature, target = create_window(df_scaled[target_columns], df_scaled[feature_columns], window=window)

# Ensure the shapes are as expected
print("Feature shape:", feature.shape)
print("Target shape:", target.shape)


x_train, x_test, y_train, y_test = train_test_split(feature, target, test_size=0.2)

Feature shape: (1904, 30, 7)
Target shape: (1904, 4)


# Machine Learning Modelling

## Creating the Model

In [241]:
import tensorflow as tf
from keras import layers

print(y_train.shape)
print(x_train.shape)

model = tf.keras.Sequential([
    layers.LSTM(64, return_sequences=True, input_shape=[window, 7]), # 30, 4
    layers.Dropout(0.2),

    layers.LSTM(256),
    layers.Dropout(0.2),

    layers.Dense(4)])

model.compile(
    # optimizer=tf.keras.optimizers.RMSprop(0.0001),
    optimizer='RMSProp', 
    loss='mse')

(1523, 4)
(1523, 30, 7)


2024-10-06 12:48:02.117670: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-10-06 12:48:02.118344: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-10-06 12:48:02.119019: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

## Fitting the Model with Training Data

In [242]:
model.summary()

print(x_train.shape)
print(y_train.shape)

history = model.fit(
    x_train,
    y_train,
    validation_split=0.3,
    epochs=50
)

Model: "sequential_31"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_37 (LSTM)              (None, 30, 64)            18432     
                                                                 
 dropout_12 (Dropout)        (None, 30, 64)            0         
                                                                 
 lstm_38 (LSTM)              (None, 256)               328704    
                                                                 
 dropout_13 (Dropout)        (None, 256)               0         
                                                                 
 dense_31 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 348,164
Trainable params: 348,164
Non-trainable params: 0
_________________________________________________________________
(1523, 30, 7)
(1523, 4)
Epoch 1/50


2024-10-06 12:48:02.362863: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-10-06 12:48:02.363770: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-10-06 12:48:02.364609: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2024-10-06 12:48:05.217239: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-10-06 12:48:05.217871: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-10-06 12:48:05.218948: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## Verify if Fitting is performed correctly

# Perform Predictions with the Model

In [243]:
from sklearn.metrics import r2_score
import seaborn as sns

# Make predictions
y_prob = model.predict(x_test)

# Extract predictions and actual values for HTTP results
y_prob_http = y_prob[:, 0]  # Predictions for HTTP
y_test_http = y_test[:, 0]  # Actual values for HTTP

y_prob_icmp = y_prob[:, 1]  # Predictions for HTTP
y_test_icmp = y_test[:, 1]  # Actual values for HTTP

print("R² Score for http_result:", r2_score(y_test_icmp, y_prob_icmp))

2024-10-06 12:49:20.268681: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-10-06 12:49:20.269462: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-10-06 12:49:20.270089: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

R² Score for http_result: 0.2433031454373813


## Saving model
---

In [245]:
model.save_weights("./models/modelv1.h5");

# Conclusion and Further Discussion
---