In [49]:
import numpy as np
import pandas as pd
# `plt` is the conventional alias for the pyplot interface; aliasing the
# top-level `matplotlib` package as `plt` leaves plt.plot/plt.subplots undefined.
import matplotlib.pyplot as plt

In [None]:
# Load the raw export. Its final row is not a record, so drop it, and parse
# the timestamp column into datetimes in the same pass.
df = pd.read_csv('data/participant_data.csv')
df = (
    df
    .drop(index=df.index[-1])
    .assign(_time=lambda frame: pd.to_datetime(frame['_time']))
)

df

In [51]:
# From web
def create_window(target, feature, window=1, offset=0):
    """Build sliding-window samples for a sequence model.

    For every position ``i`` in ``range(window, len(target) - offset)`` one
    sample is produced: its features are rows ``i - window .. i - 1`` of
    ``feature`` and its label is ``target[i + offset]``.

    Parameters
    ----------
    target : pandas.Series or array-like
        One value per time step; the quantity to predict.
    feature : pandas.DataFrame, pandas.Series or array-like
        One row per time step. A 1-D input is treated as a single feature
        column.
    window : int, default 1
        Number of consecutive past rows in each sample.
    offset : int, default 0
        Extra steps between the end of the window and the label
        (0 means the label is the step right after the window).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Features of shape ``(n_samples, window, n_features)`` and labels of
        shape ``(n_samples, 1)``. When the series is too short to fit a single
        window, both arrays are empty but keep those trailing dimensions.
    """
    # Accept pandas objects as well as plain arrays/lists.
    feature_np = feature.to_numpy() if hasattr(feature, "to_numpy") else np.asarray(feature)
    target_np = target.to_numpy() if hasattr(target, "to_numpy") else np.asarray(target)
    if feature_np.ndim == 1:
        # A single feature column: make it (n_steps, 1) so windows are 2-D.
        feature_np = feature_np.reshape(-1, 1)

    # Position of the first step *after* each window; empty if nothing fits.
    ends = np.arange(window, target_np.shape[0] - offset)
    # Row indices of every window, shape (n_samples, window).
    window_idx = ends[:, None] - window + np.arange(window)

    feature_new = feature_np[window_idx]
    # Explicit sample count (not -1) so a multi-column target still errors
    # instead of being silently flattened.
    target_new = target_np[ends + offset].reshape(len(ends), 1)
    return feature_new, target_new

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale every column except the timestamp.
# NOTE(review): the scaler is fit on the full dataset, i.e. before the
# train/test split performed in a later cell, so test-set statistics leak into
# the scaling. Consider fitting on the training portion only.
numeric_df = df.drop(columns='_time')
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(numeric_df), columns=numeric_df.columns)

window = 30
feature_columns = ['http_result', 'icmp_result', 'tcp_result', 'udp_result']

# create_window takes the target series first, then the feature frame.
feature, target = create_window(df_scaled['http_result'], df_scaled[feature_columns], window=window)
print(feature[0])
print(target[0])
print(df_scaled.head(12))

# Call head() -- a bare `df.head` only displays the bound method object.
df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Chronological hold-out: the samples are time-ordered windows and the
# evaluation plot maps the test samples onto the tail of df['_time'], so the
# split must keep the original order (train_test_split shuffles by default).
x_train, x_test, y_train, y_test = train_test_split(
    feature, target, test_size=0.2, shuffle=False
)
target

In [None]:
import tensorflow as tf
from keras import layers

print(x_train.shape)

# Single LSTM layer over (window length, feature count) inputs, followed by a
# one-unit dense head for the regression output.
model = tf.keras.Sequential()
model.add(layers.LSTM(units=64, input_shape=[x_train.shape[1], x_train.shape[2]]))
model.add(layers.Dense(units=1))

# Alternative kept for reference: tf.keras.optimizers.RMSprop(0.0001)
model.compile(loss='mse', optimizer='RMSProp')

In [None]:
# Print the model summary before training.
model.summary()

# Train for 50 epochs, holding out 30% of the training samples for validation.
history = model.fit(x=x_train, y=y_train, epochs=50, validation_split=0.3)

# Prediction

In [None]:
from sklearn.metrics import r2_score
import seaborn as sns

# Predict on the held-out windows. Despite the name `y_prob`, these are
# regression outputs (a single Dense unit trained with MSE), not probabilities.
y_prob = model.predict(x_test)

def print_test_x_prediction(y_test, y_prob, df_date, train_size, window=0):
    """Draw the actual and predicted series as two lines over the same dates.

    The x-axis is ``df_date`` from position ``train_size + window`` to the
    end; the first column of ``y_test`` is labelled 'Test' and the first
    column of ``y_prob`` is labelled 'Predict'.
    """
    dates = df_date.iloc[train_size + window:]
    for values, label in ((y_test, 'Test'), (y_prob, 'Predict')):
        sns.lineplot(x=dates, y=values[:, 0], label=label)

# Plot actual vs. predicted values, then report the R^2 score on the test set.
print_test_x_prediction(
    y_test=y_test,
    y_prob=y_prob,
    df_date=df['_time'],
    train_size=len(y_train),
    window=window,
)

print("RScore ", r2_score(y_true=y_test, y_pred=y_prob))