In [16]:
import tensorflow as tf
print(tf.__version__)

2.17.1


In [17]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from csv import writer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, Conv1D,MaxPooling1D,BatchNormalization
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import ticker as ticker
import matplotlib.pyplot as plt
import h5py
import os
from sklearn.metrics import f1_score

# Input : Preprocessed Website Fingerprint(WF) dataset

In [18]:
train_X = pd.read_csv('trainX_US_Chrome.csv', header=None)
train_Y = pd.read_csv('trainY_US_Chome.csv', header=None)

In [19]:
trainX = train_X.to_numpy()
trainY = train_Y.to_numpy()

In [20]:
print(trainX.shape)
print(trainY.shape)

(10000, 6000)
(10000, 1)


In [21]:
trainX = np.expand_dims(trainX,axis=2)

In [22]:
minimum = np.amin(trainX)
maximum = np.amax(trainX)

trainX_normalized = (trainX-minimum)/(maximum-minimum)
print(trainX_normalized.shape)

trainX_test = trainX_normalized

(10000, 6000, 1)


In [23]:
trainY= keras.utils.to_categorical(trainY)
print(trainY.shape)

(10000, 100)


In [24]:
# set aside 20% of train and test data for evaluation
X_train, X_test, y_train, y_test = train_test_split(trainX_test, trainY ,test_size=0.2, shuffle = True, random_state = 42)

# Use the same function above for the validation set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state= 7) # 0.25 x 0.8 = 0.2

print("--------------Data--------------")
print("Orignal X shape: {}".format(trainX.shape))
print("Origianl Y shape: {}".format(trainY.shape))

print("--------------Train Data--------------")
print("X_train shape: {}".format(X_train.shape))
print("y_train shape: {}".format(y_train.shape))

print("--------------Validation Data--------------")
print("X_val shape: {}".format(X_val.shape))
print("y val shape: {}".format(y_val.shape))

print("--------------Test Data--------------")
print("X_test shape: {}".format(X_test.shape))
print("y_test shape: {}".format(y_test.shape))

--------------Data--------------
Orignal X shape: (10000, 6000, 1)
Origianl Y shape: (10000, 100)
--------------Train Data--------------
X_train shape: (6000, 6000, 1)
y_train shape: (6000, 100)
--------------Validation Data--------------
X_val shape: (2000, 6000, 1)
y val shape: (2000, 100)
--------------Test Data--------------
X_test shape: (2000, 6000, 1)
y_test shape: (2000, 100)


In [25]:
def model_create(x=None):
    # This depends on the number of Website
    numberOfWebsite = 100

    # This depends on the number of samples
    input = keras.Input(shape = (6000,1))

    x = layers.Conv1D(128, 3, activation="relu", padding="same")(input)
    x = layers.Conv1D(64, 3, activation="relu", padding="same")(x)
    x = MaxPooling1D(pool_size=3)(x)

    x = layers.Conv1D(32, 3, activation="relu", padding="same")(x)
    x = MaxPooling1D(pool_size=3)(x)

    x = layers.Dropout(0.3)(x)
    x = layers.Flatten()(x)

    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)

    x = Dense(numberOfWebsite,activation='softmax')(x)
    model = keras.Model(inputs = input, outputs = x)

    return model

In [26]:
model = model_create()
model.summary()

In [27]:
model.compile(loss="categorical_crossentropy",optimizer='adam',metrics=["accuracy"]) #Original

In [28]:
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
history = model.fit(X_train, y_train, epochs=40, validation_data=(X_val, y_val))

(6000, 6000, 1) (6000, 100)
(2000, 6000, 1) (2000, 100)
Epoch 1/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 90ms/step - accuracy: 0.0120 - loss: 4.5978 - val_accuracy: 0.0175 - val_loss: 4.4930
Epoch 2/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 50ms/step - accuracy: 0.0234 - loss: 4.4292 - val_accuracy: 0.0305 - val_loss: 4.1539
Epoch 3/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 51ms/step - accuracy: 0.0472 - loss: 4.0347 - val_accuracy: 0.0510 - val_loss: 4.0118
Epoch 4/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 51ms/step - accuracy: 0.0971 - loss: 3.7719 - val_accuracy: 0.0615 - val_loss: 3.9514
Epoch 5/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 49ms/step - accuracy: 0.1524 - loss: 3.5131 - val_accuracy: 0.0700 - val_loss: 3.9912
Epoch 6/40
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 52ms/step - accuracy: 0.1815 - loss: 3.3063 - v

# Pre-trained Model Save

In [29]:
model.save('CNN_Model_WF_retrained.h5')



In [30]:
score = model.evaluate(X_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 7.295942306518555
Test accuracy: 0.06300000101327896
