In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

In [3]:
# Load the processed data
data = pd.read_csv('Processed.csv')

# Separate features and target labels
X = data.drop('app', axis=1)  # Replace 'label' with the actual target column name
y = data['app']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [6]:
pca = PCA().fit(X_scaled)
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
n_components = np.argmax(cumulative_variance >= 0.95) + 1  # Choose components that reach 95% variance
print("Optimal number of components:", n_components)

# Applying PCA with optimal components
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

Optimal number of components: 15


In [7]:
# Define autoencoder structure
input_dim = X_pca.shape[1]
encoding_dim = 25  # Latent space dimension, adjust as needed

# Encoder
input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation="relu")(input_layer)

# Decoder
decoder = Dense(input_dim, activation="sigmoid")(encoder)

# Autoencoder Model
autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(optimizer='adam', loss='mse')

# Train autoencoder
autoencoder.fit(X_pca, X_pca, epochs=50, batch_size=32, shuffle=True, validation_split=0.2)


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 1.9408 - val_loss: 1.6190
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.8165 - val_loss: 1.5535
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.7471 - val_loss: 1.4980
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.2180 - val_loss: 1.4536
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.7676 - val_loss: 1.4185
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.5430 - val_loss: 1.3885
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.5363 - val_loss: 1.3639
Epoch 8/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.5220 - val_loss: 1.3414
Epoch 9/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x239199bb710>

In [9]:
encoder_model = Model(inputs=input_layer, outputs=encoder)
X_latent = encoder_model.predict(X_pca)
# Split the data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X_latent, y, test_size=0.3, random_state=42)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [10]:
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

In [11]:
# Predict and evaluate on test data
y_pred = classifier.predict(X_test)

# Calculate accuracy and classification report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)


Accuracy: 0.7372881355932204
Classification Report:
               precision    recall  f1-score   support

           0       0.47      0.54      0.50        13
           1       0.82      0.56      0.67        16
           2       0.89      0.93      0.91        44
           3       0.80      0.73      0.76        22
           4       0.88      0.91      0.89        46
           5       0.82      0.89      0.85        63
           6       0.50      0.70      0.58        10
           7       0.67      0.35      0.46        17
           8       0.43      0.46      0.44        13
           9       0.50      0.35      0.41        20
          10       0.41      0.50      0.45        22
          11       0.81      0.77      0.79        39
          12       0.64      0.78      0.70         9
          13       0.80      0.80      0.80        20

    accuracy                           0.74       354
   macro avg       0.67      0.66      0.66       354
weighted avg       0.74    

In [14]:
unique_classes = data['app'].unique()
for class_label in unique_classes:
  y_true_binary = (y_test == class_label)
  y_pred_binary = (y_pred == class_label)
  print(f"\nClassification Report for Class {class_label}:")
  print(classification_report(y_true_binary, y_pred_binary))


Classification Report for Class 0:
              precision    recall  f1-score   support

       False       0.98      0.98      0.98       341
        True       0.47      0.54      0.50        13

    accuracy                           0.96       354
   macro avg       0.72      0.76      0.74       354
weighted avg       0.96      0.96      0.96       354


Classification Report for Class 1:
              precision    recall  f1-score   support

       False       0.98      0.99      0.99       338
        True       0.82      0.56      0.67        16

    accuracy                           0.97       354
   macro avg       0.90      0.78      0.83       354
weighted avg       0.97      0.97      0.97       354


Classification Report for Class 2:
              precision    recall  f1-score   support

       False       0.99      0.98      0.99       310
        True       0.89      0.93      0.91        44

    accuracy                           0.98       354
   macro avg       0

In [15]:
# Save trained models
classifier_filename = 'traffic_classifier.pkl'
autoencoder_filename = 'traffic_autoencoder.h5'

import joblib
joblib.dump(classifier, classifier_filename)
autoencoder.save(autoencoder_filename)




In [5]:
#without scaling technique
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv('Processed.csv')

# Separate features and target
X = data.drop(columns=['app'])
y = data['app']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest Classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set 
y_pred = rf_model.predict(X_test)

# Calculate accuracy and display classification report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

Accuracy: 0.8813559322033898
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.75      0.60         4
           1       0.75      0.60      0.67        10
           2       1.00      0.97      0.98        32
           3       0.87      1.00      0.93        13
           4       1.00      0.93      0.97        30
           5       0.93      1.00      0.96        40
           6       0.80      0.89      0.84         9
           7       0.43      0.33      0.38         9
           8       0.56      0.62      0.59         8
           9       0.90      0.64      0.75        14
          10       0.81      0.81      0.81        16
          11       0.90      0.93      0.91        28
          12       1.00      1.00      1.00         6
          13       0.94      1.00      0.97        17

    accuracy                           0.88       236
   macro avg       0.81      0.82      0.81       236
weighted avg       0.88     

In [1]:
#with standradization = z-score normalisation
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv('Processed.csv')

# Separate features and target
X = data.drop(columns=['app'])
y = data['app']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest Classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Calculate accuracy and display classification report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)


Accuracy: 0.885593220338983
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.75      0.60         4
           1       0.75      0.60      0.67        10
           2       1.00      0.97      0.98        32
           3       0.87      1.00      0.93        13
           4       1.00      0.93      0.97        30
           5       0.93      1.00      0.96        40
           6       0.80      0.89      0.84         9
           7       0.44      0.44      0.44         9
           8       0.56      0.62      0.59         8
           9       0.90      0.64      0.75        14
          10       0.81      0.81      0.81        16
          11       0.93      0.93      0.93        28
          12       1.00      1.00      1.00         6
          13       1.00      1.00      1.00        17

    accuracy                           0.89       236
   macro avg       0.82      0.83      0.82       236
weighted avg       0.89      

In [2]:
unique_classes = data['app'].unique()
for class_label in unique_classes:
  y_true_binary = (y_test == class_label)
  y_pred_binary = (y_pred == class_label)
  print(f"\nClassification Report for Class {class_label}:")
  print(classification_report(y_true_binary, y_pred_binary))


Classification Report for Class 0:
              precision    recall  f1-score   support

       False       1.00      0.99      0.99       232
        True       0.50      0.75      0.60         4

    accuracy                           0.98       236
   macro avg       0.75      0.87      0.80       236
weighted avg       0.99      0.98      0.98       236


Classification Report for Class 1:
              precision    recall  f1-score   support

       False       0.98      0.99      0.99       226
        True       0.75      0.60      0.67        10

    accuracy                           0.97       236
   macro avg       0.87      0.80      0.83       236
weighted avg       0.97      0.97      0.97       236


Classification Report for Class 2:
              precision    recall  f1-score   support

       False       1.00      1.00      1.00       204
        True       1.00      0.97      0.98        32

    accuracy                           1.00       236
   macro avg       1

In [4]:
from sklearn.preprocessing import MinMaxScaler

# Normalize the features using MinMaxScaler
normalizer = MinMaxScaler()
X_normalized = normalizer.fit_transform(X)

# Split the data into training and testing sets with normalized data
X_train_norm, X_test_norm, y_train_norm, y_test_norm = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

# Train the Random Forest model on normalized data
rf_model_norm = RandomForestClassifier(random_state=42)
rf_model_norm.fit(X_train_norm, y_train_norm)

# Make predictions on the test set with normalized data
y_pred_norm = rf_model_norm.predict(X_test_norm)

# Calculate accuracy and classification report for the normalized data
accuracy_norm = accuracy_score(y_test_norm, y_pred_norm)
report_norm = classification_report(y_test_norm, y_pred_norm)

print(f"Accuracy: {accuracy_norm}")
print("Classification Report:")
print(report_norm)


Accuracy: 0.8728813559322034
Classification Report:
              precision    recall  f1-score   support

           0       0.43      0.75      0.55         4
           1       0.75      0.60      0.67        10
           2       1.00      0.97      0.98        32
           3       0.75      0.92      0.83        13
           4       0.96      0.87      0.91        30
           5       0.93      1.00      0.96        40
           6       0.80      0.89      0.84         9
           7       0.50      0.44      0.47         9
           8       0.60      0.75      0.67         8
           9       0.89      0.57      0.70        14
          10       0.81      0.81      0.81        16
          11       0.96      0.93      0.95        28
          12       1.00      1.00      1.00         6
          13       0.94      1.00      0.97        17

    accuracy                           0.87       236
   macro avg       0.81      0.82      0.81       236
weighted avg       0.88     