In [22]:
import pandas as pd
import random
import numpy as np

# Function to generate data for a router
def generate_data(id):
    data = {
        "id": id,
        "Property Type": "Object",
        "Description": "Object type. Has the fixed value 'object#interface-statistics'",
        "kind": "string",
        "if-name": f"interface-{id}",
        "in-errors": random.randint(0, 100),
        "in-packet-drops": random.randint(0, 50),
        "in-current-packets": random.randint(1000, 5000),
        "in-packet-rate-bps": round(np.random.uniform(10000, 50000), 2),
        "in-packet-rate-pps": round(np.random.uniform(100, 1000), 2),
        "out-errors": random.randint(0, 50),
        "out-packet-drops": random.randint(0, 25),
        "out-current-packets": random.randint(5000, 10000),
        "out-packet-rate-bps": round(np.random.uniform(50000, 100000), 2),
        "out-packet-rate-pps": round(np.random.uniform(500, 2000), 2)         
        
    }
    return data

# Function to create a dataframe of all router data
def create_dataframe(num_routers):
    data_list = []
    for id in range(num_routers):
        data = generate_data(id)
        data_list.append(data)
    
    df = pd.DataFrame(data_list)
    return df

# Function to determine if data is anomalous (10% of data should be anomalous)
def is_anomalous(row):
    # Randomly assign True to approximately 10% of the rows
    return random.random() < 0.1


df = create_dataframe(500)

# Add an 'Anomaly' column
df['Anomaly'] = df.apply(is_anomalous, axis=1)

# Display the dataframe
df
class_counts = df['Anomaly'].value_counts()


In [23]:
class_counts

False    443
True      57
Name: Anomaly, dtype: int64

In [24]:
df

Unnamed: 0,id,Property Type,Description,kind,if-name,in-errors,in-packet-drops,in-current-packets,in-packet-rate-bps,in-packet-rate-pps,out-errors,out-packet-drops,out-current-packets,out-packet-rate-bps,out-packet-rate-pps,Anomaly
0,0,Object,Object type. Has the fixed value 'object#inter...,string,interface-0,0,12,1027,39089.41,111.82,36,17,9253,53804.52,1106.75,False
1,1,Object,Object type. Has the fixed value 'object#inter...,string,interface-1,31,8,1685,23008.83,985.77,3,23,5913,85473.88,1850.55,False
2,2,Object,Object type. Has the fixed value 'object#inter...,string,interface-2,52,45,4564,11539.60,698.60,3,12,7253,60355.69,1886.04,False
3,3,Object,Object type. Has the fixed value 'object#inter...,string,interface-3,32,38,1720,34968.38,719.27,4,25,8766,66710.89,1224.11,False
4,4,Object,Object type. Has the fixed value 'object#inter...,string,interface-4,0,48,1730,45037.35,928.79,21,5,6328,94832.88,798.42,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,Object,Object type. Has the fixed value 'object#inter...,string,interface-495,80,50,1929,16219.19,216.44,0,8,6917,94531.18,1934.74,False
496,496,Object,Object type. Has the fixed value 'object#inter...,string,interface-496,1,7,2500,40910.76,996.58,22,22,5688,91528.89,611.66,False
497,497,Object,Object type. Has the fixed value 'object#inter...,string,interface-497,18,42,3525,39440.09,559.77,43,0,5061,67791.83,920.74,False
498,498,Object,Object type. Has the fixed value 'object#inter...,string,interface-498,68,21,4031,21990.06,662.73,14,3,7132,59260.22,1334.72,False


In [25]:
columns_to_drop = ["id", "Property Type", "Description", "kind", "if-name"]
df = df.drop(columns=columns_to_drop)
df

Unnamed: 0,in-errors,in-packet-drops,in-current-packets,in-packet-rate-bps,in-packet-rate-pps,out-errors,out-packet-drops,out-current-packets,out-packet-rate-bps,out-packet-rate-pps,Anomaly
0,0,12,1027,39089.41,111.82,36,17,9253,53804.52,1106.75,False
1,31,8,1685,23008.83,985.77,3,23,5913,85473.88,1850.55,False
2,52,45,4564,11539.60,698.60,3,12,7253,60355.69,1886.04,False
3,32,38,1720,34968.38,719.27,4,25,8766,66710.89,1224.11,False
4,0,48,1730,45037.35,928.79,21,5,6328,94832.88,798.42,True
...,...,...,...,...,...,...,...,...,...,...,...
495,80,50,1929,16219.19,216.44,0,8,6917,94531.18,1934.74,False
496,1,7,2500,40910.76,996.58,22,22,5688,91528.89,611.66,False
497,18,42,3525,39440.09,559.77,43,0,5061,67791.83,920.74,False
498,68,21,4031,21990.06,662.73,14,3,7132,59260.22,1334.72,False


In [26]:
df['Anomaly'] = df['Anomaly'].astype(int)

In [27]:
df

Unnamed: 0,in-errors,in-packet-drops,in-current-packets,in-packet-rate-bps,in-packet-rate-pps,out-errors,out-packet-drops,out-current-packets,out-packet-rate-bps,out-packet-rate-pps,Anomaly
0,0,12,1027,39089.41,111.82,36,17,9253,53804.52,1106.75,0
1,31,8,1685,23008.83,985.77,3,23,5913,85473.88,1850.55,0
2,52,45,4564,11539.60,698.60,3,12,7253,60355.69,1886.04,0
3,32,38,1720,34968.38,719.27,4,25,8766,66710.89,1224.11,0
4,0,48,1730,45037.35,928.79,21,5,6328,94832.88,798.42,1
...,...,...,...,...,...,...,...,...,...,...,...
495,80,50,1929,16219.19,216.44,0,8,6917,94531.18,1934.74,0
496,1,7,2500,40910.76,996.58,22,22,5688,91528.89,611.66,0
497,18,42,3525,39440.09,559.77,43,0,5061,67791.83,920.74,0
498,68,21,4031,21990.06,662.73,14,3,7132,59260.22,1334.72,0


In [45]:
from sklearn.preprocessing import StandardScaler



In [46]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
from imblearn.over_sampling import SMOTE
import joblib

X = df.drop(columns=['Anomaly'])
y = df['Anomaly']

# standardization
scaler = StandardScaler()
X = scaler.fit_transform(X)
#doing oversampling
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)



# Split the resampled data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Initialize Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_classifier.fit(X_train, y_train)

# Predict on the test data
y_pred = rf_classifier.predict(X_test)
model_filename = 'random_forest_model.pkl'
joblib.dump(rf_classifier, model_filename)

# Evaluate the model
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Confusion Matrix:
 [[81 14]
 [ 5 78]]

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.85      0.90        95
           1       0.85      0.94      0.89        83

    accuracy                           0.89       178
   macro avg       0.89      0.90      0.89       178
weighted avg       0.90      0.89      0.89       178



In [47]:
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)


Accuracy: 0.8932584269662921


In [48]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

In [49]:

# Initialize AdaBoost classifier with Decision Tree as base estimator
base_estimator = DecisionTreeClassifier(max_depth=1)  # Weak learner
ada_classifier = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=100, random_state=42)

# Train the model
ada_classifier.fit(X_train, y_train)

# Predict on the test data
y_pred = ada_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Evaluate the model
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nAccuracy:", accuracy)



Confusion Matrix:
 [[62 33]
 [19 64]]

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.65      0.70        95
           1       0.66      0.77      0.71        83

    accuracy                           0.71       178
   macro avg       0.71      0.71      0.71       178
weighted avg       0.72      0.71      0.71       178


Accuracy: 0.7078651685393258
