In [1]:
import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the persisted classifier; later cells call clf.predict on the scaled features.
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load
# model files that come from a trusted source.
clf = joblib.load('ids_model.pkl')


In [2]:
# Ordered column names (43 entries) applied to the headerless data file when it
# is read. Kept as a whitespace-separated table so the order is easy to scan;
# .split() turns it back into a plain list of strings.
column_names = """
    duration protocol_type service flag src_bytes dst_bytes
    land wrong_fragment urgent hot num_failed_logins logged_in
    num_compromised root_shell su_attempted num_root num_file_creations
    num_shells num_access_files num_outbound_cmds is_host_login
    is_guest_login count srv_count serror_rate srv_serror_rate
    rerror_rate srv_rerror_rate same_srv_rate diff_srv_rate
    srv_diff_host_rate dst_host_count dst_host_srv_count
    dst_host_same_srv_rate dst_host_diff_srv_rate dst_host_same_src_port_rate
    dst_host_srv_diff_host_rate dst_host_serror_rate dst_host_srv_serror_rate
    dst_host_rerror_rate dst_host_srv_rerror_rate label difficulty_level
""".split()


In [3]:
# Read the headerless test file, assigning the column names explicitly.
file_path = 'nsl-kdd/KDDTest-21.txt'
df = pd.read_csv(file_path, header=None, names=column_names)

# Drop `difficulty_level` before feature preparation.
# Because `names=column_names` always creates this column, the old
# `if ... in df.columns` guard could never be false; errors='ignore' keeps the
# same "drop only if present" semantics without mutating the frame in place.
df = df.drop(columns='difficulty_level', errors='ignore')
print(df.head())

   duration protocol_type  service flag  src_bytes  dst_bytes  land  \
0        13           tcp   telnet   SF        118       2425     0   
1         0           udp  private   SF         44          0     0   
2         0           tcp   telnet   S3          0         44     0   
3         0           udp  private   SF         53         55     0   
4         0           tcp  private   SH          0          0     0   

   wrong_fragment  urgent  hot  ...  dst_host_srv_count  \
0               0       0    0  ...                  10   
1               0       0    0  ...                 254   
2               0       0    0  ...                  79   
3               0       0    0  ...                 255   
4               0       0    0  ...                   1   

   dst_host_same_srv_rate  dst_host_diff_srv_rate  \
0                    0.38                    0.12   
1                    1.00                    0.01   
2                    0.31                    0.61   
3     

In [4]:
# One-hot encode the categorical fields, restricted to those actually present
# in the frame; warn instead of encoding when none of them are found.
categorical_columns = ['protocol_type', 'service', 'flag']
df_existing_categorical = [name for name in categorical_columns if name in df.columns]

if not df_existing_categorical:
    print("Warning: Categorical columns not found in the dataset!")
else:
    df = pd.get_dummies(df, columns=df_existing_categorical)


In [5]:
# Align the encoded test frame with the column layout saved at training time.
# NOTE(review): joblib.load unpickles arbitrary objects — trusted files only.
train_columns = joblib.load("train_columns.pkl")

# Training columns absent from this frame; kept only for inspection.
missing_cols = set(train_columns) - set(df.columns)

# A single reindex replaces the add-one-column-at-a-time loop: columns missing
# from the frame are created and filled with 0, columns not in train_columns
# are dropped, and the result follows the order of train_columns.
df = df.reindex(columns=train_columns, fill_value=0)

In [6]:
# Load the persisted scaler and apply it to the column-aligned frame.
# NOTE(review): presumably scaler.pkl was fitted and written by the training
# workflow — confirm; joblib.load should only be used on trusted files.
scaler = joblib.load("scaler.pkl")  # precondition: scaler.pkl was saved earlier
X_test = scaler.transform(df)

In [7]:
# Run the loaded classifier on the scaled test features. The result is later
# passed to label_encoder.inverse_transform, so it presumably holds encoded
# class ids rather than label strings — confirm against the training code.
y_pred = clf.predict(X_test)


In [9]:
# Load the persisted label encoder and map the predictions back through
# inverse_transform to obtain the labels that are displayed afterwards.
# NOTE(review): joblib.load should only be used on trusted files.
label_encoder = joblib.load("label_encoder.pkl")  # precondition: label_encoder.pkl was saved earlier
y_pred_labels = label_encoder.inverse_transform(y_pred)

In [10]:
# Collect the decoded predictions into a one-column frame and show it.
df_test_results = pd.DataFrame(y_pred_labels, columns=['Predicted Label'])
print(df_test_results)

      Predicted Label
0              normal
1              normal
2              normal
3              normal
4                nmap
...               ...
11845          normal
11846          normal
11847          normal
11848          normal
11849          normal

[11850 rows x 1 columns]


In [12]:
import numpy as np
import pandas as pd
import joblib

# Single-record inference: classify one hand-written connection record using
# the persisted artifacts (one-hot encode -> align to the training columns ->
# scale -> predict -> decode the label).
# NOTE(review): joblib.load unpickles arbitrary objects — trusted files only.
scaler = joblib.load('scaler.pkl')
clf = joblib.load('ids_model.pkl')
label_encoder = joblib.load('label_encoder.pkl')
train_columns = joblib.load('train_columns.pkl')

column_names = ["duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes",
                "land", "wrong_fragment", "urgent", "hot", "num_failed_logins", "logged_in",
                "num_compromised", "root_shell", "su_attempted", "num_root", "num_file_creations",
                "num_shells", "num_access_files", "num_outbound_cmds", "is_host_login",
                "is_guest_login", "count", "srv_count", "serror_rate", "srv_serror_rate",
                "rerror_rate", "srv_rerror_rate", "same_srv_rate", "diff_srv_rate",
                "srv_diff_host_rate", "dst_host_count", "dst_host_srv_count",
                "dst_host_same_srv_rate", "dst_host_diff_srv_rate", "dst_host_same_src_port_rate",
                "dst_host_srv_diff_host_rate", "dst_host_serror_rate", "dst_host_srv_serror_rate",
                "dst_host_rerror_rate", "dst_host_srv_rerror_rate", "label"]

categorical_columns = ['protocol_type', 'service', 'flag']

# The record is a plain nested list, NOT np.array: a NumPy array holding both
# numbers and strings is coerced to a single string dtype, which turned every
# numeric feature into text ('491', '0.17', ...) and relied on implicit
# string-to-float conversion further down. A list lets pandas infer int/float
# dtypes per column. Each row below lines up with one row of column_names.
new_sample = [[
    0, 'tcp', 'ftp_data', 'SF', 491, 0,
    0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0,
    0, 0, 0, 0,
    0, 2, 2, 0.00, 0.00,
    0.00, 0.00, 1.00, 0.00,
    0.00, 150, 25,
    0.17, 0.03, 0.17,
    0.00, 0.00, 0.00,
    0.05, 0.00,
]]

# The sample has no "label" value, so the last column name is left out.
df_sample = pd.DataFrame(new_sample, columns=column_names[:-1])
df_sample = pd.get_dummies(df_sample, columns=categorical_columns)

# Training columns absent from the encoded sample; kept only for inspection.
missing_cols = set(train_columns) - set(df_sample.columns)

# One reindex adds the missing columns filled with 0, drops any extras and
# orders the result like train_columns.
df_sample = df_sample.reindex(columns=train_columns, fill_value=0)

X_sample = scaler.transform(df_sample)
y_pred_sample = clf.predict(X_sample)
predicted_label = label_encoder.inverse_transform(y_pred_sample)

print("Predicted Intrusion Type:", predicted_label[0])


Predicted Intrusion Type: normal
