In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, confusion_matrix, classification_report
from sklearn.impute import SimpleImputer

In [2]:
# Locations of the competition inputs on the Kaggle filesystem.
train_csv = '/kaggle/input/network-attacks-dataset/Train_Data.csv'
test_csv = '/kaggle/input/network-attacks-dataset/Test_Data.csv'
submission_csv = '/kaggle/input/sample-submission/Sample_Submission.csv'

# Read the labelled training set, the test set to score, and the
# submission template that the predictions are later written into.
train_data = pd.read_csv(train_csv)
test_data = pd.read_csv(test_csv)
sample_submission = pd.read_csv(submission_csv)

In [3]:
# Features: every column of the training frame except the raw 'attack' label.
X_train = train_data.drop(columns='attack')

# Binary target: 1 where the label is exactly 'neptune', 0 for any other value.
# A vectorised comparison replaces the per-row Python lambda; the explicit
# int64 cast keeps the integer dtype stable (including for an empty frame).
y_train = (train_data['attack'] == 'neptune').astype('int64')

In [4]:
# Sanity check: (rows, columns) of the feature frame after dropping 'attack'.
X_train.shape

(86845, 42)

In [5]:
# Sanity check: the label vector should have one entry per row of X_train.
y_train.shape

(86845,)

In [6]:
# Object-dtype (string) columns are routed to the categorical branch.
categorical_cols = X_train.select_dtypes(include=['object']).columns.tolist()

# 'number' selects every numeric width (int32, float32, unsigned, ...) rather
# than only int64/float64. A numeric column missing from both lists would be
# discarded by the ColumnTransformer, whose default is remainder='drop'.
numerical_cols = X_train.select_dtypes(include='number').columns.tolist()

In [7]:
# Numeric branch: fill missing values with the column median, then scale.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

In [8]:
# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode. Categories not seen during fit are ignored at
# transform time instead of raising.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

In [9]:
# Route each column group through its own sub-pipeline:
# (branch name, transformer, columns it applies to).
column_branches = [
    ('num', numeric_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
]
preprocessor = ColumnTransformer(transformers=column_branches)


In [10]:
# End-to-end estimator: preprocessing followed by a random forest.
# The fixed random_state keeps the forest reproducible between runs.
forest = RandomForestClassifier(random_state=42)
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', forest),
])

In [11]:
# Hold out 20% of the labelled rows for validation; the seed fixes the split.
(X_train_split, X_val_split,
 y_train_split, y_val_split) = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Fit the preprocessing steps and the classifier on the training portion only.
model.fit(X=X_train_split, y=y_train_split)

In [13]:
# Predict labels for the held-out validation rows.
y_val_pred = model.predict(X=X_val_split)

In [14]:
# Confusion matrix on the validation split (rows: true class, columns: predicted).
print("Confusion Matrix:")
val_confusion = confusion_matrix(y_val_split, y_val_pred)
print(val_confusion)

Confusion Matrix:
[[10762     0]
 [    0  6607]]


In [15]:
# Per-class precision / recall / F1 on the validation split.
print("\nClassification Report:")
val_report = classification_report(y_val_split, y_val_pred)
print(val_report)


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     10762
           1       1.00      1.00      1.00      6607

    accuracy                           1.00     17369
   macro avg       1.00      1.00      1.00     17369
weighted avg       1.00      1.00      1.00     17369



In [16]:
# Scalar validation metrics; precision/recall/F1 are for the positive class (1).
accuracy, precision, recall, f1 = (
    scorer(y_val_split, y_val_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

for label, value in (
    ('Accuracy', accuracy),
    ('Precision', precision),
    ('Recall', recall),
    ('Validation F1 Score', f1),
):
    print(f'{label}: {value}')

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Validation F1 Score: 1.0


In [17]:
# Predict on the full labelled set.
# NOTE: X_train is the frame from before the train/validation split, so it
# also contains the rows held out as X_val_split. Metrics derived from these
# predictions are therefore not pure training-set scores.
y_train_pred = model.predict(X=X_train)

In [18]:
# Confusion matrix for the predictions made on X_train.
print("Confusion Matrix on Training Data:")
train_confusion = confusion_matrix(y_train, y_train_pred)
print(train_confusion)

Confusion Matrix on Training Data:
[[53847     0]
 [    0 32998]]


In [19]:
# Per-class precision / recall / F1 for the predictions made on X_train.
print("\nClassification Report on Training Data:")
train_report = classification_report(y_train, y_train_pred)
print(train_report)


Classification Report on Training Data:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     53847
           1       1.00      1.00      1.00     32998

    accuracy                           1.00     86845
   macro avg       1.00      1.00      1.00     86845
weighted avg       1.00      1.00      1.00     86845



In [20]:
# Scalar metrics for the predictions made on X_train (positive class = 1).
train_accuracy, train_precision, train_recall, train_f1 = (
    scorer(y_train, y_train_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

for label, value in (
    ('Training Accuracy', train_accuracy),
    ('Training Precision', train_precision),
    ('Training Recall', train_recall),
    ('Training F1 Score', train_f1),
):
    print(f'{label}: {value}')


Training Accuracy: 1.0
Training Precision: 1.0
Training Recall: 1.0
Training F1 Score: 1.0


In [21]:
# Score the test set with the fitted pipeline; the ColumnTransformer picks
# the feature columns by the names captured in numerical_cols/categorical_cols.
test_predictions = model.predict(X=test_data)

In [22]:
# Build the submission from the template without modifying the template
# itself: assign() returns a new frame with 'attack' set to the predictions.
output_df = sample_submission.assign(attack=test_predictions)

In [23]:
# Display the submission frame to eyeball the predicted labels before saving.
output_df

Unnamed: 0,attack
0,1
1,0
2,1
3,1
4,1
...,...
21707,0
21708,0
21709,1
21710,0


In [24]:
# Write the submission without the DataFrame index and confirm the location.
output_path = '/kaggle/working/MY_SUBMISSION.csv'
output_df.to_csv(path_or_buf=output_path, index=False)
saved_message = f"Predictions saved to {output_path}"
print(saved_message)

Predictions saved to /kaggle/working/MY_SUBMISSION.csv
