In [None]:
!pip install numpy pandas scikit-learn matplotlib seaborn kaggle


In [None]:
from google.colab import files

# Open the Colab file picker so kaggle.json can be uploaded into the working
# directory. The return value holds the uploaded file contents; binding it to
# a name (instead of leaving the call as the cell's last expression) keeps the
# notebook from echoing the API token into the saved cell output.
uploaded = files.upload()


In [None]:
# Copy kaggle.json from the working directory into ~/.kaggle, creating the
# directory first if it does not exist yet.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression


In [None]:
# Read both CSV files from the current working directory.
# NOTE(review): no cell visible here downloads these files — confirm they are
# placed next to the notebook before this cell runs.
train_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ("Train_data.csv", "Test_data.csv")
)

# Preview the first rows of the training frame.
train_df.head()


In [None]:
# Stack the two frames into one and renumber the rows 0..n-1.
# NOTE(review): if Test_data.csv has no `class` column, its rows will carry
# NaN targets into every later cell — confirm both files are labelled.
df = pd.concat([train_df, test_df], ignore_index=True)
df.head()


In [None]:
# Names of the columns stored with the generic `object` dtype; these are the
# ones that get label-encoded later.
# NOTE(review): this presumably includes the `class` target if it is stored
# as text — confirm.
is_object_column = (df.dtypes == object).to_numpy()
cat_cols = df.columns[is_object_column]
cat_cols


In [None]:
# Integer-encode every column listed in `cat_cols`, in place on `df`.
#
# Each column gets its own LabelEncoder, stored in `label_encoders` under the
# column name. Refitting one shared encoder on every column would keep only
# the last column's mapping, so codes could not be translated back to their
# original values (label_encoders[col].inverse_transform(...)).
#
# After the loop `le` is the encoder of the last column in `cat_cols`.
label_encoders = {col: LabelEncoder() for col in cat_cols}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])


In [None]:
# The `class` column is the prediction target; every other column is a feature.
y = df["class"]
X = df.drop(columns=["class"])


In [None]:
# Hold out 20% of the rows for evaluation. The split happens BEFORE scaling so
# that the scaler's mean/variance are learned from the training rows only;
# fitting it on the full matrix would leak test-set statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
# The held-out rows are transformed with the training statistics, never refit.
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted scaler, kept under its
# original name for any other cell that still reads `X_scaled`.
X_scaled = scaler.transform(X)


In [None]:
# 150-tree random forest. The seed is fixed (same value as the train/test
# split) so the fitted model and its scores are reproducible across re-runs.
rf = RandomForestClassifier(n_estimators=150, random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)


In [None]:
# Support vector classifier with an RBF kernel. fit() hands back the fitted
# estimator, so construction and training are chained in one statement.
svm_clf = SVC(kernel="rbf").fit(X_train, y_train)
svm_pred = svm_clf.predict(X_test)


In [None]:
# Logistic regression with the solver capped at 500 iterations; train on the
# training split and predict the held-out rows in one chained call.
lr = LogisticRegression(max_iter=500)
lr_pred = lr.fit(X_train, y_train).predict(X_test)


In [None]:
def evaluate_model(y_test, pred):
    """Print accuracy and macro-averaged precision, recall and F1 for one model.

    Args:
        y_test: True labels.
        pred: Predicted labels to score against ``y_test``.

    Returns:
        None. The four scores are written to stdout, one per line.
    """
    print("Accuracy:", accuracy_score(y_test, pred))

    # The remaining scorers take the same arguments, so drive them from a table.
    macro_scorers = (
        ("Precision:", precision_score),
        ("Recall:", recall_score),
        ("F1 Score:", f1_score),
    )
    for label, scorer in macro_scorers:
        print(label, scorer(y_test, pred, average='macro'))


In [None]:
# Report every model against the same held-out labels. One table plus a loop
# replaces three copy-pasted cells; adding a model is a single new entry.
model_predictions = (
    ("Random Forest", rf_pred),
    ("SVM", svm_pred),
    ("Logistic Regression", lr_pred),
)
for model_name, predictions in model_predictions:
    print(f"=== {model_name} ===")
    evaluate_model(y_test, predictions)


In [None]:
# Confusion matrix of the random forest on the held-out rows
# (rows = actual label, columns = predicted label).
cm = confusion_matrix(y_test, rf_pred)

# Explicit figure/axes so the heatmap and its labels target the same axes.
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
# The en dash is written as an escape so it cannot be mangled by a wrong
# file encoding (the title previously rendered as mojibake).
ax.set_title("Confusion Matrix \u2013 Random Forest")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()


In [None]:
# Serialize the trained random forest to disk. The context manager closes the
# file handle (and flushes the write) even if pickling raises.
# NOTE(review): only `rf` is saved; the model was trained on scaled, encoded
# features, so inference elsewhere needs the same preprocessing — consider
# persisting it alongside the model.
with open("cybersecurity_rf_model.pkl", "wb") as model_file:
    pickle.dump(rf, model_file)
