<a href="https://colab.research.google.com/github/projectsby-saini/Cyber_Threat_Analysis/blob/main/cyber_mini_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

***Cyber Threat Analysis and Mitigation***

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# dataset path used by the loading cell resolves. Requires a Colab runtime
# (the `drive` helper comes from google.colab).
drive.mount('/content/drive')

In [None]:
# Location of the dataset CSV on the mounted Google Drive.
data_path = '/content/drive/MyDrive/CyberThreat/wustl_hdrl_2024.csv'

# low_memory=False parses the file in one pass rather than in chunks, which
# avoids mixed-type inference within a column.
data = pd.read_csv(data_path, low_memory=False)

# Show the distinct values found in the fifth column (positional index 4).
fifth_column = data.iloc[:, 4]
print(fifth_column.unique())

In [None]:
# Display the (rows, columns) shape of the freshly loaded DataFrame.
data.shape

In [None]:
# Display the total number of cells (rows * columns) in the DataFrame.
data.size

In [None]:
# Mark every cell holding the literal string '   ->' as missing, so that the
# dropna() call in the following cell discards the rows containing it.
data.replace(to_replace='   ->', value=np.nan, inplace=True)

In [None]:
# Remove, in place, every row that contains at least one missing value.
data.dropna(axis=0, how='any', inplace=True)

In [None]:
# One-hot encode the non-numeric columns of the table. drop_first=True omits
# the first level of each encoded column, so k categories yield k-1 indicators.
# NOTE(review): with no `columns=` argument every object/category column is
# encoded; if 'Label' is stored as text it would be encoded and renamed as
# well, and the presence check in the next cell would raise — confirm its dtype.
data = pd.get_dummies(data, drop_first=True)

In [None]:
# Fail fast when the target column is absent after encoding, rather than
# hitting a less descriptive error when the features and target are split.
label_is_missing = 'Label' not in data.columns
if label_is_missing:
    raise KeyError("The column 'Label' is not present in the DataFrame.")

In [None]:
# Separate the target column from the feature columns.
y = data['Label']
X = data.drop(columns='Label')

In [None]:
# Preview the first rows of the feature table and of the target series.
for heading, preview in (
    ("First few rows of X:", X.head()),
    ("First few values of y:", y.head()),
):
    print(heading)
    print(preview)

In [None]:
# Convert the feature table into a plain float64 NumPy array.
#
# A guard of the form `isinstance(X, (pd.DataFrame, np.ndarray))` accepts a
# DataFrame as-is, so the conversion would never run for the DataFrame built
# by `data.drop(...)`. Converting unconditionally makes the later `.ndim`
# checks, `np.expand_dims` and `.reshape` calls operate on a real ndarray.
# Fixing the dtype also prevents an object-dtype array when boolean dummy
# columns are mixed with numeric columns, and surfaces any leftover
# non-numeric column here instead of deep inside scaling or training.
X = np.asarray(X, dtype=np.float64)

In [None]:
# Report the container type, shape and rank of X before a new axis is added.
for caption, value in (
    ("Type of X:", type(X)),
    ("Shape of X before expanding dimensions:", X.shape),
    ("Dimensions of X before expanding dimensions:", X.ndim),
):
    print(caption, value)

In [None]:
# Append a trailing length-1 axis so X gains one more dimension: a 2-D input
# becomes 3-D and a 1-D input becomes 2-D. Any other rank is only reported
# and X is left untouched.
rank = X.ndim
if rank not in (1, 2):
    print("Unexpected number of dimensions in X:", rank)
elif rank == 1:
    X = np.expand_dims(X, axis=1)
    print("Expanded X along axis 1")
else:
    X = np.expand_dims(X, axis=2)
    print("Expanded X along axis 2")

In [None]:
# Confirm the shape of X once the extra axis has been added.
print("Shape of X after expanding dimensions:", X.shape)

In [None]:
# Hold out 30% of the samples for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
# NOTE(review): the split is not stratified — consider stratify=y if the
# label distribution printed in the next cell turns out to be imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Show how many samples of each label ended up in the two splits.
for heading, labels in (
    ("Distribution of labels in y_train:", y_train),
    ("Distribution of labels in y_test:", y_test),
):
    print(heading)
    print(labels.value_counts())

In [None]:
# Standardise every feature to zero mean and unit variance.
scaler = StandardScaler()

# StandardScaler expects 2-D (samples, features) input. Flatten everything
# after the sample axis so each original feature keeps its own column and is
# scaled with its own statistics. Reshaping to (-1, shape[-1]) instead would
# collapse all features into a single column whenever the last axis has
# length 1, applying one global mean/std to all of them.
train_shape = X_train.shape
X_train = scaler.fit_transform(
    X_train.reshape(train_shape[0], -1)
).reshape(train_shape)

# Apply the statistics learned from the training split only; the scaler is
# deliberately not refitted on the test data.
test_shape = X_test.shape
X_test = scaler.transform(
    X_test.reshape(test_shape[0], -1)
).reshape(test_shape)

In [None]:
# Start from an empty Sequential model; its layers are appended in the next cell.
model = Sequential()

In [None]:
# Two stacked LSTM layers, each followed by 20% dropout, ending in a single
# sigmoid unit that outputs one probability per sample. The first LSTM
# returns the full sequence so the second LSTM can consume it; the second
# returns only its final output.
steps, width = X_train.shape[1], X_train.shape[2]
layer_stack = (
    LSTM(100, input_shape=(steps, width), return_sequences=True),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
)
for layer in layer_stack:
    model.add(layer)

In [None]:
# Configure training: binary cross-entropy loss (paired with the sigmoid
# output), the Adam optimizer, and accuracy tracked as a metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 50 epochs in mini-batches of 64, holding back 20% of the training
# data for validation. The returned History object feeds the learning-curve
# plots in the evaluation cell.
try:
    history = model.fit(X_train, y_train, epochs=50, batch_size=64, validation_split=0.2, verbose=1)
except Exception as e:
    print(f"Error during model training: {e}")
    # Re-raise so the notebook stops here with a full traceback. Swallowing
    # the error would leave `history` undefined and let the following cells
    # evaluate a model that was never trained.
    raise

In [None]:
# Evaluate the trained model on the held-out test split: accuracy and
# classification report, confusion-matrix heatmap, training curves, ROC curve
# with AUC, and a scatter of predicted probability against the actual label.
try:
    # The network ends in Dense(1), so predict() yields one column per sample;
    # flatten it to 1-D so it lines up element-wise with y_test in the
    # metrics and plots below.
    y_pred_prob = model.predict(X_test).ravel()
    # Threshold the probabilities at 0.5 to obtain hard 0/1 predictions.
    y_pred = (y_pred_prob > 0.5).astype("int32")

    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy:.4f}')
    print(classification_report(y_test, y_pred))

    # Pin the label order so the matrix is always 2x2 and matches the
    # hard-coded tick labels, even if one class is absent from the data.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['0', '1'], yticklabels=['0', '1'])
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

    # Plot training & validation accuracy and loss values
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train')
    plt.plot(history.history['val_accuracy'], label='Validation')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train')
    plt.plot(history.history['val_loss'], label='Validation')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.show()

    # ROC Curve and AUC (computed from the raw probabilities, not the
    # thresholded predictions)
    fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
    roc_auc = auc(fpr, tpr)

    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
    # Diagonal reference line: the ROC of a random classifier.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc='lower right')
    plt.show()

    # Predicted vs. Actual values
    plt.figure()
    plt.scatter(y_test, y_pred_prob, alpha=0.3)
    plt.plot([0, 1], [0, 1], color='red', linestyle='--')
    plt.xlabel('Actual')
    plt.ylabel('Predicted')
    plt.title('Predicted vs. Actual Values')
    plt.show()
except Exception as e:
    print(f"Error during model evaluation: {e}")
    # Re-raise so the failure is visible with its traceback instead of being
    # reduced to a one-line message.
    raise