<a href="https://colab.research.google.com/github/rezamohamadlo/Networks-Failing-Detector/blob/main/Networks_Failing_Detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Importing necessary libraries
import numpy as np  # NumPy is used for numerical operations and handling arrays
import matplotlib.pyplot as plt  # Matplotlib is used for creating visualizations and plots

# Importing utility functions from a custom module
from utils import *  # This imports all functions and classes defined in the 'utils' module

# Ensuring that plots are displayed inline
%matplotlib inline

In [2]:
def estimate_gaussian(X):
    """
    Estimate per-feature Gaussian parameters from a data matrix.

    Args:
        X (ndarray): (m, n) Data matrix, one example per row

    Returns:
        mu (ndarray): (n,) Column-wise mean of X
        var (ndarray): (n,) Column-wise variance of X, normalised by m
                       (population variance, not the m - 1 sample estimate)
    """
    # Unpacking into two names also rejects inputs that are not 2-D.
    num_examples, _ = X.shape
    scale = 1 / num_examples

    mu = scale * X.sum(axis=0)

    squared_deviation = (X - mu) ** 2
    var = scale * squared_deviation.sum(axis=0)

    return mu, var

In [3]:

def select_threshold(y_val, p_val):
    """
    Finds the best threshold to use for selecting outliers
    based on the results from a validation set (p_val)
    and the ground truth (y_val)

    An example is predicted anomalous when its probability is strictly
    below the threshold. 1000 evenly spaced candidate thresholds between
    min(p_val) (inclusive) and max(p_val) (exclusive) are scored by F1 and
    the first one reaching the highest score is returned.

    Args:
        y_val (ndarray): Ground truth on validation set (1 = anomaly, 0 = normal)
        p_val (ndarray): Results on validation set (probability per example)

    Returns:
        epsilon (float): Threshold chosen; 0 if no candidate produced a
                         true positive or if p_val has no spread
        F1 (float):      F1 score by choosing epsilon as threshold; 0 in
                         the same fallback cases

    Raises:
        ValueError: If p_val is empty
    """
    y_val = np.asarray(y_val)
    p_val = np.asarray(p_val)

    best_epsilon = 0
    best_F1 = 0

    p_min = np.min(p_val)
    p_max = np.max(p_val)
    step_size = (p_max - p_min) / 1000

    # A zero (all probabilities equal) or NaN step gives no candidate
    # thresholds, and np.arange would fail on it.
    if not step_size > 0:
        return best_epsilon, best_F1

    # The label masks do not depend on epsilon, so build them once.
    is_anomaly = (y_val == 1)
    is_normal = (y_val == 0)

    for epsilon in np.arange(p_min, p_max, step_size):

        predictions = (p_val < epsilon)
        tp = np.sum(predictions & is_anomaly)

        # With no true positives, precision and/or recall are 0/0 or 0 and
        # F1 is undefined or zero; it can never beat best_F1, so skip the
        # divisions (this is what used to emit a RuntimeWarning).
        if tp == 0:
            continue

        fp = np.sum(predictions & is_normal)
        fn = np.sum(~predictions & is_anomaly)

        prec = tp / (tp + fp)
        rec = tp / (tp + fn)
        F1 = 2 * prec * rec / (prec + rec)

        # Strict comparison keeps the smallest epsilon among ties.
        if F1 > best_F1:
            best_F1 = F1
            best_epsilon = epsilon

    return best_epsilon, best_F1

In [4]:
# Load training and validation datasets

# Directory that holds the .npy files. The default matches the path this
# notebook was written against (a Colab session); change this one value
# when running somewhere else.
DATA_DIR = "/content"

# Feature matrix used to fit the Gaussian model
X_train_high = np.load(DATA_DIR + "/X_part2.npy")

# Feature matrix of the validation set
X_val_high = np.load(DATA_DIR + "/X_val_part2.npy")

# Ground truth labels of the validation set
y_val_high = np.load(DATA_DIR + "/y_val_part2.npy")

In [5]:
# Report the dimensions of each loaded array.
# Feature matrices print as (number of samples, number of features);
# the label vector prints as (number of samples,).
for _message, _array in (
    ('The shape of X_train_high is:', X_train_high),
    ('The shape of X_val_high is:', X_val_high),
    ('The shape of y_val_high is:', y_val_high),
):
    print(_message, _array.shape)

The shape of X_train_high is: (1000, 11)
The shape of X_val_high is: (100, 11)
The shape of y_val_high is: (100,)


In [6]:
# Estimate the Gaussian parameters (per-feature mean and variance)
mu_high, var_high = estimate_gaussian(X_train_high)

# Evaluate the probabilities for the training set
# NOTE(review): multivariate_gaussian is not defined in this notebook;
# presumably it comes from `from utils import *` -- confirm.
p_high = multivariate_gaussian(X_train_high, mu_high, var_high)

# Evaluate the probabilities for the cross validation set
p_val_high = multivariate_gaussian(X_val_high, mu_high, var_high)

# Find the best threshold
epsilon_high, F1_high = select_threshold(y_val_high, p_val_high)

print('Best epsilon found using cross-validation: %e'% epsilon_high)
print('Best F1 on Cross Validation Set:  %f'% F1_high)
# Training examples whose probability falls below the chosen threshold.
# np.count_nonzero counts the True entries without the Python-level
# iteration that the builtin sum() performs over an ndarray.
print('# Anomalies found: %d'% np.count_nonzero(p_high < epsilon_high))

Best epsilon found using cross-validation: 1.377229e-18
Best F1 on Cross Validation Set:  0.615385
# Anomalies found: 117


  prec = tp / (tp + fp)
