In [None]:
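# Step 1: Install Necessary Libraries
# The leading `!` is a Jupyter/IPython shell escape; outside a notebook,
# run `pip install causalnex scikit-learn` from a terminal instead.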
!pip install causalnex scikit-learn

# Step 2: Import Libraries
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from causalnex.inference import InferenceEngine
from causalnex.network import BayesianNetwork
from causalnex.structure.notears import from_pandas
from sklearn.ensemble import IsolationForest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Fix the global NumPy seed so every run draws the same synthetic data
np.random.seed(42)

# Step 3: Generate Sample Data
# Number of rows (observations) to simulate
n_samples = 1000

# Network functions that each get their own set of metric columns
network_functions = ['GNB', 'AMF', 'SMF', 'UPF', 'AUSF', 'UDM', 'UDR', 'HSS', 'PGW', 'SGW']

# (column suffix, mean, standard deviation) of the normal distribution each
# metric is drawn from. The tuple order is also the draw order per function.
metric_profiles = (
    ('cpu_usage', 50, 10),
    ('memory_usage', 30, 5),
    ('packet_loss', 0.5, 0.1),
    ('latency', 10, 2),
)

# Collect all columns first, then build the frame in one go
synthetic_columns = {}
for nf in network_functions:
    for metric, mean, spread in metric_profiles:
        synthetic_columns[f'{nf}_{metric}'] = np.random.normal(
            loc=mean, scale=spread, size=n_samples
        )
data = pd.DataFrame(synthetic_columns)

# Introduce anomalies: pick distinct rows that will receive abnormal values
n_anomalies = 50
anomaly_indices = np.random.choice(data.index, n_anomalies, replace=False)

# Overwrite CPU usage on those same rows for every network function with
# draws centred on 80 (well above the normal mean of 50)
for cpu_column in (f'{name}_cpu_usage' for name in network_functions):
    data.loc[anomaly_indices, cpu_column] = np.random.normal(
        loc=80, scale=5, size=n_anomalies
    )

print("Sample Data with Anomalies:")
print(data.head())

# Step 4: Unsupervised Anomaly Detection using Isolation Forest
# Fit on the metric columns only. Selecting them explicitly keeps this cell
# safe to re-run: once 'anomaly' exists in the frame it must not be fed back
# into the detector as a feature.
feature_columns = [col for col in data.columns if col != 'anomaly']

# contamination is the expected share of outliers; 0.05 corresponds to the
# 50 anomalous rows injected into the 1000 samples above
iso_forest = IsolationForest(contamination=0.05, random_state=42)

# Fit the model and label the same rows in one call
raw_labels = iso_forest.fit_predict(data[feature_columns])

# IsolationForest reports -1 for outliers and 1 for inliers; store the result
# as 1 = anomaly, 0 = normal
data['anomaly'] = (raw_labels == -1).astype('int64')

print("\nAnomaly Detection Results (Unsupervised - Isolation Forest):")
print(data['anomaly'].value_counts())

# Step 5: Supervised Anomaly Detection using Random Forest
# No separate ground-truth labels are used here: the target is the 'anomaly'
# flag produced by the Isolation Forest in Step 4. The report below therefore
# shows how well the Random Forest reproduces those flags, not how well it
# finds the truly injected anomalies.
X = data.drop(columns=['anomaly'])
y = data['anomaly']

# Split data into training and testing sets. The anomaly class is rare, so
# stratify on y to keep the same class ratio in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Train a Random Forest classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = rf_model.predict(X_test)

# Per-class precision / recall / F1 on the test split
print("\nSupervised Anomaly Detection Results (Random Forest):")
print(classification_report(y_test, y_pred))

# Step 6: Root Cause Analysis using Causal DAGs
# Learn a weighted causal structure with NOTEARS, then prune it so that it
# can back a Bayesian network.

# Edges whose absolute weight is below this value are dropped. This is only a
# starting point and should be tuned for the data at hand.
EDGE_WEIGHT_THRESHOLD = 0.1
# Evidence used for the root cause query: an abnormal GNB CPU reading
EVIDENCE_NODE = 'GNB_cpu_usage'
EVIDENCE_VALUE = 80
# Names of the three buckets that continuous metrics are discretised into
STATE_NAMES = {0: 'low', 1: 'medium', 2: 'high'}

# The raw metrics live on very different scales (CPU around 50, packet loss
# around 0.5), so standardise the columns first; otherwise a single weight
# threshold would not be comparable across them. Constant columns keep a
# divisor of 1 to avoid dividing by zero.
column_spread = data.std(ddof=0).replace(0, 1)
scaled = (data - data.mean()) / column_spread

# Learn the structure from the data using the NOTEARS algorithm
sm = from_pandas(scaled, w_threshold=EDGE_WEIGHT_THRESHOLD)

# Visualize the thresholded structure
plt.figure(figsize=(12, 8))
nx.draw(sm, with_labels=True, node_size=2000, node_color='skyblue', font_size=12, font_weight='bold')
plt.title('Causal DAG Structure')
plt.show()

# Step 7: Performing Root Cause Analysis
# A Bayesian network needs one connected, acyclic structure, so keep only the
# largest connected component (None when no edge survived the threshold).
rca_structure = sm.get_largest_subgraph()
structure_usable = (
    rca_structure is not None
    and EVIDENCE_NODE in rca_structure.nodes
    and nx.is_directed_acyclic_graph(rca_structure)
)

if not structure_usable:
    # Nothing to infer from: report it instead of failing inside causalnex
    ie = None
    predictions = {}
    print("\nRoot Cause Analysis Predictions:")
    print(
        f"No connected acyclic structure containing {EVIDENCE_NODE} was learned; "
        "try a different EDGE_WEIGHT_THRESHOLD."
    )
else:
    # Bayesian networks work on discrete states: bucket each continuous
    # metric into terciles and leave already-discrete columns (such as the
    # 0/1 anomaly flag) untouched.
    discretised = pd.DataFrame(index=data.index)
    tercile_edges = {}
    for node in rca_structure.nodes:
        column = data[node]
        if column.nunique() <= len(STATE_NAMES):
            discretised[node] = column
            continue
        tercile_edges[node] = column.quantile([1 / 3, 2 / 3]).to_numpy()
        bucket = np.digitize(column, tercile_edges[node])
        discretised[node] = pd.Series(bucket, index=data.index).map(STATE_NAMES)

    # Fit the network: first the possible states of each node, then the
    # conditional probability tables. The K2 prior keeps parent combinations
    # that never occur in the data from producing undefined probabilities.
    bn = BayesianNetwork(rca_structure)
    bn = bn.fit_node_states(discretised)
    bn = bn.fit_cpds(discretised, method='BayesianEstimator', bayes_prior='K2')

    # Initialize the inference engine on the fitted network
    ie = InferenceEngine(bn)

    # Translate the observed CPU reading into the bucket it falls in
    evidence_state = STATE_NAMES[
        int(np.digitize(EVIDENCE_VALUE, tercile_edges[EVIDENCE_NODE]))
    ]

    # Query the state probabilities of every node given that evidence
    predictions = ie.query({EVIDENCE_NODE: evidence_state})
    print("\nRoot Cause Analysis Predictions:")
    print(predictions)
