In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.preprocessing import StandardScaler

# Reproducibility: fix the state of NumPy's global random generator
np.random.seed(seed=42)

# Plot appearance: whitegrid theme, seaborn "deep" colour cycle, larger default figure
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette(palette="deep")
plt.rcParams.update({'figure.figsize': [12, 7]})


#### 1. Generate synthetic telecom network KPI data

In [None]:
def generate_telecom_kpi_data(days=14, samples_per_day=96):
    """
    Generate synthetic telecom network KPI data.

    Samples are evenly spaced over each 24-hour day, starting at
    2024-02-01 00:00, so the default of 96 samples per day gives one sample
    every 15 minutes. All randomness comes from NumPy's global generator
    (``np.random``); seed it beforehand for reproducible output.

    Parameters:
    - days: Number of days to generate data for (must be >= 0)
    - samples_per_day: Number of samples per day (96 = every 15 minutes;
      must be >= 1)

    Returns:
    - DataFrame with one row per sample and the columns ``timestamp``,
      ``traffic_load``, ``latency`` (ms), ``packet_loss``,
      ``conn_success`` (%) and ``signal_strength`` (dBm)

    Raises:
    - ValueError: if ``days`` is negative or ``samples_per_day`` is below 1
    """
    if days < 0:
        raise ValueError("days must be non-negative")
    if samples_per_day < 1:
        raise ValueError("samples_per_day must be at least 1")

    total_samples = days * samples_per_day
    start_date = datetime(2024, 2, 1)

    # Derive the spacing from samples_per_day rather than assuming 15 minutes,
    # so the series always spans exactly `days` calendar days.
    sample_interval = timedelta(days=1) / samples_per_day
    timestamps = pd.date_range(
        start=start_date, periods=total_samples, freq=sample_interval
    )

    # Fractional hour within the day (0 <= h < 24) and weekday (Monday = 0)
    hour_of_day = np.asarray(
        (timestamps - timestamps.normalize()) / pd.Timedelta(hours=1), dtype=float
    )
    day_of_week = np.asarray(timestamps.dayofweek)

    # Daily traffic pattern scaled to [0, 1]: as written, the sine bottoms out
    # at 00:00 and peaks at 12:00.
    daily_pattern = np.sin(hour_of_day * 2 * np.pi / 24 - np.pi / 2) * 0.5 + 0.5
    # Weekend effect: Saturday/Sunday carry 70% of the weekday load
    weekend_effect = np.where(day_of_week >= 5, 0.7, 1.0)

    # Traffic load with pattern and random noise. The noise can push the
    # near-zero night-time values below zero, so clamp at 0.
    traffic_load = (
        daily_pattern * weekend_effect * 100 + np.random.normal(0, 5, total_samples)
    )
    traffic_load = np.clip(traffic_load, 0, None)

    # Latency (baseline + traffic-dependent component + random noise)
    latency_base = 20  # milliseconds
    latency = (
        latency_base + 0.1 * traffic_load + np.random.normal(0, 3, total_samples)
    )

    # Packet loss (low baseline, increases with traffic load)
    # NOTE(review): unit is unclear. conn_success below is expressed in
    # percent; if packet loss is too, this baseline is 0.01 %, not 1 % - confirm.
    packet_loss_base = 0.01
    packet_loss = (
        packet_loss_base
        + 0.0003 * traffic_load
        + np.random.exponential(0.05, total_samples)
    )

    # Connection success rate (high baseline, decreases with traffic load)
    conn_success_base = 99.5  # percent
    conn_success = (
        conn_success_base
        - 0.01 * traffic_load
        - np.random.exponential(0.1, total_samples)
    )
    conn_success = np.clip(conn_success, 90, 100)

    # Signal strength (mostly stable with some random fluctuations).
    # NOTE(review): the 2 dB spread is an assumed value - tune as needed.
    signal_strength_base = -70  # dBm (decibel-milliwatts)
    signal_strength = signal_strength_base + np.random.normal(0, 2, total_samples)

    return pd.DataFrame(
        {
            'timestamp': timestamps,
            'traffic_load': traffic_load,
            'latency': latency,
            'packet_loss': packet_loss,
            'conn_success': conn_success,
            'signal_strength': signal_strength,
        }
    )
    
    
    
    
    
    
    
    
     
    
    
    
    
    
    
    
    
    
    