# RAN Slice PRB Prediction with Prophet

This notebook uses Facebook's Prophet time-series forecasting library to predict PRB usage for RAN slices.

## Overview

- Fetches NSSAI performance data from InfluxDB
- Preprocesses data for Prophet format
- Creates separate Prophet models for each slice type and NSSI combination
- Trains and evaluates Prophet models
- Saves model artifacts for deployment


## 1. Imports and Configuration

In [None]:
# Standard library imports
import os
import sys
import argparse
import json
import pickle
from datetime import datetime, timezone
from typing import Tuple, List, Dict, Optional
import warnings
warnings.filterwarnings('ignore')

# Data manipulation and analysis
import numpy as np
import pandas as pd

# InfluxDB client
from influxdb_client import InfluxDBClient
from influxdb_client.client.flux_table import FluxStructureEncoder
from influxdb_client.client.write_api import SYNCHRONOUS

# Machine learning utilities
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Prophet imports
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from prophet.plot import plot_cross_validation_metric, plot_components_plotly, plot_plotly

# Confirmation message: reaching this line means every import above succeeded.
print("All imports completed successfully!")

## 2. Configuration Parameters

In [None]:
# InfluxDB Configuration
# Connection settings may be overridden through environment variables (the
# names follow the influxdb-client convention) so that credentials do not have
# to be edited into, or committed with, the notebook.
influx_url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086")
influx_org = os.environ.get("INFLUXDB_V2_ORG", "srib")
# SECURITY NOTE: the fallback below is a credential embedded in source. It is
# kept only so existing runs keep working; prefer setting INFLUXDB_V2_TOKEN and
# rotate/remove this value.
influx_token = os.environ.get(
    "INFLUXDB_V2_TOKEN",
    "WNwnbxZog226V4gIE6Mw37UevU7jv8O4jebwCFfSVQnsT5ER_q4RpYgf4nMM6dN2c4K6pgdbrgYOoRIOt82x7A==",
)
bucket = "nssi_pm_bucket"
measurement = "nssi_pm_bucket"
# Start of the Flux range() window; inserted verbatim into the query
# (e.g., "-30d" for last 30 days).
# NOTE(review): "-0" presumably means "from the epoch", i.e. all stored data — confirm.
start = "-0"

# Field and Tag Definitions (InfluxDB names; renamed to short column names after fetching)
field_prb_dl = "RRU.PrbDl.SNSSAI"
field_data_dl = "DRB.PdcpSduVolumeDL.SNSSAI"
field_rrc_succ = "RRC.ConnEstabSucc.Cause"
tag_slice_type = "sliceType"
tag_nssi_id = "measObjLdn"

# Model Parameters
forecast_periods = 10  # Number of future slots (each `freq` long) to forecast ahead
freq = '15min'  # 15-minute frequency to match the data generation interval

# Model directory (trained models, metrics, forecasts and plots are written here)
model_dir = "prophet_models"

print("Configuration parameters set!")
print(f"Forecast periods: {forecast_periods}")
print(f"Frequency: {freq}")
print(f"Model directory: {model_dir}")

## 3. Data Fetching Functions

In [None]:
def build_flux_query() -> str:
    """
    Assemble the Flux query text used to pull the training data.

    The query reads the configured bucket from `start` onwards, restricts rows
    to the configured measurement and to the three metric fields, pivots the
    fields into columns keyed by `_time`, keeps only the time, the two tag
    columns and the three field columns, and sorts by `_time`.
    """
    wanted_fields = [field_prb_dl, field_data_dl, field_rrc_succ]
    field_predicate = " or ".join(f'r["_field"] == "{name}"' for name in wanted_fields)
    # Joined so that, once wrapped in the surrounding quotes below, each field
    # becomes its own quoted entry in the keep() column list.
    kept_fields = '","'.join(wanted_fields)

    return f'''
from(bucket: "{bucket}")
  |> range(start: {start})
  |> filter(fn: (r) => r["_measurement"] == "{measurement}")
  |> filter(fn: (r) => {field_predicate})
  |> pivot(rowKey: ["_time"], columnKey: ["_field"], valueColumn: "_value")
  |> keep(columns: ["_time", "{tag_slice_type}", "{tag_nssi_id}", "{kept_fields}"])
  |> sort(columns: ["_time"])
'''

def fetch_from_influx() -> pd.DataFrame:
    """
    Fetch the slice performance data from InfluxDB as a pandas DataFrame.

    Returns:
        DataFrame with columns ``time`` (UTC, timezone-aware), ``slice_type``,
        ``nssi_id``, ``prb_dl``, ``data_dl`` and ``rrc_succ``, sorted by
        slice_type, nssi_id and time, with rows missing any of those values
        removed.

    Raises:
        RuntimeError: if the query returns no data, or the result lacks one of
            the expected columns.
    """
    flux = build_flux_query()
    client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org, timeout=60_000)
    try:
        tables = client.query_api().query_data_frame(query=flux)
    finally:
        # Release the connection even when the query itself fails.
        client.close()

    # The client may hand back a single DataFrame or a list of them; an empty
    # list must be treated as "no data" rather than passed on as a list.
    if isinstance(tables, list):
        df = pd.concat(tables, ignore_index=True) if tables else None
    else:
        df = tables

    if df is None or df.empty:
        raise RuntimeError("No data returned from InfluxDB. Check your query parameters.")

    # Standardize column names
    df = df.rename(columns={
        "_time": "time",
        tag_slice_type: "slice_type",
        tag_nssi_id: "nssi_id",
        field_prb_dl: "prb_dl",
        field_data_dl: "data_dl",
        field_rrc_succ: "rrc_succ"
    })

    core_columns = ["time", "slice_type", "nssi_id", "prb_dl", "data_dl", "rrc_succ"]
    missing = [name for name in core_columns if name not in df.columns]
    if missing:
        # Fail with an actionable message instead of a bare KeyError further down.
        raise RuntimeError(
            f"InfluxDB result is missing expected columns: {missing}. "
            "Check the field and tag names in the configuration."
        )

    # Ensure types
    df["time"] = pd.to_datetime(df["time"], utc=True)
    df = df.sort_values(["slice_type", "nssi_id", "time"]).reset_index(drop=True)

    # Drop rows with any NA in core columns
    df = df.dropna(subset=["slice_type", "nssi_id", "time", "prb_dl", "data_dl", "rrc_succ"])

    return df[core_columns]

# Confirmation message: reaching this line means the data fetching functions above are defined.
print("Data fetching functions defined!")

## 4. Data Preparation Functions for Prophet

In [None]:
def prepare_prophet_data(df: pd.DataFrame) -> Dict[str, Dict]:
    """
    Split the fetched data into one Prophet-ready frame per (slice_type, nssi_id).

    Each frame is ordered by time and has the columns ``ds`` (timezone-naive
    timestamp), ``y`` (the prb_dl target), ``data_dl`` and ``rrc_succ``.
    The result maps ``"<slice_type>_<nssi_id>"`` to a dict holding the frame
    under ``"data"`` together with the originating ``"slice_type"`` and
    ``"nssi_id"``.
    """
    datasets = {}

    for (slice_type, nssi_id), rows in df.groupby(["slice_type", "nssi_id"]):
        ordered = rows.sort_values("time").reset_index(drop=True)

        # Prophet expects the timestamp column to be called ds and the target y.
        frame = ordered[["time", "prb_dl"]].rename(columns={"time": "ds", "prb_dl": "y"})

        # Prophet does not accept timezone-aware values in ds, so drop the tz info.
        frame["ds"] = frame["ds"].dt.tz_localize(None)

        # Carry the extra regressors alongside the target.
        for regressor in ("data_dl", "rrc_succ"):
            frame[regressor] = ordered[regressor].values

        datasets[f"{slice_type}_{nssi_id}"] = {
            "data": frame,
            "slice_type": slice_type,
            "nssi_id": nssi_id,
        }

    return datasets

# Confirmation message: reaching this line means the data preparation function above is defined.
print("Data preparation functions defined!")

## 5. Prophet Model Building Functions

In [None]:
def build_prophet_model(data: pd.DataFrame, include_regressors: bool = True) -> Prophet:
    """
    Create an unfitted Prophet model with this notebook's standard settings.

    Yearly, weekly and daily seasonality are enabled. When `include_regressors`
    is true and `data` contains both the ``data_dl`` and ``rrc_succ`` columns,
    both are registered as extra regressors without standardization.
    """
    settings = {
        "yearly_seasonality": True,
        "weekly_seasonality": True,
        "daily_seasonality": True,
        "changepoint_prior_scale": 0.05,
        "seasonality_prior_scale": 10.0,
        "holidays_prior_scale": 10.0,
        "mcmc_samples": 0,
        "interval_width": 0.8,
        "uncertainty_samples": 1000,
    }
    prophet = Prophet(**settings)

    # Regressors are registered only as a pair: both columns must be present.
    regressor_names = ("data_dl", "rrc_succ")
    if include_regressors and all(name in data.columns for name in regressor_names):
        for name in regressor_names:
            prophet.add_regressor(name, standardize=False)

    return prophet

def train_prophet_models(prophet_data: Dict[str, Dict], model_dir: str = model_dir) -> Dict[str, Prophet]:
    """
    Train one Prophet model per slice_type/nssi_id dataset and persist the results.

    Each dataset is split chronologically: the first 80% of rows are used for
    fitting and the remaining rows for validation. Validation predictions are
    made on the actual validation rows (their own timestamps and regressor
    values), so gaps or irregular sampling in the data cannot misalign the
    predictions with the ground truth.

    Args:
        prophet_data: mapping of dataset key to a dict whose ``"data"`` entry is
            a Prophet-format DataFrame (``ds``, ``y`` and optional regressors).
        model_dir: directory that receives ``prophet_model_<key>.pkl`` for each
            trained model and ``training_metrics.json``; created if missing.

    Returns:
        Mapping of dataset key to fitted model. Datasets with fewer than two
        training rows are skipped and do not appear in the result.
    """
    os.makedirs(model_dir, exist_ok=True)
    models = {}
    training_metrics = {}

    for key, data_dict in prophet_data.items():
        print(f"Training Prophet model for {key}...")

        df = data_dict["data"]

        # Split data into train and validation (80-20 split, chronological)
        split_idx = int(len(df) * 0.8)
        train_df = df.iloc[:split_idx]
        val_df = df.iloc[split_idx:]

        # Prophet refuses to fit on fewer than two rows; skip such series
        # instead of aborting training for every remaining dataset.
        if len(train_df) < 2:
            print(f"  Skipping {key}: only {len(train_df)} training sample(s), at least 2 required.")
            continue

        # Build and fit the model
        model = build_prophet_model(train_df)
        model.fit(train_df)

        # Evaluate on the validation rows
        if len(val_df) > 0:
            # Use the real validation timestamps/regressors rather than a
            # synthetic regular grid, which would drift from the data on gaps.
            future_df = val_df.drop(columns=["y"]).reset_index(drop=True)
            forecast = model.predict(future_df)

            y_true = val_df["y"].values
            y_pred = forecast["yhat"].values

            # Cast to plain Python numbers so the metrics are JSON-serializable
            # regardless of the numeric type scikit-learn returns.
            mae = float(mean_absolute_error(y_true, y_pred))
            rmse = float(root_mean_squared_error(y_true, y_pred))
            r2 = float(r2_score(y_true, y_pred))

            training_metrics[key] = {
                "mae": mae,
                "rmse": rmse,
                "r2": r2,
                "train_samples": len(train_df),
                "val_samples": len(val_df)
            }

            print(f"  MAE: {mae:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}")

        # Save model
        # NOTE(review): Prophet's documentation recommends
        # prophet.serialize.model_to_json over pickle; pickle is kept here so
        # existing consumers of the .pkl artifacts keep working.
        model_path = os.path.join(model_dir, f"prophet_model_{key}.pkl")
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)

        models[key] = model

    # Save training metrics
    metrics_path = os.path.join(model_dir, "training_metrics.json")
    with open(metrics_path, 'w') as f:
        json.dump(training_metrics, f, indent=2)

    return models

# Confirmation message: reaching this line means the model building/training functions above are defined.
print("Prophet model building functions defined!")

## 6. Prediction and Evaluation Functions - FIXED FOR NEXT SLOT PREDICTION

In [None]:
def make_forecast(models: Dict[str, Prophet], prophet_data: Dict[str, Dict],
                  periods: int = forecast_periods, model_dir: str = model_dir) -> Dict[str, pd.DataFrame]:
    """
    Forecast the next `periods` time slots for every trained model.

    The forecast grid starts one `freq` step after the latest timestamp in the
    dataset's history and advances in `freq` steps. Regressor columns are
    filled with their last known values, since future values are not available.

    NOTE(review): models produced by train_prophet_models are fitted on the
    first 80% of each dataset only, while the forecast starts after the end of
    the full history — consider refitting on all data before deployment.

    Args:
        models: mapping of dataset key to fitted Prophet model.
        prophet_data: mapping of dataset key to a dict whose ``"data"`` entry is
            the Prophet-format history for that key.
        periods: number of future slots to predict.
        model_dir: directory that receives ``forecast_<key>.csv``; created if missing.

    Returns:
        Mapping of dataset key to the forecast DataFrame returned by Prophet.
    """
    # The directory normally exists after training, but this function may also
    # be called with a different target directory.
    os.makedirs(model_dir, exist_ok=True)
    # Derive the slot length from the configured frequency instead of
    # hard-coding 15 minutes, so changing `freq` keeps the grid consistent.
    step = pd.tseries.frequencies.to_offset(freq)
    forecasts = {}

    for key, model in models.items():
        print(f"Making forecast for {key}...")

        historical_data = prophet_data[key]["data"]

        # Get the last timestamp from historical data
        last_timestamp = historical_data["ds"].max()

        # Future grid: next slot after the last observation, `periods` slots long
        future_times = pd.date_range(start=last_timestamp + step, periods=periods, freq=freq)
        future = pd.DataFrame({"ds": future_times})

        # Add regressor values (for simplicity, the last known values are reused)
        for regressor in ("data_dl", "rrc_succ"):
            if regressor in historical_data.columns:
                future[regressor] = historical_data[regressor].iloc[-1]

        # Make forecast
        forecast = model.predict(future)

        # Save forecast
        forecast_path = os.path.join(model_dir, f"forecast_{key}.csv")
        forecast.to_csv(forecast_path, index=False)

        forecasts[key] = forecast

        # Print the forecast times for verification
        print(f"  Forecast times for {key}:")
        for ds, yhat in zip(forecast["ds"], forecast["yhat"]):
            print(f"    {ds}: {yhat:.2f}")

    return forecasts

# Confirmation message: reaching this line means the forecasting function above is defined.
print("Prediction and evaluation functions defined!")

## 7. Visualization Functions

In [None]:
def plot_forecasts(forecasts: Dict[str, pd.DataFrame], prophet_data: Dict[str, Dict],
                   save_dir: str = model_dir):
    """
    Render and save one forecast chart per model.

    Each chart overlays the historical target series, the forecasted values and
    the forecast's lower/upper bounds, and is written to
    ``prophet_forecast_<key>.png`` inside `save_dir` (created if missing).
    """
    os.makedirs(save_dir, exist_ok=True)

    for key, predicted in forecasts.items():
        fig, ax = plt.subplots(figsize=(15, 10))

        observed = prophet_data[key]["data"]

        # Observed history
        ax.plot(observed["ds"], observed["y"],
                label="Historical PRB Usage", color="blue", alpha=0.7)

        # Point forecast
        ax.plot(predicted["ds"], predicted["yhat"],
                label="Forecasted PRB Usage", color="red", alpha=0.7)

        # Band between the forecast's lower and upper bounds
        ax.fill_between(predicted["ds"],
                        predicted["yhat_lower"],
                        predicted["yhat_upper"],
                        color="red", alpha=0.2, label="Uncertainty Interval")

        ax.set_title(f"Prophet Forecast for {key}")
        ax.set_xlabel("Time")
        ax.set_ylabel("PRB Usage")
        ax.legend()
        ax.grid(True, alpha=0.3)
        plt.xticks(rotation=45)
        fig.tight_layout()

        # Write the figure to disk and release it
        target_file = os.path.join(save_dir, f"prophet_forecast_{key}.png")
        fig.savefig(target_file, dpi=300, bbox_inches='tight')
        plt.close(fig)

        print(f"Forecast plot saved for {key}")

# Confirmation message: reaching this line means the plotting function above is defined.
print("Visualization functions defined!")

## 8. Main Execution Pipeline

### Step 1: Fetch Data from InfluxDB

In [None]:
print("=== Step 1: Fetching Data ===")

# Check if InfluxDB configuration is set
if not influx_url or not influx_token or not influx_org:
    print("⚠️  Warning: InfluxDB configuration is not set!")
    print("Please set the following variables in the Configuration cell:")
    print("- influx_url")
    print("- influx_token")
    print("- influx_org")
    print("\nFor demonstration purposes, you can load sample data instead.")
else:
    # Only the fetch itself is guarded, so problems in the summary/display code
    # below are not misreported as InfluxDB connection errors.
    try:
        df = fetch_from_influx()
    except Exception as e:
        print(f"❌ Error fetching data: {e}")
        print("Please check your InfluxDB configuration and connection.")
    else:
        print(f"✅ Data shape: {df.shape}")
        print(f"✅ Date range: {df['time'].min()} to {df['time'].max()}")
        print(f"✅ Unique slice types: {df['slice_type'].nunique()}")
        print(f"✅ Unique NSSI IDs: {df['nssi_id'].nunique()}")

        # Display sample data
        print("\nSample data:")
        display(df.head())

        # Display data statistics
        print("\nData statistics:")
        display(df.describe())

### Step 2: Prepare Data for Prophet

In [None]:
print("=== Step 2: Preparing Prophet Data ===")

# Check if df exists from previous step
if 'df' in locals():
    prophet_data = prepare_prophet_data(df)
    print(f"✅ Created {len(prophet_data)} Prophet datasets")

    # Display information about prepared datasets
    print("\nProphet datasets summary:")
    for key, data_dict in prophet_data.items():
        print(f"  {key}: {data_dict['data'].shape[0]} samples")

    # Display sample of one dataset (skipped when no dataset was produced,
    # which would otherwise fail when picking the first key)
    sample_key = next(iter(prophet_data), None)
    if sample_key is not None:
        print(f"\nSample data for {sample_key}:")
        display(prophet_data[sample_key]['data'].head())
else:
    print("❌ No data available. Please complete Step 1 first.")

### Step 3: Train Prophet Models

In [None]:
print("=== Step 3: Training Prophet Models ===")

# Check if prophet_data exists from previous step
if 'prophet_data' in locals():
    models = train_prophet_models(prophet_data)
    print(f"✅ Trained {len(models)} Prophet models")

    # Load and display training metrics
    metrics_path = os.path.join(model_dir, "training_metrics.json")
    if os.path.exists(metrics_path):
        with open(metrics_path, 'r') as f:
            training_metrics = json.load(f)

        print("\nTraining Metrics:")
        metrics_df = pd.DataFrame(training_metrics).T

        if metrics_df.empty:
            # No model produced validation metrics; the metric columns do not
            # exist, so there is nothing to tabulate or plot.
            print("No validation metrics were recorded.")
        else:
            display(metrics_df)

            # Plot training metrics
            fig, axes = plt.subplots(2, 2, figsize=(15, 10))
            metrics_df['mae'].plot(kind='bar', ax=axes[0,0], title='Mean Absolute Error')
            metrics_df['rmse'].plot(kind='bar', ax=axes[0,1], title='Root Mean Square Error')
            metrics_df['r2'].plot(kind='bar', ax=axes[1,0], title='R² Score')
            metrics_df[['train_samples', 'val_samples']].plot(kind='bar', ax=axes[1,1], title='Sample Sizes')

            plt.tight_layout()
            plt.show()
else:
    print("❌ No Prophet data available. Please complete Step 2 first.")

### Step 4: Make Forecasts - NOW PREDICTS NEXT SLOTS CORRECTLY

In [None]:
print("=== Step 4: Making Forecasts ===")

# Check if models exist from previous step
if 'models' in locals():
    forecasts = make_forecast(models, prophet_data)
    print(f"✅ Generated forecasts for {len(forecasts)} models")

    # Display forecast summary
    print("\nForecast Summary:")
    for key, forecast in forecasts.items():
        last_date = forecast['ds'].max()
        print(f"  {key}: {forecast.shape[0]} total points, forecast until {last_date}")

    # Display sample forecast for one model (skipped when there are no
    # forecasts, which would otherwise fail when picking the first key)
    sample_key = next(iter(forecasts), None)
    if sample_key is not None:
        print(f"\nSample forecast for {sample_key}:")
        sample_forecast = forecasts[sample_key].tail(10)  # Show last 10 forecast points
        display(sample_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])
else:
    print("❌ No trained models available. Please complete Step 3 first.")

### Step 5: Create Visualizations

In [None]:
print("=== Step 5: Creating Visualizations ===")

# Check if forecasts exist from previous step
if 'forecasts' in locals():
    plot_forecasts(forecasts, prophet_data)
    print("✅ All forecast plots created and saved!")

    # Display one of the generated plots inline (skipped when there are no
    # forecasts, which would otherwise fail when picking the first key)
    sample_key = next(iter(forecasts), None)
    if sample_key is not None:
        plot_path = os.path.join(model_dir, f"prophet_forecast_{sample_key}.png")

        if os.path.exists(plot_path):
            print(f"\nDisplaying forecast plot for {sample_key}:")
            from IPython.display import Image
            display(Image(filename=plot_path))
else:
    print("❌ No forecasts available. Please complete Step 4 first.")

### Step 6: Save Metadata

In [None]:
print("=== Step 6: Saving Metadata ===")

# Check if we have the required data
if 'df' in locals() and 'models' in locals():
    metadata = {
        "model_type": "Prophet",
        "forecast_periods": forecast_periods,
        "frequency": freq,
        "total_models": len(models),
        "data_range": {
            "start": df["time"].min().isoformat(),
            "end": df["time"].max().isoformat()
        },
        "slice_types": df["slice_type"].unique().tolist(),
        "nssi_ids": df["nssi_id"].unique().tolist(),
        "features": ["prb_dl", "data_dl", "rrc_succ"],
        "target": "prb_dl",
        # Timezone-aware UTC timestamp, consistent with the UTC data_range
        # values above (a naive local time would be ambiguous across hosts).
        "created_at": datetime.now(timezone.utc).isoformat(),
        "model_directory": model_dir,
        "fix_applied": "Updated to predict next 15-minute slots instead of hourly predictions"
    }

    with open(os.path.join(model_dir, "metadata.json"), "w") as f:
        json.dump(metadata, f, indent=2)

    print("✅ Metadata saved successfully!")
    print("\nPipeline Metadata:")
    for key, value in metadata.items():
        print(f"  {key}: {value}")

    print(f"\n📁 All artifacts saved to '{model_dir}' directory:")
    if os.path.exists(model_dir):
        files = os.listdir(model_dir)
        for file in sorted(files):
            print(f"  - {file}")
else:
    print("❌ Required data not available. Please complete previous steps first.")