# Install dependencies, perform imports, and download the dataset

In [None]:
%%capture
# %%capture (which must stay on the first line of the cell) hides any output this cell produces.
# Load IPython's autoreload extension so edits to imported local modules
# (e.g. data_generator.py) are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2

In [None]:
# Extract every dataset archive found in ./net_model_dataset, then delete it.
# Done in pure Python so the cell can be re-run safely: once the archives are gone
# the loop simply does nothing, and an archive is removed only after it has been
# extracted without error. The working directory is never changed.
import zipfile
from pathlib import Path

DATASET_DIR = Path("./net_model_dataset")

for archive_path in sorted(DATASET_DIR.glob("*.zip")):
    with zipfile.ZipFile(archive_path) as archive:
        # Same destination as running `unzip` from inside the dataset directory.
        archive.extractall(DATASET_DIR)
    archive_path.unlink()
    print(f"Extracted and removed {archive_path.name}")

In [None]:
from data_generator import DataGenerator as DataGenerator
from matplotlib import pyplot as plt
import pandas as pd
import ipywidgets as widgets
import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'white'
from IPython.display import display

# Exploring and Visualizing the RAN dataset

## Investigating the files

In [None]:
print("RAN dataset:")
!ls -lah ./net_model_dataset/ran/
print("\n\nOVS dataset:")
!ls -lah ./net_model_dataset/ovs/
print("\n\nUPF dataset:")
!ls -lah ./net_model_dataset/upf/

## Loading and sampling the dataset

In [None]:
# Build the RAN data generator from its input/output .pkl files.
ran_input_path = "./net_model_dataset/ran/input_dataset.pkl"
ran_output_path = "./net_model_dataset/ran/output_dataset.pkl"
ran_data_gen = DataGenerator(ran_input_path, ran_output_path, vnf_type='RAN')

# Report the column names of the input table, then of the output table.
input_columns = ran_data_gen.train_input.columns.tolist()
print(f"Input dataset columns: {input_columns}")
output_columns = ran_data_gen.train_output.columns.tolist()
print(f"Output dataset columns: {output_columns}")

# Render the first 5 rows of each table, input first.
for side, attr_name in (("Input", "train_input"), ("Output", "train_output")):
    print(f"\n\n{side} dataset sample:")
    display(getattr(ran_data_gen, attr_name).head())

In [None]:
# Show the .describe() summary of the RAN input table, then of the output table.
for side, attr_name in (("Input", "train_input"), ("Output", "train_output")):
    print(f"\n\n{side} dataset description:")
    display(getattr(ran_data_gen, attr_name).describe())

## Plotting the output throughput

In [None]:
# Draw the 3D plot offered by the DataGenerator instance for the RAN dataset.
# NOTE(review): presumably output throughput over input throughput and resource
# allocation (per the section title) — confirm in data_generator.py.
ran_data_gen.create_3d_plot()

## Investigating packet loss and delay


In [None]:
input_throughput = 40  # between 1 and 50 Mbps
resource_allocation = 1000  # between 250 and 2500 millicores CPU

# The dataset files are stored under ./net_model_dataset/ (the same location the
# earlier RAN loading cell reads from), not directly under ./ran/.
ran_data_gen = DataGenerator(
    "./net_model_dataset/ran/input_dataset.pkl",
    "./net_model_dataset/ran/output_dataset.pkl",
    vnf_type='RAN',
)

# Presumably returns the recorded sample closest to the requested operating point,
# or None when nothing matches — verify against DataGenerator.get_nearest_neighbor.
data_sample = ran_data_gen.get_nearest_neighbor(input_throughput, resource_allocation)
if data_sample is None:
    # Say so explicitly instead of silently printing nothing.
    print("No data available for selected input")
else:
    # Units, matched by position to the sample's items. Expected order: input throughput,
    # resource allocation, output throughput, delay, packet loss.
    # NOTE(review): delay is labelled 's' here but "ms" in the interactive widget cell — confirm the real unit.
    suffix = ['Mbps', 'millicores', 'Mbps', 's', '%']
    for i, (key, value) in enumerate(data_sample.items()):
        print(f"{key}: {value} {suffix[i]}")
        if i == 1:
            # Blank lines separate the first two entries (the inputs) from the rest.
            print("\n")


In [None]:
# Shared look for both sliders: wide description column, half-width control.
slider_style = {'description_width': '200px'}


def _slider_layout():
    """Return a fresh half-width slider layout with vertical padding."""
    return widgets.Layout(width='50%', padding='10px 0px')


# Slider for the input throughput (5 to 50 in steps of 5, starting at 40).
input_throughput_slider = widgets.IntSlider(
    value=40,
    min=5,
    max=50,
    step=5,
    description='Input Throughput (Mbps):',
    continuous_update=True,
    style=dict(slider_style),
    layout=_slider_layout(),
)

# Slider restricted to the distinct 'res' values present in the RAN input table.
allocation_options = sorted(ran_data_gen.train_input['res'].unique().tolist())
resource_allocation_slider = widgets.SelectionSlider(
    options=allocation_options,
    value=1000,
    description='Resource Allocation (millicores):',
    continuous_update=True,
    style=dict(slider_style),
    layout=_slider_layout(),
)

# Names and units of the displayed metrics, matched by position.
metric_labels = ["Input Throughput", "Resource Allocation", "Output Throughput", "Delay", "Packet Loss"]
metric_units = ["Mbps", "millicores", "Mbps", "ms", "%"]

# One caption label and one (initially empty) value label per metric.
# NOTE(review): font_weight is passed to Layout here; confirm it actually renders bold
# (it may belong in the label's style instead).
output_labels = []
output_values = []
for label in metric_labels:
    output_labels.append(
        widgets.Label(f"{label}:", layout=widgets.Layout(width='150px', padding="5px", font_weight='bold'))
    )
    output_values.append(
        widgets.Label("", layout=widgets.Layout(width='150px', padding="5px"))
    )

# Two-column grid in which every cell is a bordered caption/value pair.
metric_rows = [
    widgets.HBox([name_label, value_label], layout=widgets.Layout(border='1px solid gray', padding="5px"))
    for name_label, value_label in zip(output_labels, output_values)
]
output_grid = widgets.GridBox(
    children=metric_rows,
    layout=widgets.Layout(grid_template_columns="repeat(2, 300px)", width='80%', margin='20px 0px'),
)

# Output area that hosts the bar chart.
plot_output = widgets.Output()

# Plot update function
def update_plot(data_sample):
    """Redraw the bar chart of normalized metrics inside ``plot_output``.

    Each metric is min-max scaled with fixed per-metric bounds, clamped to its
    range, and drawn as a percentage of that range, so the bar heights match
    the "Percent (%)" axis label.

    Args:
        data_sample: Mapping of metric name to numeric value, matched by
            position to ``metric_labels``. A falsy value (``None`` or empty)
            means there is nothing to plot and a message is shown instead.
    """
    with plot_output:
        plot_output.clear_output(wait=True)
        if not data_sample:
            print("No data available for selected input")
            return

        metric_values = list(data_sample.values())
        # Fixed display range of each metric, matched by position to metric_labels.
        min_vals, max_vals = [1, 250, 1, 0, 0], [50, 2500, 50, 100, 100]
        percent_values = [
            # Clamp to [0, 1] before scaling so out-of-range samples stay on the axis.
            100 * max(0, min(1, (value - low) / (high - low)))
            for value, low, high in zip(metric_values, min_vals, max_vals)
        ]

        fig, ax = plt.subplots(figsize=(8, 4))
        ax.bar(metric_labels, percent_values, color='skyblue')
        ax.set_xlabel("Metrics")
        ax.set_ylabel("Percent (%)")
        ax.set_ylim(0, 100)
        ax.set_title("Metrics Visualization")
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        plt.show()

# Update output and plot based on slider values
def update_output(change=None):
    """Refresh the metric value labels and the bar chart from the slider positions.

    Args:
        change: Change notification supplied when called as an observer; it is
            not used, so the function can also be called with no argument.
    """
    data_sample = ran_data_gen.get_nearest_neighbor(
        input_throughput_slider.value,
        resource_allocation_slider.value,
    )

    if not data_sample:
        # Nothing found: blank out every value label and show the empty-plot message.
        for value_label in output_values:
            value_label.value = "No data"
        update_plot(None)
        return

    # Write each metric, with its unit, into the value label at the same position.
    for idx, metric in enumerate(data_sample.values()):
        output_values[idx].value = f"{metric:.1f} {metric_units[idx]}"
    update_plot(data_sample)

# Re-run update_output whenever either slider's value changes.
for slider in (input_throughput_slider, resource_allocation_slider):
    slider.observe(update_output, names='value')

# Show the two sliders, the metric grid, and the plot area, in that order.
display(input_throughput_slider, resource_allocation_slider, output_grid, plot_output)

# Fill in the labels and the plot once for the sliders' starting values.
update_output()

# 

# Exercise: Explore and Visualize the OvS dataset

In [None]:
# Step 1: Initialize the Data Generator
# Task: Load the OvS dataset using the DataGenerator class.
# Hint: The dataset is located in "./net_model_dataset/ovs/input_dataset.pkl" and "./net_model_dataset/ovs/output_dataset.pkl".
# Specify the VNF type as 'OvS' when creating an instance.
# Example: DataGenerator(<input_path>, <output_path>, <vnf_type>)
# Uncomment and complete the line below:
# ovs_data_gen = DataGenerator( ... )

# Step 2: Inspect Dataset Columns
# Task: Print a summary of the input and output dataset columns to understand the structure.
# Hint: Use .columns.tolist() on the ovs_data_gen.train_input and ovs_data_gen.train_output.
# Uncomment and complete the lines below:
# print("Input dataset columns:", ...)
# print("Output dataset columns:", ...)

# Step 3: Preview the Data
# Task: Display the first 5 rows of both the input and output datasets in a table format.
# Hint: Use .head() on the ovs_data_gen.train_input and ovs_data_gen.train_output.
# Uncomment and complete the lines below:
# print("\n\nInput dataset sample:")
# display(...)  # Show first 5 rows of input dataset
# print("\n\nOutput dataset sample:")
# display(...)  # Show first 5 rows of output dataset

# Step 4: Statistical Summary
# Task: Show a statistical summary of the input and output datasets.
# Hint: Use .describe() on the ovs_data_gen.train_input and ovs_data_gen.train_output.
# Uncomment and complete the lines below:
# print("\n\nInput dataset description:")
# display(...)  # Display statistical summary of input dataset
# print("\n\nOutput dataset description:")
# display(...)  # Display statistical summary of output dataset

# Step 5: Visualize the Dataset in 3D
# Task: Create a 3D plot of the OvS dataset using a method from ovs_data_gen.
# Hint: Look for a method in DataGenerator class (data_generator.py) that helps you visualize the data in 3D.
# Uncomment and complete the line below:
# ovs_data_gen


## Solutions

In [None]:

# Step 1: Initialize the Data Generator
# The OvS files are stored under ./net_model_dataset/ovs/ (the folder listed in the
# "Investigating the files" section), not directly under ./ovs/.
ovs_data_gen = DataGenerator(
    "./net_model_dataset/ovs/input_dataset.pkl",
    "./net_model_dataset/ovs/output_dataset.pkl",
    vnf_type='OvS',
)

# Step 2: Inspect Dataset Columns
print("Input dataset columns:", ovs_data_gen.train_input.columns.tolist())
print("Output dataset columns:", ovs_data_gen.train_output.columns.tolist())

# Step 3: Preview the Data (first 5 rows of each table)
print("\n\nInput dataset sample:")
display(ovs_data_gen.train_input.head())
print("\n\nOutput dataset sample:")
display(ovs_data_gen.train_output.head())

# Step 4: Statistical Summary
print("\n\nInput dataset description:")
display(ovs_data_gen.train_input.describe())
print("\n\nOutput dataset description:")
display(ovs_data_gen.train_output.describe())

# Step 5: Visualize the Dataset in 3D
ovs_data_gen.create_3d_plot()

