In [12]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/5_6G_slicing_project


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/5_6G_slicing_project


In [13]:
import os
print(os.getcwd())
print(os.listdir("."))

/content/drive/MyDrive/5_6G_slicing_project
['data']


In [14]:
import os, glob
import numpy as np
import pandas as pd

WINDOW = 144   # 1 jour (10 min)
H = 6          # horizon = 1h
IP_DIR = "data/ip_sample/ip_addresses_sample/agg_10_minutes"
TIMES_PATH = "data/times/times/times_10_minutes.csv"
MAX_IPS = 200


In [15]:
times = pd.read_csv(TIMES_PATH)[["id_time","time"]].rename(columns={"time":"timestamp"})
times["timestamp"] = pd.to_datetime(times["timestamp"], errors="coerce")
times = times.dropna(subset=["timestamp"])


  times["timestamp"] = pd.to_datetime(times["timestamp"], errors="coerce")


In [16]:
import joblib

kmeans = joblib.load("kmeans_3clusters.pkl")
scaler = joblib.load("scaler.pkl")
print("Modèle et scaler chargés.")


Modèle et scaler chargés.


In [17]:
FEATURES_CLUSTER = ['n_bytes','n_packets','n_flows',
                    'tcp_udp_ratio_bytes','dir_ratio_bytes']

def prepare_one_ip_with_cluster(csv_path, times_df, kmeans, scaler):
    df = pd.read_csv(csv_path)
    df = df.merge(times_df, on="id_time", how="inner")
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
    df = df.dropna(subset=["timestamp"]).sort_values("timestamp")

    feats = df[FEATURES_CLUSTER].copy()
    feats = np.log1p(feats)
    feats_scaled = scaler.transform(feats)

    df["cluster"] = kmeans.predict(feats_scaled)
    return df


In [18]:
CLUSTER_TO_SLICE = {0:"mMTC", 1:"URLLC", 2:"eMBB"}

def build_slice_timeseries(file_list, times_df, kmeans, scaler):
    parts = []

    for p in file_list:
        df = prepare_one_ip_with_cluster(p, times_df, kmeans, scaler)

        agg = (
            df.groupby(["timestamp","cluster"])["n_bytes"]
              .sum()
              .reset_index()
              .pivot(index="timestamp", columns="cluster", values="n_bytes")
        ).fillna(0)

        for c in [0,1,2]:
            if c not in agg.columns:
                agg[c] = 0

        agg = agg[[0,1,2]]
        agg.columns = ["mMTC","URLLC","eMBB"]
        parts.append(agg)

    full = pd.concat(parts).groupby("timestamp").mean().sort_index()
    return full


In [19]:
all_files = sorted(glob.glob(os.path.join(IP_DIR, "*.csv")))
files = all_files[:MAX_IPS]

slice_df = build_slice_timeseries(files, times, kmeans, scaler)
slice_df.head()




Unnamed: 0_level_0,mMTC,URLLC,eMBB
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-10-09 02:03:49+02:00,11589.512195,25.097561,26962480.0
2023-10-09 02:13:49+02:00,5771.275,38.775,28773400.0
2023-10-09 02:23:49+02:00,6997.435897,31.076923,33288030.0
2023-10-09 02:33:49+02:00,6487.536585,35.292683,93568410.0
2023-10-09 02:43:49+02:00,8919.804878,23.02439,25334410.0


In [20]:
slice_norm = slice_df.copy()
norm_params = {}

for col in ["mMTC","URLLC","eMBB"]:
    y = np.log1p(slice_norm[col].values.astype(float))
    mu, sigma = y.mean(), y.std()
    slice_norm[col] = (y - mu) / (sigma + 1e-8)
    norm_params[col] = (mu, sigma)

slice_norm.head()


Unnamed: 0_level_0,mMTC,URLLC,eMBB
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-10-09 02:03:49+02:00,0.474151,-0.619415,0.555828
2023-10-09 02:13:49+02:00,0.249819,-0.415533,0.564259
2023-10-09 02:23:49+02:00,0.311804,-0.519604,0.583162
2023-10-09 02:33:49+02:00,0.28746,-0.459862,0.717204
2023-10-09 02:43:49+02:00,0.389904,-0.659462,0.547751


In [21]:
def windowize_multislice(df, window, horizon):
    vals = df[["mMTC","URLLC","eMBB"]].values.astype(np.float32)
    X, y = [], []
    for i in range(len(vals) - window - horizon):
        X.append(vals[i:i+window])
        y.append(vals[i+window+horizon])
    return np.array(X), np.array(y)

X, y = windowize_multislice(slice_norm, WINDOW, H)
print("X:", X.shape, "y:", y.shape)


X: (24775, 144, 3) y: (24775, 3)


In [22]:
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

y_pred_naive = X_test[:, -1, :]
mae_naive = float(np.mean(np.abs(y_pred_naive - y_test)))
print("MAE naive:", mae_naive)


MAE naive: 0.1942252665758133


In [23]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

device = "cuda" if torch.cuda.is_available() else "cpu"

train_loader = DataLoader(
    TensorDataset(torch.tensor(X_train), torch.tensor(y_train)),
    batch_size=64, shuffle=True
)
test_loader = DataLoader(
    TensorDataset(torch.tensor(X_test), torch.tensor(y_test)),
    batch_size=256, shuffle=False
)

class LSTM3Slices(nn.Module):
    def __init__(self, n_features=3, hidden=64):
        super().__init__()
        self.lstm = nn.LSTM(n_features, hidden, batch_first=True)
        self.fc = nn.Linear(hidden, 3)

    def forward(self, x):
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])

model = LSTM3Slices(n_features=3).to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

def eval_mae(model, loader):
    model.eval()
    preds, trues = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(device)
            yb = yb.to(device)
            p = model(xb)
            preds.append(p.cpu().numpy())
            trues.append(yb.cpu().numpy())
    preds = np.concatenate(preds)
    trues = np.concatenate(trues)
    return float(np.mean(np.abs(preds - trues)))

for epoch in range(10):
    model.train()
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        pred = model(xb)
        loss = loss_fn(pred, yb)
        opt.zero_grad()
        loss.backward()
        opt.step()

    mae = eval_mae(model, test_loader)
    print(f"Epoch {epoch+1:02d} | test MAE: {mae:.4f}")


Epoch 01 | test MAE: 0.1536
Epoch 02 | test MAE: 0.1512
Epoch 03 | test MAE: 0.1499
Epoch 04 | test MAE: 0.1505
Epoch 05 | test MAE: 0.1513
Epoch 06 | test MAE: 0.1509
Epoch 07 | test MAE: 0.1475
Epoch 08 | test MAE: 0.1480
Epoch 09 | test MAE: 0.1469
Epoch 10 | test MAE: 0.1488
