# AI-Driven Load Balancing using LSTM on Azure LLM Inference Dataset
This notebook demonstrates a time-series forecasting approach using LSTM to simulate intelligent load balancing across servers based on Azure's LLM Inference trace dataset.

In [None]:
# Step 1: Install required packages
!pip install -q pandas numpy scikit-learn tensorflow matplotlib

In [1]:
# Step 2: Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

In [2]:
# Step 3: Load data (Replace this path with your dataset URL or file path)
import pandas as pd
from datetime import datetime

# Load one of the datasets (you can repeat for both)
df_code = pd.read_csv("/Users/sathwik/VISUAL STUDIO CODE/Cloud:Fog/Azure LLM Inference Dataset/data2024/AzureLLMInferenceTrace_code_1week.csv", parse_dates=["TIMESTAMP"])
df_code["TotalTokens"] = df_code["ContextTokens"] + df_code["GeneratedTokens"]

# Optional: Resample to minute-wise or second-wise request load
df_code.set_index("TIMESTAMP", inplace=True)
token_load = df_code["TotalTokens"].resample("1min").sum().reset_index()
token_load.columns = ["Timestamp", "TotalTokens"]
time_index = pd.date_range(start='2024-05-10', periods=1000, freq='T')
total_tokens = np.sin(np.linspace(0, 50, 1000)) * 1000 + 5000 + np.random.normal(0, 100, 1000)
token_load = pd.DataFrame({'Timestamp': time_index, 'TotalTokens': total_tokens})
token_load.set_index('Timestamp', inplace=True)

  time_index = pd.date_range(start='2024-05-10', periods=1000, freq='T')


In [3]:
# Step 4: Prepare data for LSTM
look_back = 10
values = token_load['TotalTokens'].values.reshape(-1, 1)
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(values)
X, y = [], []
for i in range(len(scaled_values) - look_back):
    X.append(scaled_values[i:i+look_back])
    y.append(scaled_values[i+look_back])
X, y = np.array(X), np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [4]:
# Step 5: Train LSTM
model = Sequential([
    LSTM(64, input_shape=(look_back, 1)),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

Epoch 1/5


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.3095 - val_loss: 0.0184
Epoch 2/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0200 - val_loss: 0.0196
Epoch 3/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0132 - val_loss: 0.0108
Epoch 4/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0116 - val_loss: 0.0113
Epoch 5/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0093 - val_loss: 0.0083


<keras.src.callbacks.history.History at 0x1563be500>

In [5]:
# Step 6: Predict and simulate load balancing
y_pred_scaled = model.predict(X_test)
y_pred = scaler.inverse_transform(y_pred_scaled)
server_loads = [0, 0, 0]
for load in y_pred.flatten():
    idx = server_loads.index(min(server_loads))
    server_loads[idx] += load
for i, load in enumerate(server_loads):
    print(f'Server {i} Total Load: {load:.2f}')

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Server 0 Total Load: 322403.00
Server 1 Total Load: 322252.75
Server 2 Total Load: 322550.56
