<a href="https://colab.research.google.com/github/lohitkolluri/k8s-prediction-model/blob/main/k8s_prediction_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model Building

In [None]:
### Step 1: Install Required Libraries (if needed)

!pip install tensorflow numpy pandas scikit-learn matplotlib imbalanced-learn

import os
from collections import Counter

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE



In [None]:
### Step 2: Generate Synthetic Kubernetes Data

# Fix NumPy's global RNG so every run yields the same synthetic series.
# NOTE: the order of the np.random calls below determines the generated
# values, so it must not be rearranged.
np.random.seed(42)

# Number of simulated time steps
N = 1200

# Integer time axis 0 .. N-1
time = np.arange(N)


def _sine_wave(amplitude, period):
    """Return a sinusoid over `time` with the given amplitude and period (in steps)."""
    return amplitude * np.sin(2 * np.pi * time / period)


# Baseline ("normal") metrics: constant level + periodic component + noise
cpu_usage = 50 + _sine_wave(10, 100) + np.random.normal(0, 3, N)
mem_usage = 60 + _sine_wave(5, 150) + np.random.normal(0, 2, N)
disk_usage = 70 + np.random.normal(0, 1, N)
net_throughput = 100 + _sine_wave(20, 120) + np.random.normal(0, 5, N)
active_nodes = np.full(N, 5)   # 5 nodes in the normal state
active_pods = np.full(N, 50)   # 50 pods in the normal state
error_rate = np.random.poisson(0.2, N)  # sparse background errors

# Keep percentages inside [0, 100] and throughput non-negative
cpu_usage = cpu_usage.clip(0, 100)
mem_usage = mem_usage.clip(0, 100)
disk_usage = disk_usage.clip(0, 100)
net_throughput = net_throughput.clip(min=0)

# Per-step class label; 0 means "normal"
labels = np.zeros(N, dtype=int)

# label -> list of inclusive (start, end) index ranges where that anomaly occurs.
# Insertion order matters: it fixes the order in which random draws are consumed.
anomalies = {
    5: [(50, 59), (650, 659)],  # Node Failure
    1: [(150, 159), (750, 759)],  # CPU Exhaustion
    2: [(250, 259), (850, 859)],  # Memory Leak
    3: [(350, 359), (950, 959)],  # Disk Saturation
    4: [(450, 459), (1050, 1059)],  # Network Issue
    6: [(550, 559), (1150, 1159)]  # Service Disruption
}

# Overwrite / rescale the affected metrics inside every anomaly window
for label, segments in anomalies.items():
    for start, end in segments:
        window = slice(start, end + 1)   # inclusive range as a slice
        span = end - start + 1           # number of steps in the window
        labels[window] = label

        if label == 1:
            # CPU Exhaustion: CPU pinned near 95%
            cpu_usage[window] = np.clip(95 + np.random.normal(0, 2, span), 0, 100)
        elif label == 2:
            # Memory Leak: ramp from the value just before the window up to 98%
            mem_trend = np.linspace(mem_usage[start - 1], 98, span)
            mem_usage[window] = np.clip(mem_trend + np.random.normal(0, 1, span), 0, 100)
        elif label == 3:
            # Disk Saturation: disk usage jumps to ~95%
            disk_usage[window] = np.clip(95 + np.random.normal(0, 1, span), 0, 100)
        elif label == 4:
            # Network Issue: throughput collapses to ~5
            net_throughput[window] = np.clip(5 + np.random.normal(0, 1, span), 0, None)
        elif label == 5:
            # Node Failure: load drops to 20%, one node and five pods disappear
            cpu_usage[window] *= 0.2
            mem_usage[window] *= 0.2
            net_throughput[window] *= 0.2
            active_nodes[window] = 4
            active_pods[window] -= 5
        elif label == 6:
            # Service Disruption: error burst and CPU halved
            error_rate[window] = np.random.poisson(5, span)
            cpu_usage[window] *= 0.5

# Assemble all metrics plus the label into one frame (column order preserved)
columns = {
    'cpu_usage': cpu_usage,
    'mem_usage': mem_usage,
    'disk_usage': disk_usage,
    'net_throughput': net_throughput,
    'active_nodes': active_nodes,
    'active_pods': active_pods,
    'error_rate': error_rate,
    'label': labels,
}
data = pd.DataFrame(columns)


In [None]:
### Step 3: Preprocess Data

# Rescale the selected metric columns with MinMaxScaler.
# active_nodes / active_pods are deliberately left out of the feature set.
scaler = MinMaxScaler()
feature_cols = ['cpu_usage', 'mem_usage', 'disk_usage', 'net_throughput', 'error_rate']  # Removed less useful features
X_features = scaler.fit_transform(data[feature_cols])
y_labels = data['label']

# Apply SMOTE oversampling to balance classes.
# SMOTE synthesises minority samples at random, so pin its seed; otherwise
# the resampled set (and everything trained on it) changes on every run.
# NOTE(review): the scaler and SMOTE are both fit on the full dataset and no
# hold-out split is created here, so metrics computed on X_resampled describe
# training fit, not generalisation.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_features, y_labels)

# Reshape for LSTM input: (samples, timesteps=1, features)
X_resampled = X_resampled.reshape((X_resampled.shape[0], 1, X_resampled.shape[1]))

In [None]:
### Step 4: Define and Train LSTM Model

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation,
# dropout and batch shuffling are repeatable between runs.
tf.keras.utils.set_random_seed(42)

# Two stacked LSTM layers followed by dropout and a 7-way softmax
# (labels 0-6). The input shape is declared with an explicit Input object
# rather than `input_shape=` on the first layer, which Keras warns against
# for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(1, X_resampled.shape[2])),  # (timesteps=1, n_features)
    LSTM(256, return_sequences=True),  # emits a sequence for the next LSTM
    LSTM(128, activation='tanh'),
    Dropout(0.3),
    Dense(7, activation='softmax')
])
# Integer class labels -> sparse categorical cross-entropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model
history = model.fit(X_resampled, y_resampled, epochs=50, batch_size=64, verbose=2)

Epoch 1/50


  super().__init__(**kwargs)


119/119 - 3s - 27ms/step - accuracy: 0.5619 - loss: 1.3590
Epoch 2/50
119/119 - 1s - 9ms/step - accuracy: 0.9235 - loss: 0.2139
Epoch 3/50
119/119 - 1s - 10ms/step - accuracy: 0.9495 - loss: 0.1214
Epoch 4/50
119/119 - 1s - 5ms/step - accuracy: 0.9556 - loss: 0.1023
Epoch 5/50
119/119 - 1s - 5ms/step - accuracy: 0.9578 - loss: 0.0954
Epoch 6/50
119/119 - 1s - 5ms/step - accuracy: 0.9570 - loss: 0.0948
Epoch 7/50
119/119 - 1s - 5ms/step - accuracy: 0.9566 - loss: 0.0949
Epoch 8/50
119/119 - 1s - 5ms/step - accuracy: 0.9586 - loss: 0.0909
Epoch 9/50
119/119 - 1s - 5ms/step - accuracy: 0.9569 - loss: 0.0902
Epoch 10/50
119/119 - 1s - 5ms/step - accuracy: 0.9624 - loss: 0.0857
Epoch 11/50
119/119 - 1s - 5ms/step - accuracy: 0.9589 - loss: 0.0910
Epoch 12/50
119/119 - 1s - 5ms/step - accuracy: 0.9606 - loss: 0.0860
Epoch 13/50
119/119 - 1s - 5ms/step - accuracy: 0.9636 - loss: 0.0834
Epoch 14/50
119/119 - 1s - 5ms/step - accuracy: 0.9599 - loss: 0.0855
Epoch 15/50
119/119 - 1s - 5ms/step - 

In [None]:
### Step 5: Evaluate Model

# NOTE(review): X_resampled / y_resampled are the same arrays that were
# passed to model.fit, so the numbers below measure fit on the training
# data rather than performance on unseen samples.

# Class probabilities per sample, then the most likely class index
y_pred_probs = model.predict(X_resampled)
y_pred = y_pred_probs.argmax(axis=1)

# Per-class precision / recall / F1 with four decimals
report_text = classification_report(y_resampled, y_pred, digits=4)
print("Classification Report:\n", report_text)

# Rows are true labels, columns are predicted labels
conf_mat = confusion_matrix(y_resampled, y_pred)
print("Confusion Matrix:\n", conf_mat)

[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Classification Report:
               precision    recall  f1-score   support

           0     0.9751    0.9806    0.9778      1080
           1     1.0000    1.0000    1.0000      1080
           2     0.9804    0.9750    0.9777      1080
           3     1.0000    1.0000    1.0000      1080
           4     1.0000    1.0000    1.0000      1080
           5     1.0000    1.0000    1.0000      1080
           6     1.0000    1.0000    1.0000      1080

    accuracy                         0.9937      7560
   macro avg     0.9937    0.9937    0.9937      7560
weighted avg     0.9937    0.9937    0.9937      7560

Confusion Matrix:
 [[1059    0   21    0    0    0    0]
 [   0 1080    0    0    0    0    0]
 [  27    0 1053    0    0    0    0]
 [   0    0    0 1080    0    0    0]
 [   0    0    0    0 1080    0    0]
 [   0    0    0    0    0 1080    0]
 [   0    0    0    0    0    0 1080]]


In [None]:
# Persist the trained model in both the legacy HDF5 format and the native
# Keras format. Create the target directory first: model.save() does not
# reliably create missing parent folders, so a fresh runtime would fail here.
MODEL_SAVE_DIR = '/content/Model Saves'
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

model.save(os.path.join(MODEL_SAVE_DIR, 'Phase_1_99.3.h5'))
model.save(os.path.join(MODEL_SAVE_DIR, 'Phase_1_99.3.keras'))



# Model Testing

In [40]:
import pandas as pd

# Load the dataset
file_path = "/content/elastic_february2022_data.csv"  # Adjust if needed
df = pd.read_csv(file_path)

# Display dataset summary
print("Dataset Overview:")
print(df.head())  # Show first few rows
print("\nDataset Info:")
# DataFrame.info() prints the summary itself and returns None, so wrapping
# it in print() would emit a stray "None" line after the summary.
df.info()  # Show column names and types


Dataset Overview:
                           _source_flow_id  _source_flow_final  \
0       EAD/////AP////////8AAAGsEAILrBACDA               False   
1       EAD/////AP////////8AAAGsEAIKrBACDA               False   
2  EAT/////AP//////CP8AAAHAqFSDwKj3AAEIbQI               False   
3  EAT/////AP//////CP8AAAHAqFSPwKj3D8CeVCQ               False   
4  EAT/////AP//////CP8AAAEKapNUwKj3DvAjvpo               False   

  _source_source_ip _source_destination_ip  _source_network_bytes  \
0    240.16.203.232         240.16.203.236               72096053   
1    240.16.203.236         240.16.203.233                 913734   
2    190.215.171.30           190.215.9.27               30684984   
3    190.215.171.17           190.215.9.16                 220136   
4      190.215.9.17         53.181.234.140                  73242   

  _source_network_transport        _source_@timestamp  _source_event_duration  \
0                       NaN  2022-02-25T12:20:00.007Z             41753051589   
1       