In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the merged Kubernetes dataset into a DataFrame.
# The path has no directory component, so it resolves against the
# notebook's current working directory.
file_path = "kubernetes_merged_dataset.csv"
df = pd.read_csv(file_path)

In [None]:
def categorize_issue(row):
    """Assign a rule-based issue class to a single dataset row.

    Rules are tried in priority order and the first one that matches wins:

    1. pod status is "failed" or "unknown"       -> 1 (Node/Pod Failure)
    2. message contains "oomkilled", or CPU or
       memory usage exceeds its limit            -> 2 (Resource Exhaustion)
    3. message contains "timeout", or network
       latency is greater than 200               -> 3 (Network/Connectivity Issue)
    4. message contains "unavailable" or "503"   -> 4 (Service Disruption)
    5. nothing matched                           -> 0 (No Issue)

    The two text fields are coerced with ``str()``, stripped and lower-cased
    before matching, so the checks are case-insensitive.

    Args:
        row: Mapping-like record (for example a DataFrame row) indexable by
            the column names ``event_message``, ``pod_status``, ``cpu_usage``,
            ``cpu_limit``, ``memory_usage``, ``memory_limit`` and
            ``network_latency``. Numeric columns are only read when an
            earlier rule has not already matched.

    Returns:
        int: The issue class code, one of 0, 1, 2, 3 or 4.
    """
    message = str(row['event_message']).strip().lower()
    pod_state = str(row['pod_status']).strip().lower()

    # Pod status takes precedence over every other signal.
    if pod_state in ('failed', 'unknown'):
        return 1  # Node/Pod Failure

    # Resource exhaustion: explicit OOM kill, or usage above the configured limit.
    if 'oomkilled' in message:
        return 2  # Resource Exhaustion
    if row['cpu_usage'] > row['cpu_limit']:
        return 2  # Resource Exhaustion
    if row['memory_usage'] > row['memory_limit']:
        return 2  # Resource Exhaustion

    # Network problems: timeout text, or latency strictly above the threshold.
    if 'timeout' in message or row['network_latency'] > 200:
        return 3  # Network/Connectivity Issue

    # Service disruption is detected from the message text only.
    if 'unavailable' in message or '503' in message:
        return 4  # Service Disruption

    return 0  # No Issue

In [None]:
# Run the rule-based labeller once per row (axis=1) and store the resulting
# class codes in a new 'issue_class' column.
issue_labels = df.apply(categorize_issue, axis=1)
df['issue_class'] = issue_labels

In [None]:
# Columns used as model inputs without further transformation.
# The order here determines the column order of the feature matrix.
numerical_features = [
    'cpu_allocation_efficiency',
    'memory_allocation_efficiency',
    'disk_io',
    'network_latency',
    'node_temperature',
    'node_cpu_usage',
    'node_memory_usage',
    'pod_lifetime_seconds',
    'cpu_request',
    'cpu_limit',
    'memory_request',
    'memory_limit',
    'cpu_usage',
    'memory_usage',
    'restart_count',
    'uptime_seconds',
    'network_bandwidth_usage',
]

# Columns that hold category labels rather than measurements.
categorical_features = [
    'event_type',
    'deployment_strategy',
    'scaling_policy',
    'namespace',
]

In [None]:
# One LabelEncoder per categorical column, keyed by column name, so the
# fitted encoders stay available after the columns have been replaced.
le_dict = {col: LabelEncoder() for col in categorical_features}

for col, encoder in le_dict.items():
    # Fit on the column's current values and overwrite it in place with the
    # resulting integer codes.
    df[col] = encoder.fit_transform(df[col])

In [None]:
# Model inputs: the numeric columns followed by the categorical columns.
features = [*numerical_features, *categorical_features]
X, y = df[features], df['issue_class']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 100-tree random forest, seeded so repeated runs build the same trees.
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

In [None]:
# Predict on the held-out rows, then print the per-class
# precision / recall / F1 summary.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.49      0.17      0.25       106
           1       0.40      0.35      0.38       297
           2       0.55      0.72      0.62       339

    accuracy                           0.49       742
   macro avg       0.48      0.41      0.42       742
weighted avg       0.48      0.49      0.47       742



In [None]:
# Parse the timestamp column, then keep the five newest rows as a demo batch.
# .copy() detaches the slice so the columns added below do not touch df.
df['timestamp'] = pd.to_datetime(df['timestamp'])
recent_logs = (
    df.sort_values('timestamp', ascending=False)
    .head(5)
    .copy()
)

# Human-readable names for the numeric issue classes.
issue_label_map = {
    0: 'Normal (No Issue)',
    1: 'Node/Pod Failure',
    2: 'Resource Exhaustion',
    3: 'Network/Connectivity Issue',
    4: 'Service Disruption',
}

# Same feature columns, in the same order, that the model was fitted on.
X_recent = recent_logs[features]

# Predicted class, plus the highest class probability per row as a percentage.
proba_recent = model.predict_proba(X_recent)
recent_logs['predicted_issue_class'] = model.predict(X_recent)
recent_logs['prediction_confidence (%)'] = proba_recent.max(axis=1) * 100
recent_logs['predicted_issue_type'] = recent_logs['predicted_issue_class'].map(issue_label_map)

# Show final output: a few raw columns next to the prediction and its confidence.
report_columns = [
    'timestamp',
    'event_message',
    'pod_status',
    'cpu_usage',
    'memory_usage',
    'predicted_issue_type',
    'prediction_confidence (%)',
]
print("📊 Recent Kubernetes Issue Predictions:\n")
print(recent_logs[report_columns])

📊 Recent Kubernetes Issue Predictions:

               timestamp event_message pod_status  cpu_usage  memory_usage  \
3708 2023-01-01 04:09:00        Failed    Pending   0.520359   6014.724989   
3703 2023-01-01 04:09:00     Completed    Running   3.288467   8081.093258   
3699 2023-01-01 04:09:00        Killed     Failed   3.013779   4199.952670   
3700 2023-01-01 04:09:00     Completed     Failed   1.203030   4005.733449   
3702 2023-01-01 04:09:00        Killed     Failed   3.870784   5498.437695   

     predicted_issue_type  prediction_confidence (%)  
3708  Resource Exhaustion                       82.0  
3703  Resource Exhaustion                       52.0  
3699     Node/Pod Failure                       73.0  
3700     Node/Pod Failure                       45.0  
3702     Node/Pod Failure                       83.0  
