**real-world time-series data anomaly detection**

In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [79]:
df = pd.read_csv("cloud_dataset.csv")
df.head()

Unnamed: 0,Timestamp,CPU_Usage,Memory_Usage,Disk_IO,Network_IO,Workload_Type,User_ID,Anomaly_Label
0,2025-07-01 00:00:00,18.88,43.19,11.4,6.01,Database_Query,user_1,0
1,2025-07-01 00:01:00,25.31,45.43,7.68,17.67,Video_Streaming,user_1,0
2,2025-07-01 00:02:00,3.87,49.5,14.08,3.48,Database_Query,user_1,0
3,2025-07-01 00:03:00,20.92,25.88,17.33,4.77,Web_Service,user_1,0
4,2025-07-01 00:04:00,55.59,43.94,10.61,4.48,Web_Service,user_1,0


In [80]:
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df.set_index('Timestamp', inplace = True)
df.head()

Unnamed: 0_level_0,CPU_Usage,Memory_Usage,Disk_IO,Network_IO,Workload_Type,User_ID,Anomaly_Label
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-07-01 00:00:00,18.88,43.19,11.4,6.01,Database_Query,user_1,0
2025-07-01 00:01:00,25.31,45.43,7.68,17.67,Video_Streaming,user_1,0
2025-07-01 00:02:00,3.87,49.5,14.08,3.48,Database_Query,user_1,0
2025-07-01 00:03:00,20.92,25.88,17.33,4.77,Web_Service,user_1,0
2025-07-01 00:04:00,55.59,43.94,10.61,4.48,Web_Service,user_1,0


In [81]:
# Forward fill missing values
df = df.ffill()
df

Unnamed: 0_level_0,CPU_Usage,Memory_Usage,Disk_IO,Network_IO,Workload_Type,User_ID,Anomaly_Label
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-07-01 00:00:00,18.88,43.19,11.40,6.01,Database_Query,user_1,0
2025-07-01 00:01:00,25.31,45.43,7.68,17.67,Video_Streaming,user_1,0
2025-07-01 00:02:00,3.87,49.50,14.08,3.48,Database_Query,user_1,0
2025-07-01 00:03:00,20.92,25.88,17.33,4.77,Web_Service,user_1,0
2025-07-01 00:04:00,55.59,43.94,10.61,4.48,Web_Service,user_1,0
...,...,...,...,...,...,...,...
2025-07-01 23:55:00,27.85,51.42,8.98,4.23,Web_Service,user_10,0
2025-07-01 23:56:00,38.68,43.84,6.04,25.86,Video_Streaming,user_10,0
2025-07-01 23:57:00,16.50,50.51,8.06,5.83,Database_Query,user_10,0
2025-07-01 23:58:00,26.31,31.29,9.43,3.99,Database_Query,user_10,0


In [82]:
print(f'Date range: {df.index.min()} → {df.index.max()}')

Date range: 2025-07-01 00:00:00 → 2025-07-01 23:59:00


In [83]:

len_anomaly = len(df[df['Anomaly_Label'] == 1])
len_no_anomaly = len(df[df['Anomaly_Label'] == 0])

print("Total No. of Anomaly Cases:", len_anomaly)
print("Total No. of No Anomaly Cases:", len_no_anomaly)

Total No. of Anomaly Cases: 1257
Total No. of No Anomaly Cases: 13143


In [86]:
from sklearn.preprocessing import StandardScaler

# Select numeric metric columns
features = ['CPU_Usage', 'Memory_Usage', 'Disk_IO', 'Network_IO']

X = df[features]

# Standardize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

df.head()


Unnamed: 0_level_0,CPU_Usage,Memory_Usage,Disk_IO,Network_IO,Workload_Type,User_ID,Anomaly_Label
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-07-01 00:00:00,18.88,43.19,11.4,6.01,Database_Query,user_1,0
2025-07-01 00:01:00,25.31,45.43,7.68,17.67,Video_Streaming,user_1,0
2025-07-01 00:02:00,3.87,49.5,14.08,3.48,Database_Query,user_1,0
2025-07-01 00:03:00,20.92,25.88,17.33,4.77,Web_Service,user_1,0
2025-07-01 00:04:00,55.59,43.94,10.61,4.48,Web_Service,user_1,0


In [None]:
from sklearn.ensemble import IsolationForest

# Fit the Isolation Forest model
model = IsolationForest(
    contamination=0.1,   # assume 10% anomalies
    random_state=42
)

df['Anomaly_Pred'] = model.fit_predict(X_scaled)

# Convert: -1 = anomaly → 1, 1 = normal → 0
df['Anomaly_Pred'] = df['Anomaly_Pred'].map({1: 0, -1: 1})

df.head()

Unnamed: 0_level_0,CPU_Usage,Memory_Usage,Disk_IO,Network_IO,Workload_Type,User_ID,Anomaly_Label,Anomaly_Pred
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-07-01 00:00:00,18.88,43.19,11.4,6.01,Database_Query,user_1,0,0
2025-07-01 00:01:00,25.31,45.43,7.68,17.67,Video_Streaming,user_1,0,0
2025-07-01 00:02:00,3.87,49.5,14.08,3.48,Database_Query,user_1,0,0
2025-07-01 00:03:00,20.92,25.88,17.33,4.77,Web_Service,user_1,0,0
2025-07-01 00:04:00,55.59,43.94,10.61,4.48,Web_Service,user_1,0,0


In [89]:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
print('Accuracy score: ', format(accuracy_score(df["Anomaly_Label"], df["Anomaly_Pred"])))


Accuracy score:  0.8410416666666667


In [92]:
def decide_action(row):
    actions = []
    
    if row['CPU_Usage'] > 85:
        actions.append("Scale up server or add instance")
        
    if row['Memory_Usage'] > 80:
        actions.append("Restart memory-heavy service")
        
    if row['Disk_IO'] > 90:
        actions.append("Clean temp files or check disk load")
        
    if row['Network_IO'] > 90:
        actions.append("Investigate unusual traffic")
        
    return actions if actions else ["Monitor - no critical action needed"]

In [93]:
df['Recommended_Action'] = ""

anomaly_mask = df['Anomaly_Pred'] == 1

df.loc[anomaly_mask, 'Recommended_Action'] = (
    df.loc[anomaly_mask].apply(decide_action, axis=1)
)
df[df['Recommended_Action'] != ""]

Unnamed: 0_level_0,CPU_Usage,Memory_Usage,Disk_IO,Network_IO,Workload_Type,User_ID,Anomaly_Label,Anomaly_Pred,Recommended_Action
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-07-01 00:07:00,32.09,20.40,3.36,20.98,Video_Streaming,user_1,0,1,[Monitor - no critical action needed]
2025-07-01 00:14:00,44.95,43.38,26.86,15.40,Video_Streaming,user_1,0,1,[Monitor - no critical action needed]
2025-07-01 00:30:00,43.64,92.70,31.76,4.79,Backup,user_1,0,1,[Restart memory-heavy service]
2025-07-01 00:35:00,39.63,44.13,14.11,29.48,Video_Streaming,user_1,0,1,[Monitor - no critical action needed]
2025-07-01 00:37:00,30.13,54.54,8.68,33.60,Video_Streaming,user_1,0,1,[Monitor - no critical action needed]
...,...,...,...,...,...,...,...,...,...
2025-07-01 23:15:00,35.14,88.39,36.58,3.04,Backup,user_10,0,1,[Restart memory-heavy service]
2025-07-01 23:30:00,30.39,85.94,41.98,5.20,Backup,user_10,0,1,[Restart memory-heavy service]
2025-07-01 23:37:00,90.28,42.51,5.33,6.79,Crypto_Mining,user_10,1,1,[Scale up server or add instance]
2025-07-01 23:41:00,26.45,91.61,28.29,4.44,Backup,user_10,0,1,[Restart memory-heavy service]


In [94]:
def detect_anomaly(new_metrics):
    new_scaled = scaler.transform([new_metrics])
    prediction = model.predict(new_scaled)[0]
    return 1 if prediction == -1 else 0


def recommend_action(actions):
    for action in actions:
        print("Recommended action: ", action)

In [97]:
import time
import random

def simulate_metrics():
    return [
        random.uniform(10, 100),  # CPU
        random.uniform(20, 100),  # Memory
        random.uniform(5, 100),   # Disk
        random.uniform(5, 100)    # Network
    ]


for _ in range(10):
    metrics = simulate_metrics()
    
    anomaly = detect_anomaly(metrics)
    
    print("\nCurrent Metrics:", metrics)
    
    if anomaly:
        print("Anomaly detected!")
        
        metric_dict = dict(zip(features, metrics))
        actions = decide_action(metric_dict)
        recommend_action(actions)
    else:
        print("System operating normally.")
    
 


Current Metrics: [52.75284436768737, 85.58132368178185, 66.79401995637221, 26.389611818403463]
Anomaly detected!
Recommended action:  Restart memory-heavy service

Current Metrics: [56.56054230610445, 34.34338578641891, 13.971556259540174, 23.079514820442668]
System operating normally.

Current Metrics: [61.560916581879106, 98.74275304734968, 25.495650111133994, 14.075410912462662]
Anomaly detected!
Recommended action:  Restart memory-heavy service

Current Metrics: [15.061248068537873, 57.14836429378959, 91.98548309298464, 66.0961119101444]
Anomaly detected!
Recommended action:  Clean temp files or check disk load

Current Metrics: [36.15398856341822, 87.24166219986088, 79.39953824565804, 34.184564931779185]
Anomaly detected!
Recommended action:  Restart memory-heavy service

Current Metrics: [30.06326821070161, 70.92812637069686, 76.06276084221683, 91.52277806417443]
Anomaly detected!
Recommended action:  Investigate unusual traffic

Current Metrics: [31.36720685678745, 58.327327591

