# Volume & Frequency Features (computed per time window)

1. Log count per service per window
2. Error count per window
3. Error rate (error_count / total_count)
4. WARN-to-INFO ratio
5. Count of logs at each level (ERROR, FATAL, WARN, INFO, DEBUG)



# Why we calculate these features (a reminder for when I forget)

- **Sudden spikes** in log_count/error_count often signal outages.
- **Drops** in log_count may indicate logging failures.
- **High error_rate** reveals system degradation.
- **WARN-to-INFO ratio changes** reveal hidden problems.


In [165]:
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Load Data

In [166]:
# Read the training logs and convert the `timestamp` column with pd.to_datetime.
df = pd.read_parquet('../data/training_logs.parquet').assign(
    timestamp=lambda logs: pd.to_datetime(logs['timestamp'])
)

# One-line-per-fact summary of what was loaded.
summary_lines = (
    f"Total logs: {len(df):,}",
    f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}",
    f"Services: {df['service'].nunique()}",
    f"Log levels: {df['level'].unique()}",
)
print(*summary_lines, sep="\n")
df.head()

Total logs: 131,935
Date range: 2025-12-17 23:44:21.030517+00:00 to 2025-12-26 23:44:14.821532+00:00
Services: 7
Log levels: ['DEBUG' 'WARN' 'INFO' 'ERROR' 'FATAL']


Unnamed: 0,timestamp,level,service,message,is_anomaly,anomaly_type
0,2025-12-17 23:44:21.030517+00:00,DEBUG,auth-service,Cache miss for key: cache_70,0,
1,2025-12-17 23:44:26.617160+00:00,WARN,api-gateway,Slow query detected - duration: 184ms,0,
2,2025-12-17 23:44:31.353882+00:00,INFO,notification-service,"Database query completed - rows: 297, duration...",0,
3,2025-12-17 23:44:38.295143+00:00,DEBUG,user-service,Cache miss for key: cache_89,0,
4,2025-12-17 23:44:44.889385+00:00,INFO,order-service,User session created - session_id: sess_665158,0,


In [None]:
# "timestamp": "2025-12-26T18:26:51.691Z", "level": "INFO", "service": "user-service", "message": "API request received - endpoint: /api/payments, method: PUT", "is_anomaly": 0

## 1. Volume & Frequency Features

We'll calculate features over fixed time windows to detect anomalies. This notebook uses 30-second windows; the helper functions below also accept other window sizes (e.g. 1 minute or 5 minutes).

### Configure Time Windows

In [167]:
WINDOW_SIZE = '30s'  # Width of each aggregation window (30 seconds); passed as `window` to the feature helpers.

# The 30-second window was chosen for the requirement "detect anomalies < 30 seconds".
# NOTE(review): a window's counts are only final once the window has elapsed, so the
# worst-case detection delay is roughly one full window - confirm that this satisfies
# the requirement as intended.

### Helper Functions

In [168]:
def _flag_levels(df: pd.DataFrame, levels: tuple) -> pd.DataFrame:
    """
    Index the logs by timestamp and add one 0/1 `is_<level>` column per level.

    param df: Log dataframe with timestamp and level columns.
    param levels: Upper-case level names to flag (e.g. 'ERROR').
    """
    # set_index returns a new frame, so the caller's dataframe is not modified.
    indexed = df.set_index('timestamp')
    for level in levels:
        indexed[f'is_{level.lower()}'] = (indexed['level'] == level).astype(int)
    return indexed


def _aggregation_plan(levels: tuple) -> tuple:
    """
    Build the per-window aggregation spec and the matching output column names.

    param levels: Upper-case level names that were flagged.
    Returns (spec, names): `spec` maps source column -> aggregation, `names`
    maps source column -> output column name.
    """
    # Counting the non-null `level` values gives the total number of logs.
    spec = {'level': 'count'}
    names = {'level': 'log_count'}
    for level in levels:
        key = level.lower()
        # Summing a 0/1 flag counts the logs at that level.
        spec[f'is_{key}'] = 'sum'
        names[f'is_{key}'] = f'{key}_count'
    return spec, names


def _add_derived_features(features: pd.DataFrame, rate_levels: tuple) -> pd.DataFrame:
    """
    Append rate and ratio columns to an aggregated feature table.

    param features: Table holding log_count plus error/fatal/warn/info counts.
    param rate_levels: Lower-case level names that get a `<level>_rate` column.
    """
    total = features['log_count']

    # Empty windows give 0/0 = NaN; report those rates as 0.
    for level in rate_levels:
        features[f'{level}_rate'] = (features[f'{level}_count'] / total).fillna(0)

    # A window with no INFO logs divides by 1 instead of 0, so the ratio
    # then equals the raw WARN count.
    features['warn_to_info_ratio'] = (
        features['warn_count'] / features['info_count'].replace(0, 1)
    ).fillna(0)

    # Error + Fatal combined.
    features['critical_count'] = features['error_count'] + features['fatal_count']
    features['critical_rate'] = (features['critical_count'] / total).fillna(0)

    return features


def calculate_window_features(df: pd.DataFrame, window: str = '30s') -> pd.DataFrame:
    """
    Calculate volume and frequency features per time window.

    All services are aggregated together. Windows without any logs are kept
    with zero counts and zero rates.

    param df: Log dataframe with timestamp, level, service columns.
    param window: Time window size (e.g., '30s', '1min', '5min').

    Returns one row per window with the columns: timestamp, log_count,
    error_count, fatal_count, warn_count, info_count, debug_count, error_rate,
    fatal_rate, warn_rate, warn_to_info_ratio, critical_count, critical_rate.
    """
    levels = ('ERROR', 'FATAL', 'WARN', 'INFO', 'DEBUG')
    spec, names = _aggregation_plan(levels)

    # Columns are renamed by key rather than by position, so the names cannot
    # drift out of step with the aggregation spec.
    window_features = (
        _flag_levels(df, levels)
        .resample(window)
        .agg(spec)
        .reset_index()
        .rename(columns=names)
    )

    return _add_derived_features(window_features, rate_levels=('error', 'fatal', 'warn'))


def calculate_per_service_features(
    df: pd.DataFrame,
    window: str = '30s'
) -> pd.DataFrame:
    """
    Calculate volume and frequency features per service per window.

    param df: Log dataframe.
    param window: Time window size.

    Returns one row per (service, window) with the columns: service,
    timestamp, log_count, error_count, fatal_count, warn_count, info_count,
    error_rate, warn_to_info_ratio, critical_count, critical_rate.

    NOTE(review): each service is resampled separately, so a service's windows
    appear to cover only the span between its own first and last log, not the
    full range of the dataset - confirm whether downstream code needs every
    service on the same window grid.
    """
    levels = ('ERROR', 'FATAL', 'WARN', 'INFO')
    spec, names = _aggregation_plan(levels)

    service_features = (
        _flag_levels(df, levels)
        .groupby('service')
        .resample(window)
        .agg(spec)
        .reset_index()
        .rename(columns=names)
    )

    return _add_derived_features(service_features, rate_levels=('error',))

### Calculate Global Features (All Services Combined)

In [169]:
# Calculate features for entire system.
global_features = calculate_window_features(df, window=WINDOW_SIZE)

print(f"Global features calculated: {len(global_features):,} windows")
print(f"\nFeature columns:")
print(global_features.columns.tolist())
print(f"\nSample:")
global_features.head(10)

Global features calculated: 25,921 windows

Feature columns:
['timestamp', 'log_count', 'error_count', 'fatal_count', 'warn_count', 'info_count', 'debug_count', 'error_rate', 'fatal_rate', 'warn_rate', 'warn_to_info_ratio', 'critical_count', 'critical_rate']

Sample:


Unnamed: 0,timestamp,log_count,error_count,fatal_count,warn_count,info_count,debug_count,error_rate,fatal_rate,warn_rate,warn_to_info_ratio,critical_count,critical_rate
0,2025-12-17 23:44:00+00:00,2,0,0,1,0,1,0.0,0.0,0.5,1.0,0,0.0
1,2025-12-17 23:44:30+00:00,5,0,0,1,3,1,0.0,0.0,0.2,0.333333,0,0.0
2,2025-12-17 23:45:00+00:00,5,0,0,0,3,2,0.0,0.0,0.0,0.0,0,0.0
3,2025-12-17 23:45:30+00:00,6,0,0,1,3,2,0.0,0.0,0.166667,0.333333,0,0.0
4,2025-12-17 23:46:00+00:00,5,0,0,2,2,1,0.0,0.0,0.4,1.0,0,0.0
5,2025-12-17 23:46:30+00:00,5,0,0,1,4,0,0.0,0.0,0.2,0.25,0,0.0
6,2025-12-17 23:47:00+00:00,5,1,0,0,4,0,0.2,0.0,0.0,0.0,1,0.2
7,2025-12-17 23:47:30+00:00,5,0,0,0,3,2,0.0,0.0,0.0,0.0,0,0.0
8,2025-12-17 23:48:00+00:00,6,0,0,1,4,1,0.0,0.0,0.166667,0.25,0,0.0
9,2025-12-17 23:48:30+00:00,5,0,1,1,3,0,0.0,0.2,0.2,0.333333,1,0.2


### Calculate Per-Service Features

In [170]:
# Calculate features per service.
service_features = calculate_per_service_features(df, window=WINDOW_SIZE)

print(f"Per-service features calculated: {len(service_features):,} windows")
print(f"Services: {service_features['service'].nunique()}")
print(f"\nSample:")
service_features.head(10)

Per-service features calculated: 181,425 windows
Services: 7

Sample:


Unnamed: 0,service,timestamp,log_count,error_count,fatal_count,warn_count,info_count,error_rate,warn_to_info_ratio,critical_count,critical_rate
0,api-gateway,2025-12-17 23:44:00+00:00,1,0,0,1,0,0.0,1.0,0,0.0
1,api-gateway,2025-12-17 23:44:30+00:00,0,0,0,0,0,0.0,0.0,0,0.0
2,api-gateway,2025-12-17 23:45:00+00:00,3,0,0,0,2,0.0,0.0,0,0.0
3,api-gateway,2025-12-17 23:45:30+00:00,1,0,0,0,0,0.0,0.0,0,0.0
4,api-gateway,2025-12-17 23:46:00+00:00,2,0,0,1,1,0.0,1.0,0,0.0
5,api-gateway,2025-12-17 23:46:30+00:00,1,0,0,0,1,0.0,0.0,0,0.0
6,api-gateway,2025-12-17 23:47:00+00:00,0,0,0,0,0,0.0,0.0,0,0.0
7,api-gateway,2025-12-17 23:47:30+00:00,0,0,0,0,0,0.0,0.0,0,0.0
8,api-gateway,2025-12-17 23:48:00+00:00,1,0,0,0,1,0.0,0.0,0,0.0
9,api-gateway,2025-12-17 23:48:30+00:00,1,0,0,0,1,0.0,0.0,0,0.0


## 2. Feature Statistics

In [171]:
# Banner, then describe() summaries for the headline per-window features.
banner = "="*60
print(banner)
print("GLOBAL FEATURE STATISTICS")
print(banner)

# (section heading, feature column) pairs, reported in this order.
stat_sections = [
    ("\nLog Count per Window:", 'log_count'),
    ("\nError Rate per Window:", 'error_rate'),
    ("\nCritical Rate per Window (ERROR + FATAL):", 'critical_rate'),
    ("\nWARN-to-INFO Ratio:", 'warn_to_info_ratio'),
]
for heading, column in stat_sections:
    print(heading)
    print(global_features[column].describe())

GLOBAL FEATURE STATISTICS

Log Count per Window:
count    25921.000000
mean         5.089889
std          1.731834
min          0.000000
25%          5.000000
50%          5.000000
75%          6.000000
max         43.000000
Name: log_count, dtype: float64

Error Rate per Window:
count    25921.000000
mean         0.046282
std          0.101940
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          1.000000
Name: error_rate, dtype: float64

Critical Rate per Window (ERROR + FATAL):
count    25921.000000
mean         0.052387
std          0.109844
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          1.000000
Name: critical_rate, dtype: float64

WARN-to-INFO Ratio:
count    25921.000000
mean         0.217966
std          0.405837
min          0.000000
25%          0.000000
50%          0.000000
75%          0.333333
max         14.000000
Name: warn_to_info_ratio, dtype: float64


## 3. Visualize Volume Features

### Log Count Over Time

In [172]:
# Line chart of the total number of logs in each window, all services combined.
log_count_trace = go.Scatter(
    x=global_features['timestamp'],
    y=global_features['log_count'],
    mode='lines',
    name='Log Count',
    line={'color': 'steelblue', 'width': 1.5},
)
log_count_layout = {
    'title': f'Log Count per Window ({WINDOW_SIZE})',
    'xaxis_title': 'Timestamp',
    'yaxis_title': 'Log Count',
    'height': 500,
    'hovermode': 'x unified',
}

fig = go.Figure(data=[log_count_trace])
fig.update_layout(**log_count_layout)
fig.show()

In [173]:
# Observation: the log count shows a clear morning/evening distribution.

### Error Rate Over Time

In [174]:
# Line chart of the share of ERROR logs in each window, all services combined.
error_rate_trace = go.Scatter(
    x=global_features['timestamp'],
    y=global_features['error_rate'],
    mode='lines',
    name='Error Rate',
    line={'color': 'crimson', 'width': 1.5},
)
error_rate_layout = {
    'title': f'Error Rate per Window ({WINDOW_SIZE})',
    'xaxis_title': 'Timestamp',
    'yaxis_title': 'Error Rate',
    'height': 500,
    'hovermode': 'x unified',
}

fig = go.Figure(data=[error_rate_trace])
fig.update_layout(**error_rate_layout)
fig.show()

### Critical Rate Over Time (ERROR + FATAL)

In [175]:
# Line chart of the combined ERROR + FATAL share of logs in each window.
critical_rate_trace = go.Scatter(
    x=global_features['timestamp'],
    y=global_features['critical_rate'],
    mode='lines',
    name='Critical Rate',
    line={'color': 'orange', 'width': 1.5},
)
critical_rate_layout = {
    'title': f'Critical Rate (ERROR+FATAL) per Window ({WINDOW_SIZE})',
    'xaxis_title': 'Timestamp',
    'yaxis_title': 'Critical Rate',
    'height': 500,
    'hovermode': 'x unified',
}

fig = go.Figure(data=[critical_rate_trace])
fig.update_layout(**critical_rate_layout)
fig.show()

### WARN-to-INFO Ratio

In [176]:
# Line chart of the WARN-to-INFO ratio in each window, all services combined.
warn_info_trace = go.Scatter(
    x=global_features['timestamp'],
    y=global_features['warn_to_info_ratio'],
    mode='lines',
    name='WARN/INFO Ratio',
    line={'color': 'purple', 'width': 1.5},
)
warn_info_layout = {
    'title': f'WARN-to-INFO Ratio per Window ({WINDOW_SIZE})',
    'xaxis_title': 'Timestamp',
    'yaxis_title': 'WARN/INFO Ratio',
    'height': 500,
    'hovermode': 'x unified',
}

fig = go.Figure(data=[warn_info_trace])
fig.update_layout(**warn_info_layout)
fig.show()

## 4. Per-Service Analysis

In [177]:
# Take the service named in the first row of the per-service table as the example,
# then keep only that service's windows.
sample_service = service_features['service'].iat[0]
is_sample_service = service_features['service'].eq(sample_service)
service_data = service_features.loc[is_sample_service]

print(f"Analyzing service: {sample_service}")
print(f"Total windows: {len(service_data):,}")

Analyzing service: api-gateway
Total windows: 25,920


In [178]:
# Two stacked panels for the sample service: log volume on top, error rate below.
panel_titles = (
    f'{sample_service} - Log Count',
    f'{sample_service} - Error Rate'
)
fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=panel_titles,
    vertical_spacing=0.15,
)

# (column, label, line colour, extra trace options) for rows 1 and 2 respectively.
panels = [
    ('log_count', 'Log Count', 'steelblue', {}),
    ('error_rate', 'Error Rate', 'crimson', {'showlegend': False}),
]
for row_number, (column, label, colour, extra_options) in enumerate(panels, start=1):
    trace = go.Scatter(
        x=service_data['timestamp'],
        y=service_data[column],
        mode='lines',
        name=label,
        line={'color': colour},
        **extra_options,
    )
    fig.add_trace(trace, row=row_number, col=1)
    fig.update_yaxes(title_text=label, row=row_number, col=1)

# Only the bottom panel carries the x-axis title.
fig.update_xaxes(title_text='Timestamp', row=2, col=1)
fig.update_layout(height=700)
fig.show()

## 5. Save Engineered Features

In [179]:
# Write both feature tables to Parquet files under ../data/features.
features_dir = '../data/features'
os.makedirs(features_dir, exist_ok=True)  # No error if the directory already exists.

# System-wide features: one row per window.
global_output = os.path.join(features_dir, 'global_features.parquet')
global_features.to_parquet(global_output, index=False)
print(f"✓ Global features saved: {global_output}")
print(f"  Shape: {global_features.shape}")

# Per-service features: one row per (service, window).
service_output = os.path.join(features_dir, 'service_features.parquet')
service_features.to_parquet(service_output, index=False)
print(f"✓ Per-service features saved: {service_output}")
print(f"  Shape: {service_features.shape}")

✓ Global features saved: ../data/features/global_features.parquet
  Shape: (25921, 13)
✓ Per-service features saved: ../data/features/service_features.parquet
  Shape: (181425, 11)
