# Performance Modeling

Build statistical and probabilistic models of microservice performance.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import sys
sys.path.append('../src')

from model import MicroserviceInteractionModel
from analyzer import PerformanceAnalyzer

%matplotlib inline

## 1. Load Data

In [None]:
# Read the processed trace CSV into the `traces` DataFrame used by every later cell.
trace_csv_path = '../data/processed/parsed_traces.csv'
traces = pd.read_csv(trace_csv_path)

# Report the row count, then leave a preview as the cell's display value.
print(f"Loaded {len(traces)} trace records")
traces.head()

## 2. Fit Latency Distributions

In [None]:
# Instantiate the interaction model; the cells below reuse this same `model` object.
model = MicroserviceInteractionModel()

# Fit on the full trace table (no service pair given) for a system-wide summary.
latency_model = model.fit_latency_distribution(traces)

# Assemble the report lines first, then emit them in one write.
report_lines = [
    "Overall Latency Model:",
    f"  Distribution: {latency_model['distribution']}",
    f"  Mean: {latency_model['mean']:.2f} ms",
    f"  Median: {latency_model['median']:.2f} ms",
    f"  P95: {latency_model['p95']:.2f} ms",
    f"  P99: {latency_model['p99']:.2f} ms",
]
print("\n".join(report_lines))

## 3. Service Pair Models

In [None]:
# Per-pair latency models for the five most frequent caller -> callee edges.
# Skipped entirely when the trace table lacks either edge column.
if all(column in traces.columns for column in ('caller', 'callee')):
    # Count calls per (caller, callee) edge, then keep the five largest counts.
    calls_per_pair = traces.groupby(['caller', 'callee']).size()
    top_pairs = calls_per_pair.nlargest(5)

    print("Latency Models for Top 5 Service Pairs:\n")
    for pair, count in top_pairs.items():
        # Each index entry is already the (caller, callee) tuple the model expects.
        caller, callee = pair
        pair_model = model.fit_latency_distribution(traces, pair)
        print(f"{caller} -> {callee}:")
        print(f"  Call count: {count}")
        print(f"  Mean latency: {pair_model['mean']:.2f} ms")
        print(f"  P95: {pair_model['p95']:.2f} ms")
        print()

## 4. Markov Chain Model

In [None]:
# Markov chain over services; only attempted when both required columns exist.
required_columns = ('trace_id', 'service')
if all(column in traces.columns for column in required_columns):
    transition_probs = model.build_markov_chain(traces)

    print("Markov Chain Transition Probabilities:")
    print(f"Number of states: {len(transition_probs)}")

    # Keep the printout short: show outgoing transitions for the first three states only.
    sample_states = list(transition_probs.keys())[:3]
    for service in sample_states:
        print(f"\nFrom {service}:")
        outgoing = transition_probs[service]
        for next_service, prob in outgoing.items():
            print(f"  -> {next_service}: {prob:.3f}")

## 5. Predict Service Paths

In [None]:
# Generate a few example call paths from the model's transition table.
# An empty/falsy table means there is nothing to predict from, so the cell is a no-op.
if model.transition_probabilities:
    # Use the first state in the table as the starting point.
    start_service = next(iter(model.transition_probabilities.keys()))

    print(f"Predicting service paths starting from: {start_service}\n")

    # Three independent predictions, each capped at five steps.
    for i in range(3):
        path = model.predict_service_path(start_service, max_steps=5)
        print(f"Path {i+1}: {' -> '.join(path)}")

## 6. Cluster Latency Patterns

In [None]:
# Cluster latency values and plot one histogram per cluster.
# The cell is skipped when the trace table has no 'latency' column.
if 'latency' in traces.columns:
    # Single source of truth for the cluster count: it is passed to the model
    # AND drives the plotting loop, so the two can no longer drift apart.
    n_clusters = 3
    gmm = model.cluster_latency_patterns(traces, n_clusters=n_clusters)

    # Visualize clusters
    # NOTE(review): the loop reads traces['latency_cluster']; presumably
    # cluster_latency_patterns adds that column to `traces` in place with
    # labels 0..n_clusters-1 -- confirm against the model implementation.
    plt.figure(figsize=(12, 6))
    for i in range(n_clusters):
        cluster_data = traces[traces['latency_cluster'] == i]['latency']
        plt.hist(cluster_data, bins=30, alpha=0.5, label=f'Cluster {i}')

    plt.xlabel('Latency (ms)')
    plt.ylabel('Frequency')
    plt.title('Latency Clusters')
    plt.legend()
    plt.show()

## 7. Performance Analysis

In [None]:
# Comprehensive performance analysis: latency summary, hotspots, and anomalies,
# all computed by one PerformanceAnalyzer over the same `traces` table.
analyzer = PerformanceAnalyzer()

# Latency analysis
# Every value is formatted with :.2f and an "ms" suffix, so this loop assumes
# each entry is numeric -- TODO confirm no count-like or non-numeric statistic
# is returned by analyze_latency_distribution.
latency_stats = analyzer.analyze_latency_distribution(traces)
print("Latency Statistics:")
for key, value in latency_stats.items():
    print(f"  {key}: {value:.2f} ms")

# Identify hotspots
# The heading is printed before the call so it precedes anything the call emits.
print("\nPerformance Hotspots:")
hotspots = analyzer.identify_hotspots(traces, top_n=5)
print(hotspots)

# Detect anomalies
# Only the number of results is reported; `anomalies` must support len().
anomalies = analyzer.detect_anomalies(traces)
print(f"\nDetected {len(anomalies)} anomalous traces")

## 8. Save Models

In [None]:
# Persist the fitted interaction models so they can be reloaded outside this notebook.
# NOTE(review): whether save_models creates a missing '../models' directory is not
# visible here -- confirm before running on a fresh checkout.
models_output_path = '../models/interaction_models.pkl'
model.save_models(models_output_path)
print("Models saved successfully")