# Lab 14: Advanced C2 Traffic Analysis

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/depalmar/ai_for_the_win/blob/main/notebooks/lab14_c2_traffic.ipynb)

Detect command-and-control communications from modern attack frameworks using ML and AI.

## Learning Objectives
- Multi-framework C2 detection (Cobalt Strike, Sliver, Havoc, Brute Ratel, Mythic)
- Advanced beaconing detection with jitter analysis
- DNS tunneling and DoH/DoT covert channels
- JA3/JA3S and JA4+ TLS fingerprinting
- Encrypted traffic analysis without decryption
- LLM-powered traffic pattern classification
- MITRE ATT&CK mapping (T1071, T1572, T1573)

## Modern C2 Landscape

Modern C2 frameworks employ sophisticated evasion techniques:
- **Malleable profiles**: Customizable traffic patterns mimicking legitimate apps
- **Domain fronting**: Hiding C2 behind legitimate CDN domains
- **Sleep/jitter**: Variable timing to evade statistical detection
- **Encrypted channels**: TLS, DNS-over-HTTPS, custom encryption
- **Protocol tunneling**: HTTP/S, DNS, SMB, ICMP tunneling

In [None]:
!pip install pandas numpy scikit-learn anthropic -q

In [None]:
import pandas as pd
import numpy as np
from typing import List, Dict, Tuple, Optional
from collections import Counter
from dataclasses import dataclass, field
from enum import Enum
from datetime import datetime, timedelta
import hashlib
import re
import json

# C2 Framework definitions with realistic characteristics
class C2Framework(Enum):
    """Identifiers for the command-and-control frameworks referenced in this lab.

    Each member's value is a lowercase string label. Only COBALT_STRIKE,
    SLIVER, HAVOC, BRUTE_RATEL and MYTHIC have an entry in
    ``C2_FRAMEWORK_PROFILES``; the remaining members are identifiers only.
    """
    COBALT_STRIKE = "cobalt_strike"
    SLIVER = "sliver"
    HAVOC = "havoc"
    BRUTE_RATEL = "brute_ratel"
    MYTHIC = "mythic"
    POSHC2 = "poshc2"
    COVENANT = "covenant"
    MERLIN = "merlin"
    UNKNOWN = "unknown"  # placeholder member; no profile is defined for it

# Comprehensive C2 framework fingerprints
#
# Maps a C2Framework member to a profile dict. Keys shared by every profile:
#   name            - display name
#   default_ports   - candidate destination ports (one is picked per generated capture)
#   default_jitter  - jitter as a fraction of the sleep time (0.1 == 10%)
#   default_sleep   - beacon sleep in milliseconds (generate_c2_traffic feeds it
#                     to timedelta(milliseconds=...))
#   ja3_hashes      - client TLS fingerprints attributed to the framework
#   uri_patterns    - request paths associated with the framework
#   mitre           - MITRE ATT&CK technique IDs
# Some profiles carry extra, framework-specific keys (ja3s_hashes,
# malleable_indicators, mtls_indicators, websocket_c2, doh_support, agents).
# NOTE(review): the hash values are lab data; verify against a current threat
# intel source before using them for real detection.
C2_FRAMEWORK_PROFILES = {
    C2Framework.COBALT_STRIKE: {
        "name": "Cobalt Strike",
        "default_ports": [80, 443, 8080, 8443],
        "default_jitter": 0.1,  # 10% jitter
        "default_sleep": 60000,  # 60 seconds
        "ja3_hashes": [
            "72a589da586844d7f0818ce684948eea",  # Default beacon
            "a0e9f5d64349fb13191bc781f81f42e1",  # Common variant
            "e7d705a3286e19ea42f587b344ee6865",  # 4.x version
        ],
        "ja3s_hashes": ["b742b407517bac9536a77a7b0fee28e9"],
        "uri_patterns": ["/submit.php", "/__utm.gif", "/pixel.gif", "/ca", "/push", "/visit.js"],
        "malleable_indicators": ["transform-x64", "transform-x86", "http-get", "http-post"],
        "mitre": ["T1071.001", "T1572", "T1573.002"],
    },
    C2Framework.SLIVER: {
        "name": "Sliver",
        "default_ports": [443, 8888, 31337],
        "default_jitter": 0.3,  # 30% jitter
        "default_sleep": 30000,  # 30 seconds
        "ja3_hashes": [
            "19e29534fd49dd27d09234e639c4057e",
            "c12f54a3f91dc7bafd92c258cfd27665",
        ],
        "ja3s_hashes": ["80b3a14bccc8598a1f3bbe83e71f735f"],
        "uri_patterns": ["/rpc/", "/api/", "/.well-known/", "/static/"],
        "mtls_indicators": True,
        "mitre": ["T1071.001", "T1071.004", "T1573.002"],
    },
    C2Framework.HAVOC: {
        "name": "Havoc",
        "default_ports": [443, 40056],
        "default_jitter": 0.2,  # 20% jitter
        "default_sleep": 5000,  # 5 seconds
        "ja3_hashes": ["bd0bf25947d4a37404f0424edf4db9ad"],
        "uri_patterns": ["/", "/demon", "/agent"],
        "websocket_c2": True,
        "mitre": ["T1071.001", "T1571"],
    },
    C2Framework.BRUTE_RATEL: {
        "name": "Brute Ratel C4",
        "default_ports": [443, 8443],
        "default_jitter": 0.4,  # High jitter for evasion
        "default_sleep": 120000,  # 120 seconds
        "ja3_hashes": ["cd08e31494f9531f560d64c695473da9"],
        "uri_patterns": ["/api/v1/", "/news/", "/update/check"],
        "doh_support": True,  # DNS over HTTPS
        "mitre": ["T1071.001", "T1071.004", "T1568"],
    },
    C2Framework.MYTHIC: {
        "name": "Mythic",
        "default_ports": [80, 443, 7443],
        "default_jitter": 0.25,  # 25% jitter
        "default_sleep": 10000,  # 10 seconds
        "ja3_hashes": ["a441a33aaee795f498d6b764cc78989d"],
        "uri_patterns": ["/api/v1/", "/agent/", "/crypto/"],
        "agents": ["Apollo", "Athena", "Poseidon", "Apfell"],
        "mitre": ["T1071.001", "T1573.002"],
    }
}

@dataclass
class NetworkFlow:
    """Network flow with C2-relevant features.

    One instance describes a single connection. The first eleven fields are
    required; the TLS/HTTP metadata fields default to empty values so a flow
    can be built without them.
    """
    timestamp: datetime  # the beacon detector sorts flows on this field
    src_ip: str
    src_port: int
    dst_ip: str
    dst_port: int
    protocol: str  # the generators in this lab always set "TLS"
    bytes_sent: int
    bytes_received: int
    packets_sent: int
    packets_received: int
    duration_ms: float  # presumably connection duration in milliseconds, per the name
    ja3_hash: str = ""  # client TLS fingerprint; empty when not recorded
    ja3s_hash: str = ""  # server TLS fingerprint; empty when not recorded
    server_name: str = ""
    uri: str = ""
    user_agent: str = ""
    http_method: str = ""
    response_code: int = 0  # 0 is the "not set" default
    
# Sanity check that the profile table above was defined in this session
print(f"Loaded {len(C2_FRAMEWORK_PROFILES)} C2 framework profiles")

## 1. Beaconing Detection

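C2 beacons call back at a roughly fixed sleep interval, so the time between connections shows low jitter (variation) compared with human-driven traffic. Frameworks add configurable jitter to blur this pattern, which is why the detector below scores several jitter bands instead of applying a single cutoff.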

In [None]:
def generate_c2_traffic(framework: C2Framework, num_connections: int = 100, 
                         custom_jitter: Optional[float] = None,
                         custom_sleep: Optional[int] = None) -> List[NetworkFlow]:
    """Generate realistic C2 beacon traffic for a specific framework.

    Args:
        framework: Framework whose profile in ``C2_FRAMEWORK_PROFILES`` drives
            the ports, fingerprints and URIs. A framework without a profile
            falls back to generic defaults (port 443, "unknown" fingerprints).
        num_connections: Number of beacon flows to emit.
        custom_jitter: Jitter as a fraction of the sleep time. ``None`` uses the
            profile default; an explicit ``0`` means perfectly regular beacons.
        custom_sleep: Sleep between beacons in milliseconds. ``None`` uses the
            profile default.

    Returns:
        ``num_connections`` flows from one fixed client to one randomly chosen
        server, in chronological order, starting roughly two hours ago.
    """
    profile = C2_FRAMEWORK_PROFILES.get(framework, {})

    # Compare against None, not truthiness: 0 and 0.0 are valid overrides
    # and must not silently fall back to the profile default.
    jitter = profile.get("default_jitter", 0.1) if custom_jitter is None else custom_jitter
    sleep_ms = profile.get("default_sleep", 60000) if custom_sleep is None else custom_sleep

    flows = []
    current_time = datetime.now() - timedelta(hours=2)

    # C2 server characteristics are fixed for the whole capture.
    # np.random.choice returns NumPy scalars; convert them so the dataclass
    # fields hold the plain int/str types they are annotated with.
    dst_ip = f"185.{np.random.randint(0,255)}.{np.random.randint(0,255)}.{np.random.randint(0,255)}"
    dst_port = int(np.random.choice(profile.get("default_ports", [443])))
    ja3 = str(np.random.choice(profile.get("ja3_hashes", ["unknown"])))
    ja3s_candidates = profile.get("ja3s_hashes")
    ja3s = ja3s_candidates[0] if ja3s_candidates else "unknown"
    uris = profile.get("uri_patterns", ["/"])

    for i in range(num_connections):
        # Next check-in: sleep time scaled by a uniform factor in [1-jitter, 1+jitter)
        jitter_factor = 1 + np.random.uniform(-jitter, jitter)
        current_time = current_time + timedelta(milliseconds=sleep_ms * jitter_factor)

        # Beacon characteristics
        bytes_sent = np.random.randint(100, 500)  # Small beacons
        bytes_received = np.random.randint(50, 2000)  # Variable response

        flows.append(NetworkFlow(
            timestamp=current_time,
            src_ip="192.168.1.100",
            src_port=np.random.randint(49152, 65535),
            dst_ip=dst_ip,
            dst_port=dst_port,
            protocol="TLS",
            bytes_sent=bytes_sent,
            bytes_received=bytes_received,
            packets_sent=np.random.randint(3, 10),
            packets_received=np.random.randint(2, 8),
            duration_ms=np.random.uniform(50, 500),
            ja3_hash=ja3,
            ja3s_hash=ja3s,
            server_name=f"cdn-{np.random.randint(1,99)}.cloudfront.net",
            uri=str(np.random.choice(uris)),
            # Every fifth beacon is a POST, the rest are GETs
            http_method="POST" if i % 5 == 0 else "GET",
            response_code=200
        ))

    return flows

def generate_normal_traffic(num_connections: int = 200) -> List[NetworkFlow]:
    """Generate normal user browsing traffic.

    Connections go from one fixed client to a small set of well-known sites.
    The gap between consecutive connections is drawn from an exponential
    distribution with a 30-second mean, so the traffic has no fixed period.

    Args:
        num_connections: Number of flows to emit.

    Returns:
        ``num_connections`` flows in chronological order, starting roughly two
        hours ago.
    """
    flows = []
    current_time = datetime.now() - timedelta(hours=2)

    # Common legitimate destinations: (ip, server name, port)
    destinations = [
        ("142.250.185.46", "google.com", 443),
        ("157.240.1.35", "facebook.com", 443),
        ("52.94.236.248", "amazon.com", 443),
        ("104.244.42.193", "twitter.com", 443),
        ("13.107.42.14", "microsoft.com", 443),
    ]

    for _ in range(num_connections):
        # Random intervals (not beaconing). Accumulate each gap onto the
        # previous timestamp; multiplying the loop index by a fresh draw would
        # yield unordered timestamps whose spacing is not exponential.
        interval = np.random.exponential(30)
        current_time = current_time + timedelta(seconds=interval)

        dst_ip, server_name, dst_port = destinations[np.random.randint(0, len(destinations))]

        flows.append(NetworkFlow(
            timestamp=current_time,
            src_ip="192.168.1.100",
            src_port=np.random.randint(49152, 65535),
            dst_ip=dst_ip,
            dst_port=dst_port,
            protocol="TLS",
            bytes_sent=np.random.randint(500, 5000),
            bytes_received=np.random.randint(1000, 100000),
            packets_sent=np.random.randint(5, 50),
            packets_received=np.random.randint(10, 200),
            duration_ms=np.random.uniform(100, 5000),
            ja3_hash="769,47-53-5-10-49171-49172-49161-49162",  # Common browser
            ja3s_hash="",
            server_name=server_name,
            # About 70% of requests hit the site root, the rest a numbered page
            uri="/" if np.random.random() > 0.3 else f"/page{np.random.randint(1,100)}",
            http_method="GET",
            response_code=200
        ))

    return flows

class AdvancedBeaconDetector:
    """Advanced beacon detection with jitter analysis and framework identification.

    Scores a set of flows on how regular the gaps between consecutive
    connections are. The score combines the coefficient of variation of the
    gaps ("jitter"), their skewness, and autocorrelation peaks.
    """

    def __init__(self, min_connections: int = 10, jitter_threshold: float = 0.5):
        """
        Args:
            min_connections: Fewer flows than this are not analysed.
            jitter_threshold: Upper jitter bound (as a fraction) that still
                earns the smallest jitter score.
        """
        self.min_connections = min_connections
        self.jitter_threshold = jitter_threshold

    def analyze_intervals(self, flows: List["NetworkFlow"]) -> Dict:
        """Analyze connection intervals for beaconing patterns.

        Args:
            flows: Objects exposing a ``timestamp`` datetime; order is irrelevant.

        Returns:
            ``{"is_beacon": False, "reason": ...}`` when there is too little
            data. Otherwise a dict with ``is_beacon`` (score above 0.5),
            ``beacon_score``, a ``statistics`` sub-dict, ``estimated_sleep_ms``,
            ``estimated_jitter`` and ``connection_count``.
        """
        if len(flows) < self.min_connections:
            return {"is_beacon": False, "reason": "Insufficient connections"}

        # Gaps, in seconds, between chronologically adjacent connections
        timestamps = sorted(flow.timestamp for flow in flows)
        intervals = np.array([
            (later - earlier).total_seconds()
            for earlier, later in zip(timestamps, timestamps[1:])
        ])
        if intervals.size == 0:
            return {"is_beacon": False, "reason": "No intervals calculated"}

        # Statistical analysis
        mean_interval = np.mean(intervals)
        std_interval = np.std(intervals)
        median_interval = np.median(intervals)

        # Jitter is the coefficient of variation; a zero mean is treated as
        # fully irregular so it earns no jitter score.
        jitter = std_interval / mean_interval if mean_interval > 0 else 1.0

        # Skewness (beacons are typically symmetric around the mean)
        skewness = 0
        if std_interval > 0:
            skewness = np.mean(((intervals - mean_interval) / std_interval) ** 3)

        periodicity_score = self._periodicity_score(intervals - mean_interval)
        beacon_score = self._score(jitter, skewness, periodicity_score)

        return {
            "is_beacon": beacon_score > 0.5,
            "beacon_score": round(beacon_score, 3),
            "statistics": {
                "mean_interval_sec": round(mean_interval, 2),
                "std_interval_sec": round(std_interval, 2),
                "median_interval_sec": round(median_interval, 2),
                "jitter_percent": round(jitter * 100, 1),
                "skewness": round(skewness, 3),
                "periodicity_score": round(periodicity_score, 3),
            },
            "estimated_sleep_ms": int(mean_interval * 1000),
            "estimated_jitter": round(jitter, 3),
            "connection_count": len(flows),
        }

    @staticmethod
    def _periodicity_score(centered: np.ndarray) -> float:
        """Count autocorrelation peaks of the mean-centred gaps, divided by 10."""
        autocorr = np.correlate(centered, centered, mode='full')
        autocorr = autocorr[len(autocorr)//2:]  # Keep lags >= 0

        # The lag-0 value is the sum of squares. It is zero when every gap is
        # identical, and normalising by it would be 0/0 (NaN plus a
        # RuntimeWarning), so such input reports no extra periodicity.
        if len(autocorr) <= 10 or autocorr[0] <= 0:
            return 0.0
        autocorr = autocorr / autocorr[0]

        # A peak is a local maximum above 0.3 within the first 50 lags
        peak_count = sum(
            1
            for lag in range(1, min(len(autocorr) - 1, 50))
            if autocorr[lag] > 0.3
            and autocorr[lag] > autocorr[lag - 1]
            and autocorr[lag] > autocorr[lag + 1]
        )
        return peak_count / 10

    def _score(self, jitter: float, skewness: float, periodicity_score: float) -> float:
        """Combine the individual signals into a beacon score."""
        beacon_score = 0.0

        # Low jitter = high beacon probability
        if jitter < 0.1:
            beacon_score += 0.4
        elif jitter < 0.2:
            beacon_score += 0.3
        elif jitter < 0.3:
            beacon_score += 0.2
        elif jitter < self.jitter_threshold:
            beacon_score += 0.1

        # Regular intervals (low skewness)
        if abs(skewness) < 0.5:
            beacon_score += 0.2

        # Periodicity contributes at most 0.3
        beacon_score += min(periodicity_score * 0.3, 0.3)

        # Baseline for having enough connections to analyse at all
        beacon_score += 0.1
        return beacon_score

# Generate traffic samples
print("Generating C2 and normal traffic samples...")
print("=" * 60)

# Generate C2 traffic for each framework
# c2_samples maps C2Framework -> list of flows and is reused by the TLS
# fingerprinting cell further down. Mythic has a profile but is not sampled here.
c2_samples = {}
for framework in [C2Framework.COBALT_STRIKE, C2Framework.SLIVER, C2Framework.HAVOC, C2Framework.BRUTE_RATEL]:
    # 50 connections comfortably clears the detector's default minimum of 10
    c2_samples[framework] = generate_c2_traffic(framework, num_connections=50)
    print(f"Generated {len(c2_samples[framework])} flows for {framework.value}")

# Generate normal traffic
normal_flows = generate_normal_traffic(num_connections=100)
print(f"Generated {len(normal_flows)} normal traffic flows")

# Analyze each sample
detector = AdvancedBeaconDetector()
print("\n" + "=" * 60)
print("Beacon Detection Analysis")
print("=" * 60)

for framework, flows in c2_samples.items():
    result = detector.analyze_intervals(flows)
    profile = C2_FRAMEWORK_PROFILES[framework]
    print(f"\n{framework.value.upper()}:")
    print(f"  Beacon detected: {result['is_beacon']}")
    print(f"  Beacon score: {result['beacon_score']:.1%}")
    # Compare the detector's estimates with the values the generator was configured with
    print(f"  Estimated sleep: {result['estimated_sleep_ms']}ms (expected: {profile['default_sleep']}ms)")
    print(f"  Estimated jitter: {result['estimated_jitter']:.1%} (expected: {profile['default_jitter']:.1%})")

# Normal traffic analysis
normal_result = detector.analyze_intervals(normal_flows)
print(f"\nNORMAL TRAFFIC:")
print(f"  Beacon detected: {normal_result['is_beacon']}")
print(f"  Beacon score: {normal_result['beacon_score']:.1%}")
print(f"  Jitter: {normal_result['statistics']['jitter_percent']}% (high = not a beacon)")

## 2. DNS Tunneling Detection

In [None]:
# Enhanced DNS Tunneling Detection

# DNS tunneling sample data (more comprehensive)
# Maps a category label to example query names. "normal" holds benign
# hostnames; every other category imitates the query shape of one tunneling
# technique. These are hand-written examples, not captured traffic.
DNS_SAMPLES = {
    "normal": [
        "www.google.com",
        "api.github.com", 
        "cdn.example.com",
        "mail.company.com",
        "static.cloudflare.com",
        "fonts.googleapis.com",
        "analytics.google.com",
        "login.microsoftonline.com",
    ],
    "dnscat2": [  # "dnscat." prefix followed by 32 hex characters
        "dnscat.a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4.evil.com",
        "dnscat.f6e5d4c3b2a1f6e5d4c3b2a1f6e5d4c3.evil.com",
        "dnscat.1a2b3c4d5e6f1a2b3c4d5e6f1a2b3c4d.evil.com",
    ],
    "iodine": [  # short "t<digit>XXXX" label followed by a single-letter label
        "t1AAAB.z.tunnel.example.com",
        "t1AAAC.z.tunnel.example.com",
        "t1AAAD.z.tunnel.example.com",
    ],
    "cobalt_strike_dns": [  # long base64-looking label in front of ".cdn."
        "aGVsbG8gd29ybGQgdGhpcyBpcyBhIHRlc3Q.cdn.evil.com",
        "c29tZSBlbmNvZGVkIGRhdGEgaGVyZQ.cdn.evil.com",
        "bW9yZSBkYXRhIGV4ZmlsdHJhdGlvbg.cdn.evil.com",
    ],
    "sliver_dns": [  # "_dmarc" / "txt" label followed by a 12-character label
        "_dmarc.1a2b3c4d5e6f.sliver.attacker.com",
        "_dmarc.7g8h9i0j1k2l.sliver.attacker.com",
        "txt.3m4n5o6p7q8r.sliver.attacker.com",
    ],
    # NOTE: these two entries are resolver URLs, not bare DNS names; the
    # detector below still splits them on "." like any other query.
    "doh_tunnel": [  # DNS over HTTPS tunneling
        "dns.google/resolve?name=encoded-payload.evil.com",
        "cloudflare-dns.com/dns-query?name=data123.evil.com",
    ]
}

class AdvancedDNSTunnelDetector:
    """Advanced DNS tunneling detection with multiple technique identification.

    Each query name is split into subdomain / registered domain / TLD using a
    simple "last two labels" rule and scored on entropy, length, encoding
    patterns, TLD reputation and known tool signatures. The two-label rule
    does not understand multi-label public suffixes such as ``co.uk``.
    """

    def __init__(self):
        # Thresholds applied to the subdomain portion of the name
        self.subdomain_entropy_threshold = 3.5
        self.subdomain_length_threshold = 25

        # Known DNS tunneling tool patterns. These are searched anywhere in the
        # name, case-insensitively, and the first matching entry wins.
        self.tool_signatures = {
            "dnscat2": r"dnscat\.[a-f0-9]{32}",
            "iodine": r"t[0-9][A-Za-z0-9]{4}\.[a-z]\.",
            "cobalt_strike": r"[A-Za-z0-9+/=]{20,}\.cdn\.",
            "sliver": r"(_dmarc|txt)\.[a-z0-9]{12,}\.",
        }

        # Suspicious TLD patterns
        self.suspicious_tlds = {".tk", ".ml", ".ga", ".cf", ".gq", ".xyz", ".top", ".work"}

    def calculate_entropy(self, s: str) -> float:
        """Calculate Shannon entropy of a string, in bits per character.

        Returns 0.0 for an empty string.
        """
        if not s:
            return 0.0
        total = len(s)
        return -sum((count / total) * np.log2(count / total) for count in Counter(s).values())

    def analyze_query(self, query: str) -> Dict:
        """Comprehensive DNS query analysis.

        Args:
            query: Query name. Surrounding whitespace and a trailing root dot
                (``"example.com."``) are ignored for analysis.

        Returns:
            Dict with the original ``query``, the derived ``subdomain`` and
            ``domain``, ``is_suspicious`` (score above 0.4), ``tunnel_score``,
            ``detected_tool`` (or ``None``), human-readable ``indicators`` and
            the raw ``metrics``.
        """
        # A fully-qualified name ends with "."; left in place it would produce
        # an empty last label, i.e. TLD "." and domain "com.".
        name = query.strip().rstrip('.')
        labels = name.split('.')

        # Extract components
        subdomain = '.'.join(labels[:-2]) if len(labels) > 2 else ""
        domain = '.'.join(labels[-2:]) if len(labels) >= 2 else name
        tld = f".{labels[-1]}"

        # Calculate metrics
        subdomain_entropy = self.calculate_entropy(subdomain) if subdomain else 0
        subdomain_length = len(subdomain)

        # Number of dot-separated labels in the name
        label_count = len(labels)

        # Numeric ratio (high in encoded data)
        numeric_ratio = sum(c.isdigit() for c in subdomain) / subdomain_length if subdomain else 0

        # Consonant ratio (reported as a metric only; it does not affect the score)
        vowels = set("aeiouAEIOU")
        consonant_count = sum(1 for c in subdomain if c.isalpha() and c not in vowels)
        consonant_ratio = consonant_count / subdomain_length if subdomain else 0

        # Runs of 16+ characters from the base64 / hex alphabets. Any hex run
        # also satisfies the base64 pattern, so hex data scores on both.
        base64_pattern = re.search(r'[A-Za-z0-9+/=]{16,}', subdomain) is not None
        hex_pattern = re.search(r'[a-f0-9]{16,}', subdomain.lower()) is not None

        # Detect tool signatures (first match in dict order)
        detected_tool = next(
            (tool for tool, pattern in self.tool_signatures.items()
             if re.search(pattern, name, re.IGNORECASE)),
            None,
        )

        # Calculate tunnel score: each triggered check adds its weight and
        # records an indicator, in this fixed order.
        checks = [
            (subdomain_entropy > self.subdomain_entropy_threshold, 0.25,
             f"High entropy: {subdomain_entropy:.2f}"),
            (subdomain_length > self.subdomain_length_threshold, 0.2,
             f"Long subdomain: {subdomain_length} chars"),
            (base64_pattern, 0.2, "Base64-like encoding"),
            (hex_pattern, 0.15, "Hex-like encoding"),
            (numeric_ratio > 0.4, 0.1, f"High numeric ratio: {numeric_ratio:.1%}"),
            (tld.lower() in self.suspicious_tlds, 0.1, f"Suspicious TLD: {tld}"),
            (detected_tool is not None, 0.3, f"Tool signature: {detected_tool}"),
        ]
        tunnel_score = 0.0
        indicators = []
        for triggered, weight, message in checks:
            if triggered:
                tunnel_score += weight
                indicators.append(message)

        return {
            "query": query,
            "subdomain": subdomain,
            "domain": domain,
            "is_suspicious": tunnel_score > 0.4,
            "tunnel_score": round(tunnel_score, 2),
            "detected_tool": detected_tool,
            "indicators": indicators,
            "metrics": {
                "subdomain_length": subdomain_length,
                "subdomain_entropy": round(subdomain_entropy, 2),
                "label_count": label_count,
                "numeric_ratio": round(numeric_ratio, 2),
                "consonant_ratio": round(consonant_ratio, 2),
            }
        }

    def analyze_batch(self, queries: List[str]) -> Dict:
        """Analyze a batch of DNS queries for tunneling.

        Returns:
            Dict with ``total_queries``, ``suspicious_count``,
            ``suspicion_rate`` (0 for an empty batch), ``tools_detected``
            (sorted tool names) and ``top_suspicious`` (up to five results,
            highest score first).
        """
        results = [self.analyze_query(q) for q in queries]

        suspicious = [r for r in results if r["is_suspicious"]]
        tools_detected = {r["detected_tool"] for r in results if r["detected_tool"]}

        return {
            "total_queries": len(queries),
            "suspicious_count": len(suspicious),
            "suspicion_rate": len(suspicious) / len(queries) if queries else 0,
            # Sorted so the output does not depend on set iteration order
            "tools_detected": sorted(tools_detected),
            "top_suspicious": sorted(suspicious, key=lambda x: x["tunnel_score"], reverse=True)[:5]
        }

# Test DNS tunnel detection
dns_detector = AdvancedDNSTunnelDetector()

print("DNS Tunneling Detection Analysis")
print("=" * 70)

# Walk every sample category and print the verdict for its first queries
for category, queries in DNS_SAMPLES.items():
    print(f"\n{category.upper().replace('_', ' ')}:")
    print("-" * 40)
    
    for query in queries[:3]:  # Show first 3
        result = dns_detector.analyze_query(query)
        status = "SUSPICIOUS" if result["is_suspicious"] else "NORMAL"
        print(f"  [{status}] Score: {result['tunnel_score']:.0%}")
        # Long query names are truncated to 60 characters for display only
        print(f"    Query: {query[:60]}{'...' if len(query) > 60 else ''}")
        if result["indicators"]:
            # At most three indicators are shown per query
            print(f"    Indicators: {', '.join(result['indicators'][:3])}")
        if result["detected_tool"]:
            print(f"    Tool detected: {result['detected_tool']}")

## 3. JA3 Fingerprinting

In [None]:
# Comprehensive JA3/JA3S Fingerprint Database
#
# Maps a JA3 fingerprint to {"tool", "type", "confidence"}. "type" is one of
# "c2", "malware", "rat" or "legitimate"; TLSFingerprintAnalyzer treats the
# first three as malicious.
# NOTE(review): lab data - verify the hashes against a current threat intel
# source before relying on them for real detection.

JA3_DATABASE = {
    # C2 Frameworks (the same hashes appear in C2_FRAMEWORK_PROFILES)
    "72a589da586844d7f0818ce684948eea": {"tool": "Cobalt Strike", "type": "c2", "confidence": "high"},
    "a0e9f5d64349fb13191bc781f81f42e1": {"tool": "Cobalt Strike (variant)", "type": "c2", "confidence": "high"},
    "e7d705a3286e19ea42f587b344ee6865": {"tool": "Cobalt Strike 4.x", "type": "c2", "confidence": "high"},
    "19e29534fd49dd27d09234e639c4057e": {"tool": "Sliver", "type": "c2", "confidence": "high"},
    "c12f54a3f91dc7bafd92c258cfd27665": {"tool": "Sliver (mTLS)", "type": "c2", "confidence": "high"},
    "bd0bf25947d4a37404f0424edf4db9ad": {"tool": "Havoc", "type": "c2", "confidence": "high"},
    "cd08e31494f9531f560d64c695473da9": {"tool": "Brute Ratel", "type": "c2", "confidence": "medium"},
    "a441a33aaee795f498d6b764cc78989d": {"tool": "Mythic", "type": "c2", "confidence": "medium"},
    
    # Malware families
    "e35df3e00ca4ef31d42b34bebaa2f86e": {"tool": "Trickbot", "type": "malware", "confidence": "high"},
    "6734f37431670b3ab4292b8f60f29984": {"tool": "Emotet", "type": "malware", "confidence": "high"},
    "51c64c77e60f3980eea90869b68c58a8": {"tool": "QakBot", "type": "malware", "confidence": "high"},
    "3b5074b1b5d032e5620f69f9f700ff0e": {"tool": "IcedID", "type": "malware", "confidence": "high"},
    # Differs from the Trickbot key above only in its last character
    "e35df3e00ca4ef31d42b34bebaa2f86f": {"tool": "Bumblebee", "type": "malware", "confidence": "medium"},
    
    # RATs
    "9e10692f1b7f78228b2a4e927d0c780b": {"tool": "AsyncRAT", "type": "rat", "confidence": "high"},
    "cf87d0f23e0d10c2c2a8d8e4e9a8f2b3": {"tool": "Remcos RAT", "type": "rat", "confidence": "high"},
    "7dd50e112cd23734a310b90f6f44a7cd": {"tool": "njRAT", "type": "rat", "confidence": "medium"},
    
    # Legitimate applications (for false positive reduction)
    # The first three keys are raw "version,ciphers" strings, not 32-character
    # digests; generate_normal_traffic emits the Chrome one verbatim.
    "769,47-53-5-10-49171-49172-49161-49162": {"tool": "Chrome Browser", "type": "legitimate", "confidence": "high"},
    "771,4865-4866-4867-49195-49199": {"tool": "Firefox Browser", "type": "legitimate", "confidence": "high"},
    "771,49196-49200-159-52393-52392": {"tool": "Safari Browser", "type": "legitimate", "confidence": "high"},
    "b32309a26951912be7dba376398abc3b": {"tool": "Windows Update", "type": "legitimate", "confidence": "high"},
    "473cd7cb9faa642487833865d516e578": {"tool": "Microsoft Edge", "type": "legitimate", "confidence": "high"},
}

# JA4+ fingerprinting (newer, more robust)
# Maps a JA4 fingerprint string to {"tool", "confidence"}. The analyzer class
# below stores this table but none of its methods query it.
JA4_SIGNATURES = {
    "t13d1516h2_8daaf6152771_b0da82dd1658": {"tool": "Cobalt Strike", "confidence": "high"},
    "t13d1715h2_5b57614c22b0_3d5424432f57": {"tool": "Sliver", "confidence": "high"},
}

class TLSFingerprintAnalyzer:
    """Advanced TLS fingerprint analysis for C2 detection.

    Looks up JA3 (client) and JA3S (server) fingerprints in known-signature
    tables and summarises the matches across a batch of flows.
    """

    # Database "type" values that count as malicious
    _MALICIOUS_TYPES = frozenset({"c2", "malware", "rat"})

    # Server fingerprints are less common but valuable
    _KNOWN_C2_SERVERS = {
        "b742b407517bac9536a77a7b0fee28e9": "Cobalt Strike Team Server",
        "80b3a14bccc8598a1f3bbe83e71f735f": "Sliver C2 Server",
    }

    # MITRE ATT&CK techniques reported per database "type"
    _MITRE_BY_TYPE = {
        "c2": ["T1071.001", "T1573.002", "T1572"],
        "malware": ["T1105", "T1071.001"],
        "rat": ["T1219", "T1071.001"],
    }

    def __init__(self):
        self.ja3_db = JA3_DATABASE
        self.ja4_db = JA4_SIGNATURES  # stored for callers; not queried by this class

    @staticmethod
    def _lookup(table: Dict, fingerprint):
        """Return the table entry for a fingerprint, or None.

        Tries the value exactly as given first, then a trimmed, lower-cased
        form so upper-case hex digests still match.
        """
        entry = table.get(fingerprint)
        if entry is None and isinstance(fingerprint, str):
            entry = table.get(fingerprint.strip().lower())
        return entry

    def analyze_ja3(self, ja3_hash: str) -> Dict:
        """Analyze JA3 hash against known signatures.

        Returns:
            Dict with ``match``, ``ja3`` (the input as given), ``tool``,
            ``type``, ``confidence``, ``is_malicious`` and ``mitre``. An
            unknown hash yields ``match=False`` and an empty ``mitre`` list,
            so both outcomes share the same keys.
        """
        info = self._lookup(self.ja3_db, ja3_hash)
        if info is None:
            return {
                "match": False,
                "ja3": ja3_hash,
                "tool": "Unknown",
                "type": "unknown",
                "confidence": "none",
                "is_malicious": False,
                "mitre": [],
            }
        return {
            "match": True,
            "ja3": ja3_hash,
            "tool": info["tool"],
            "type": info["type"],
            "confidence": info["confidence"],
            "is_malicious": info["type"] in self._MALICIOUS_TYPES,
            "mitre": self._get_mitre_for_type(info["type"])
        }

    def analyze_ja3s(self, ja3s_hash: str) -> Dict:
        """Analyze JA3S (server) hash.

        Returns:
            ``match`` / ``ja3s`` / ``is_c2_server``, plus ``server_type`` when
            the hash belongs to a known C2 server.
        """
        server_type = self._lookup(self._KNOWN_C2_SERVERS, ja3s_hash)
        if server_type is None:
            return {"match": False, "ja3s": ja3s_hash, "is_c2_server": False}
        return {
            "match": True,
            "ja3s": ja3s_hash,
            "server_type": server_type,
            "is_c2_server": True
        }

    def _get_mitre_for_type(self, tool_type: str) -> List[str]:
        """Get MITRE techniques for tool type (empty list for unmapped types)."""
        # Copy so callers cannot mutate the shared class-level table
        return list(self._MITRE_BY_TYPE.get(tool_type, []))

    def batch_analyze(self, flows: List["NetworkFlow"]) -> Dict:
        """Analyze multiple flows for suspicious fingerprints.

        Args:
            flows: Objects exposing ``ja3_hash``, ``dst_ip`` and ``timestamp``;
                ``ja3s_hash`` is used when present.

        Returns:
            Dict with ``total_flows``, ``malicious_fingerprints`` (one entry
            per flow with a malicious JA3), ``c2_indicators`` (one entry per
            C2-type JA3 match or known C2 server JA3S match), ``unique_ja3``
            (sorted list) and ``framework_detections`` (tool name -> flow count).
        """
        malicious_fingerprints = []
        c2_indicators = []
        unique_ja3 = set()
        framework_detections = {}

        for flow in flows:
            seen_at = flow.timestamp.isoformat()

            if flow.ja3_hash:
                unique_ja3.add(flow.ja3_hash)
                analysis = self.analyze_ja3(flow.ja3_hash)

                if analysis["is_malicious"]:
                    tool = analysis["tool"]
                    malicious_fingerprints.append({
                        "ja3": flow.ja3_hash,
                        "tool": tool,
                        "dst_ip": flow.dst_ip,
                        "timestamp": seen_at
                    })
                    # Track framework detections
                    framework_detections[tool] = framework_detections.get(tool, 0) + 1

                    if analysis["type"] == "c2":
                        c2_indicators.append({
                            "indicator": "ja3",
                            "value": flow.ja3_hash,
                            "tool": tool,
                            "dst_ip": flow.dst_ip,
                            "timestamp": seen_at
                        })

            # Server-side fingerprint; tolerate flow objects without the field
            ja3s_hash = getattr(flow, "ja3s_hash", "")
            if ja3s_hash:
                server = self.analyze_ja3s(ja3s_hash)
                if server["is_c2_server"]:
                    c2_indicators.append({
                        "indicator": "ja3s",
                        "value": ja3s_hash,
                        "tool": server["server_type"],
                        "dst_ip": flow.dst_ip,
                        "timestamp": seen_at
                    })

        return {
            "total_flows": len(flows),
            "malicious_fingerprints": malicious_fingerprints,
            "c2_indicators": c2_indicators,
            # Sorted so the output does not depend on set iteration order
            "unique_ja3": sorted(unique_ja3),
            "framework_detections": framework_detections
        }

# Test fingerprint analysis
fp_analyzer = TLSFingerprintAnalyzer()

print("TLS Fingerprint Analysis")
print("=" * 70)

# Test against all C2 sample flows
# c2_samples comes from the beaconing cell; every flow of one framework shares
# a single JA3 hash, so inspecting the first flow is representative.
print("\nAnalyzing C2 Traffic Fingerprints:")
for framework, flows in c2_samples.items():
    if flows:
        sample_ja3 = flows[0].ja3_hash
        result = fp_analyzer.analyze_ja3(sample_ja3)
        status = "MALICIOUS" if result["is_malicious"] else "UNKNOWN"
        print(f"\n  {framework.value}:")
        print(f"    JA3: {sample_ja3[:20]}...")  # hash truncated for display
        print(f"    Status: [{status}]")
        print(f"    Detected as: {result['tool']}")
        print(f"    Confidence: {result['confidence']}")
        if result.get("mitre"):
            print(f"    MITRE: {', '.join(result['mitre'])}")

# Batch analysis
print("\n" + "=" * 70)
print("Batch Analysis Results:")
# Flatten the per-framework samples into a single list of flows
all_flows = []
for flows in c2_samples.values():
    all_flows.extend(flows)

batch_result = fp_analyzer.batch_analyze(all_flows)
print(f"  Total flows analyzed: {batch_result['total_flows']}")
print(f"  Malicious fingerprints: {len(batch_result['malicious_fingerprints'])}")
print(f"  Unique JA3 hashes: {len(batch_result['unique_ja3'])}")
print(f"  Frameworks detected: {batch_result['framework_detections']}")

## 4. Traffic Pattern Classification with ML

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Generate synthetic network flow data
# Seed NumPy's global RNG so the synthetic samples drawn below are reproducible.
# NOTE(review): train_test_split is called without random_state further down, so
# it presumably draws from this same global RNG - confirm in the scikit-learn docs.
np.random.seed(42)

def generate_flow_data(n_samples: int, is_c2: bool) -> np.ndarray:
    """Build a synthetic feature matrix of shape (n_samples, 3).

    Columns are, in order: connection interval, payload size, destination
    port. C2 rows use tightly clustered intervals and sizes on a few web
    ports; normal rows use exponentially distributed values on random ports
    in the 1024-65534 range.
    """
    rng = np.random

    if not is_c2:
        # Normal: random intervals, varied sizes, arbitrary non-privileged ports
        interval_col = rng.exponential(60, n_samples)
        size_col = rng.exponential(1000, n_samples)
        port_col = rng.randint(1024, 65535, n_samples)
    else:
        # C2: regular intervals, consistent sizes, common web ports
        interval_col = rng.normal(30, 5, n_samples)
        size_col = rng.normal(500, 50, n_samples)
        port_col = rng.choice([80, 443, 8080], n_samples)

    features = (interval_col, size_col, port_col)
    return np.column_stack(features)

# Create dataset
# 100 C2 rows stacked on top of 100 normal rows; the labels below follow the
# same order (1 = C2, 0 = normal).
X_c2 = generate_flow_data(100, is_c2=True)
X_normal = generate_flow_data(100, is_c2=False)
X = np.vstack([X_c2, X_normal])
y = np.array([1]*100 + [0]*100)

# Train classifier
# 80/20 split. NOTE: the two synthetic classes draw ports from almost disjoint
# sets ({80, 443, 8080} vs. random ports >= 1024), so the accuracy printed
# below is optimistic compared with real traffic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
clf = RandomForestClassifier(n_estimators=50, random_state=42)
clf.fit(X_train, y_train)

print(f"Model Accuracy: {clf.score(X_test, y_test):.2%}")
# Feature names are listed in the column order produced by generate_flow_data
print(f"Feature Importance: {dict(zip(['interval', 'size', 'port'], clf.feature_importances_.round(2)))}")

## 5. LLM-Powered Traffic Analysis

In [None]:
from anthropic import Anthropic
import json

def analyze_traffic_with_llm(flows: List[Dict], model: str = "claude-sonnet-4-20250514",
                             max_tokens: int = 1500) -> str:
    """Use LLM to analyze network traffic patterns.

    Args:
        flows: JSON-serialisable flow records; they are embedded verbatim in
            the prompt, so keep the list small.
        model: Anthropic model identifier. Exposed as a parameter because
            model IDs are retired over time.
        max_tokens: Upper bound on the length of the reply.

    Returns:
        The model's analysis as plain text (all text blocks of the reply
        concatenated).

    Raises:
        TypeError: If ``flows`` contains values ``json.dumps`` cannot serialise.
        Errors raised by the Anthropic client (missing API key, network or API
        failures) propagate unchanged.
    """
    # The client is created without arguments, so credentials must come from
    # its default configuration (see the "run with API key" note in this cell).
    client = Anthropic()
    
    prompt = f"""
    Analyze these network flows for C2 communication patterns.
    
    FLOWS:
    {json.dumps(flows, indent=2)}
    
    Look for:
    1. Beaconing patterns (regular intervals)
    2. Unusual port usage
    3. Suspicious payload sizes
    4. Known C2 indicators
    
    Provide MITRE ATT&CK technique mappings.
    """
    
    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        messages=[{"role": "user", "content": prompt}]
    )
    
    # A reply is a list of content blocks; join the text ones instead of
    # assuming the first block exists and is text.
    return "".join(
        block.text for block in response.content
        if getattr(block, "type", None) == "text"
    )

# Sample flows for analysis
# Three connections between the same pair of hosts with near-identical sizes
# and roughly 30-unit spacing - a deliberately beacon-like example for the LLM.
# NOTE(review): "interval" is presumably in seconds; the records do not say.
sample_flows = [
    {"src": "192.168.1.100", "dst": "45.33.32.156", "port": 443, "bytes": 512, "interval": 30},
    {"src": "192.168.1.100", "dst": "45.33.32.156", "port": 443, "bytes": 508, "interval": 31},
    {"src": "192.168.1.100", "dst": "45.33.32.156", "port": 443, "bytes": 515, "interval": 29},
]

# Uncomment to run with API key
# analysis = analyze_traffic_with_llm(sample_flows)
# print(analysis)

## Key Takeaways

1. **Beaconing**: Low jitter in connection intervals
2. **DNS Tunneling**: Long subdomains, high entropy
3. **JA3/JA3S**: TLS client and server fingerprints help identify C2 frameworks and malware families without decrypting the traffic
4. **ML Classification**: Combine features for detection

## Next Steps
- **Lab 15**: Lateral Movement Detection
- **Lab 16**: Threat Actor Profiling