# Task 2. Performance Comparison of `HyperLogLog` with Exact Unique Element Counting

Create a script to compare exact unique element counting with counting using `HyperLogLog`.

In [1]:
"""Module for comparing exact unique counting vs. HyperLogLog on IP logs.

This notebook provides:
- Loading IPs from a JSONL access log
- Exact unique counting with a set
- Approximate unique counting with HyperLogLog
- Precision and memory analysis

Follows Google Python Style Guide: https://google.github.io/styleguide/pyguide.html
"""

# Standard library imports
from __future__ import annotations

import json
import math
import random
import re
import sys
import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from dataclasses import dataclass
from hashlib import blake2b
from itertools import islice, repeat
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, List, Sequence, Tuple

import multiprocessing as mp

# Relative path to the access log analyzed by this notebook (the module
# docstring describes it as a JSONL access log).
LOG_FILE_PATH = Path("../data/lms-stage-access.log")
# Anchored dotted-quad IPv4 pattern: four dot-separated octets, each limited to
# 0-255. Octets may carry leading zeros (e.g. "01"), and `$` also matches just
# before a single trailing newline.
IP_PATTERN = re.compile(
    r"^(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$"
)
# Presumably the HyperLogLog precision `p` used when none is chosen explicitly
# (2**14 registers) -- usage is not visible in this cell; confirm against callers.
DEFAULT_PRECISION = 14
# Presumably the precisions swept by the precision/memory analysis -- confirm.
PRECISION_VALUES = (8, 10, 12, 14, 16)
# NOTE(review): looks like a batch size for streaming reads of the log -- the
# consumer is not visible here; confirm.
DEFAULT_STREAM_BATCH = 10_000
# NOTE(review): looks like a chunk size for chunked/parallel processing -- the
# consumer is not visible here; confirm.
DEFAULT_CHUNK_SIZE = 50_000

In [None]:
class HyperLogLog:
    """Approximate distinct-element counter (HyperLogLog sketch).

    Each added item is hashed to a 64-bit value. The low ``p`` bits select one
    of ``m = 2**p`` registers; the remaining ``64 - p`` bits are scanned for
    the position of their leftmost 1-bit, and every register remembers the
    largest position it has seen. The cardinality is estimated from the
    harmonic mean of ``2**register``. The relative standard error is roughly
    ``1.04 / sqrt(m)``.

    Attributes:
        p: Precision, i.e. number of hash bits used as the register index.
        m: Number of registers (``2**p``).
        registers: Per-register maximum rank observed so far.
        alpha: Bias-correction constant for the chosen ``m``.
        small_range_correction: Raw estimates at or below this threshold
            (``2.5 * m``) are replaced by linear counting when possible.
    """

    # Width in bits of the hash consumed by the sketch. 64 bits makes hash
    # collisions negligible, so no large-range correction is required.
    _HASH_BITS = 64
    # Supported precision range (the bounds used by the original HLL paper).
    _MIN_P = 4
    _MAX_P = 16

    def __init__(self, p=5):
        """Creates an empty sketch.

        Args:
            p: Precision; the sketch allocates ``2**p`` registers.

        Raises:
            ValueError: If ``p`` is outside ``[4, 16]``.
        """
        if not self._MIN_P <= p <= self._MAX_P:
            raise ValueError(
                f"p must be in [{self._MIN_P}, {self._MAX_P}], got {p!r}"
            )
        self.p = p
        self.m = 1 << p
        self.registers = [0] * self.m
        self.alpha = self._get_alpha()
        self.small_range_correction = 5 * self.m / 2  # Small-range threshold.

    def _get_alpha(self):
        """Returns the bias-correction constant for ``self.m`` registers."""
        # The constants are tabulated by register count m, not by precision p.
        if self.m <= 16:
            return 0.673
        if self.m == 32:
            return 0.697
        if self.m == 64:
            return 0.709
        return 0.7213 / (1 + 1.079 / self.m)

    def add(self, item):
        """Records one observation of ``item``.

        Args:
            item: Any object; its ``str()`` form (UTF-8 encoded) is hashed.
        """
        digest = blake2b(
            str(item).encode("utf-8"), digest_size=self._HASH_BITS // 8
        ).digest()
        x = int.from_bytes(digest, "big")
        j = x & (self.m - 1)  # Low p bits pick the register.
        w = x >> self.p  # Remaining (64 - p) bits feed the rank.
        rank = self._rho(w)
        if rank > self.registers[j]:
            self.registers[j] = rank

    def _rho(self, w):
        """Returns the 1-based position of the leftmost 1-bit of ``w``.

        ``w`` is treated as a ``(64 - p)``-bit word, so the result equals the
        number of leading zeros plus one. An all-zero word yields
        ``64 - p + 1``.
        """
        return (self._HASH_BITS - self.p) - w.bit_length() + 1

    def count(self):
        """Returns the estimated number of distinct items added, as a float."""
        harmonic_sum = sum(2.0 ** -r for r in self.registers)
        estimate = self.alpha * self.m * self.m / harmonic_sum

        # For small cardinalities the raw estimator is biased; linear counting
        # over the still-empty registers is more accurate there.
        if estimate <= self.small_range_correction:
            empty_registers = self.registers.count(0)
            if empty_registers > 0:
                return self.m * math.log(self.m / empty_registers)

        return estimate

## Results

### Expected Output:

```text
Результати порівняння:
                       Точний підрахунок    HyperLogLog             
____________________________________________________________
Унікальні елементи        100000.0            99652.0      
Час виконання (сек.)        0.45                0.1        
```