In [1]:
코드가 하는 일 요약

Setup

uni, bi, tri 빈도 테이블을 만들어서 trigram → bigram → unigram backoff 모델 구성.

sample_from_counts 는 temperature sampling, argmax_from_counts 는 argmax (deterministic pick).

Baseline

generate_baseline: backoff 모델에서 한 토큰씩 temperature sampling (T=0.7).

Speculative Step

Drafter: 작은 모델 (bigram→unigram, T_draft=0.9).

Verifier: 큰 모델 (tri→bi→uni, argmax).

Prefix-accept 규칙: drafter 제안과 verifier 예측이 일치하면 계속, 처음 불일치 시 교체 후 STOP.

실험

Prompt: "the wolf ran".

Baseline 결과와 speculative 결과 비교 출력.

100회 시뮬레이션에서 평균 몇 토큰이 prefix로 accept되는지 통계까지 확인.

SyntaxError: invalid character '→' (U+2192) (2070700283.py, line 5)

In [7]:
#준비: 토큰과 n-gram 표 만들기

In [1]:
import random
from collections import Counter, defaultdict

random.seed(42)  # 결과 재현용(초보 친화)

In [12]:
# 0) Toy corpus: lower-case it, then split on whitespace to get word tokens
corpus = "the wolf ran into the forest"
tokens = [word for word in corpus.lower().split()]

In [13]:
# 1) Frequency tables used by the backoff model
#    tri[(w1, w2)][w3] / bi[w1][w2] start empty and are filled by the counting loops.
tri = defaultdict(Counter)
bi = defaultdict(Counter)
#    uni[w] = number of occurrences of w in the corpus
uni = Counter()
uni.update(tokens)

In [14]:
# Bigram counts: bi[first][second] += 1 for every adjacent pair of tokens.
for first, second in zip(tokens[:-1], tokens[1:]):
    bi[first][second] += 1

# Trigram counts: tri[(first, second)][third] += 1 for every adjacent triple.
for start in range(len(tokens) - 2):
    pair = (tokens[start], tokens[start + 1])
    tri[pair][tokens[start + 2]] += 1

In [15]:
# 1) Unigram table: one line per distinct word with its occurrence count
print("=== Unigrams ===")
for word in uni:
    count = uni[word]
    print(f"{word!r}: {count}")

=== Unigrams ===
'the': 2
'wolf': 1
'ran': 1
'into': 1
'forest': 1


In [16]:
# 2) Bigram table: for each word, every word seen right after it and how often
print("\n=== Bigrams ===")
for prev in bi:
    followers = bi[prev]  # existing key, so the defaultdict inserts nothing here
    for nxt in followers:
        count = followers[nxt]
        print(f"({prev!r} -> {nxt!r}): {count}")


=== Bigrams ===
('the' -> 'wolf'): 1
('the' -> 'forest'): 1
('wolf' -> 'ran'): 1
('ran' -> 'into'): 1
('into' -> 'the'): 1


In [17]:
# 3) Trigram table: for each two-word context, every third word seen after it and how often
print("\n=== Trigrams ===")
for pair in tri:
    w1, w2 = pair
    for nxt, count in tri[pair].items():
        print(f"(({w1!r}, {w2!r}) -> {nxt!r}): {count}")


=== Trigrams ===
(('the', 'wolf') -> 'ran'): 1
(('wolf', 'ran') -> 'into'): 1
(('ran', 'into') -> 'the'): 1
(('into', 'the') -> 'forest'): 1


In [18]:
"""
Backoff : 
falling back to a simpler model if higher-order statistics are missing.
Try trigram first (P(next | prev2, prev1)).
If no data → back off to bigram (P(next | prev1)).
If still no data → back off to unigram (P(next)).
*In short: “If the detailed context isn’t available, step back to a simpler context.”
"""

'\nBackoff : \nfalling back to a simpler model if higher-order statistics are missing.\nTry trigram first (P(next | prev2, prev1)).\nIf no data → back off to bigram (P(next | prev1)).\nIf still no data → back off to unigram (P(next)).\n*In short: “If the detailed context isn’t available, step back to a simpler context.”\n'

In [8]:
#2) backoff 분포 꺼내기 + 샘플/최빈 선택

In [21]:
def get_counts(prev2, prev1):
    """
    Look up next-word counts for the context (prev2, prev1) with backoff.

    The tables are tried from most to least specific and the first one that
    holds a non-empty entry wins:
      1) trigram table, keyed by (prev2, prev1)
      2) bigram table, keyed by prev1
      3) unigram table (whole-corpus counts), used when both lookups miss

    Returns the table entry itself (not a copy).
    """
    lookups = (
        (tri, (prev2, prev1)),
        (bi, prev1),
    )
    for table, key in lookups:
        counts = table.get(key)  # .get() never inserts into the defaultdict
        if counts:
            return counts
    return uni

def sample_from_counts(dist, T=1.0):
    """
    Sample one token from a frequency table using temperature scaling.

    Every count is converted to a weight ``count ** (1 / T)`` and one token is
    drawn with probability proportional to its weight, using the module-level
    ``random`` generator.

    Args:
        dist: mapping of token -> count (e.g. a Counter). Must be non-empty.
        T: temperature, must be >= 0.
           - T > 1.0 flattens the weights (more random / diverse).
           - 0 < T < 1.0 sharpens them (more deterministic / greedy).
           - T == 0 is treated as the greedy limit: the highest-count token
             is returned and no random number is consumed.

    Returns:
        The selected token.

    Raises:
        ValueError: if ``dist`` is empty or ``T`` is negative.
    """
    if not dist:
        raise ValueError("cannot sample from an empty distribution")
    if T < 0:
        raise ValueError(f"temperature T must be >= 0, got {T!r}")
    if T == 0:
        # 1/T is undefined here; the limit of temperature sampling is argmax.
        return max(dist.items(), key=lambda kv: kv[1])[0]

    exponent = 1.0 / T
    toks = list(dist)
    weights = [count ** exponent for count in dist.values()]  # temperature-adjusted weights
    return random.choices(toks, weights=weights, k=1)[0]


def argmax_from_counts(dist):
    """
    Greedy pick: return the token whose count is largest.

    When several tokens share the top count, the one that comes first in the
    table's iteration order is returned.
    """
    return max(dist, key=dist.get)

In [22]:
# === Try it out on one context ===
prev2, prev1 = ("wolf", "ran")  # context: "... wolf ran"
dist = get_counts(prev2, prev1)

print("Context:", (prev2, prev1))
print("Distribution (counts):", dict(dist))

# Draw in the same order as printed: T=1.0 first, then the sharper T=0.5.
sampled_plain = sample_from_counts(dist, T=1.0)
print("\nSampled next (T=1.0):", sampled_plain)
sampled_sharp = sample_from_counts(dist, T=0.5)
print("Sampled next (T=0.5, greedier):", sampled_sharp)
print("Argmax next:", argmax_from_counts(dist))

Context: ('wolf', 'ran')
Distribution (counts): {'into': 1}

Sampled next (T=1.0): into
Sampled next (T=0.5, greedier): into
Argmax next: into


In [23]:
# --- Contexts that exercise each backoff level ---
contexts = [
    ("wolf", "ran"),    # trigram hit
    ("ran", "into"),    # trigram hit -> "the"
    ("the", "wolf"),    # trigram hit -> "ran"
    ("hello", "wolf"),  # no trigram -> bigram on "wolf"
    ("hello", "zzz"),   # no trigram, no bigram -> unigram
]

for ctx in contexts:
    prev2, prev1 = ctx
    dist = get_counts(prev2, prev1)
    print(f"\nContext: ({prev2!r}, {prev1!r})")
    print("Distribution:", dict(dist))
    sampled = sample_from_counts(dist)
    print("Sampled (T=1.0):", sampled)
    print("Argmax:", argmax_from_counts(dist))


Context: ('wolf', 'ran')
Distribution: {'into': 1}
Sampled (T=1.0): into
Argmax: into

Context: ('ran', 'into')
Distribution: {'the': 1}
Sampled (T=1.0): the
Argmax: the

Context: ('the', 'wolf')
Distribution: {'ran': 1}
Sampled (T=1.0): ran
Argmax: ran

Context: ('hello', 'wolf')
Distribution: {'ran': 1}
Sampled (T=1.0): ran
Argmax: ran

Context: ('hello', 'zzz')
Distribution: {'the': 2, 'wolf': 1, 'ran': 1, 'into': 1, 'forest': 1}
Sampled (T=1.0): forest
Argmax: the


In [25]:
### #3) Baseline: one-token-at-a-time sampling
"""
**Function:** `generate_baseline(prompt_tokens, steps=5, T=0.7)`  
generate_baseline: use a backoff n-gram model (tri→bi→uni) to
probabilistically append one token at a time after the given prompt,
continuing the sentence. T controls randomness (higher = more diverse).

**Flow:**  
1) Ensure at least 2 tokens of context  
2) Get next-token distribution via backoff  
3) Sample with temperature `T` (higher = more random)  
4) Append token and slide context window

"""

'\n**Function:** `generate_baseline(prompt_tokens, steps=5, T=0.7)`  \ngenerate_baseline: use a backoff n-gram model (tri→bi→uni) to\nprobabilistically append one token at a time after the given prompt,\ncontinuing the sentence. T controls randomness (higher = more diverse).\n\n**Flow:**  \n1) Ensure at least 2 tokens of context  \n2) Get next-token distribution via backoff  \n3) Sample with temperature `T` (higher = more random)  \n4) Append token and slide context window\n\n'

In [26]:
def generate_baseline(prompt_tokens, steps=5, T=0.7):
    """
    Baseline generator: extend a prompt one sampled token at a time.

    Each step asks the backoff n-gram model (trigram -> bigram -> unigram, via
    `get_counts`) for the next-token counts given the last two tokens, samples
    one token with temperature `T` (via `sample_from_counts`), appends it, and
    slides the two-token context window forward.

    Args:
        prompt_tokens (sequence): starting context tokens; not modified.
        steps (int): how many tokens to generate.
        T (float): sampling temperature (higher = more random).

    Returns:
        list: exactly the prompt tokens followed by the newly generated tokens.
    """
    out = list(prompt_tokens)

    # The model needs a two-token context. Build it separately from `out`, so
    # that padding for short prompts never leaks into the returned sequence.
    if len(out) >= 2:
        prev2, prev1 = out[-2], out[-1]
    elif len(out) == 1:
        # One-token prompt: duplicate the token for the context.
        prev2 = prev1 = out[0]
    else:
        # Empty prompt: None is not a key in the trigram/bigram tables, so the
        # first lookup falls through to the unigram counts.
        prev2 = prev1 = None

    for _ in range(steps):
        dist = get_counts(prev2, prev1)     # backoff distribution
        nxt = sample_from_counts(dist, T)   # probabilistic sample
        out.append(nxt)
        prev2, prev1 = prev1, nxt           # shift context window

    return out


In [28]:
#4) Speculative (core): draft → verify (prefix-accept)

In [29]:
# 1) Ensure context (>=2 tokens)
def ensure_two_token_context(prompt_tokens):
    """
    Return a list copy of the prompt that is usable as a two-token context.

    - 2 or more tokens: returned unchanged (as a new list).
    - exactly 1 token: that token duplicated, i.e. [tok, tok].
    - empty prompt: [None, None]. None is a placeholder that is not a key in
      the n-gram tables, so `.get()` lookups on it miss and callers that back
      off end up at the unigram counts.
    """
    ctx = list(prompt_tokens)
    if len(ctx) >= 2:
        return ctx
    pad = ctx[0] if ctx else None
    return [pad, pad]

In [30]:
# 2) Small-model distribution (bigram -> unigram)
def small_model_dist(prev1):
    """
    Return the drafter's next-token counts for the previous token `prev1`.

    Uses the bigram entry for `prev1` when it exists and is non-empty,
    otherwise the unigram counts. The emptiness check matters because `bi` is
    a defaultdict: merely reading `bi[x]` for an unseen `x` inserts an empty
    Counter, which `.get(x, uni)` would then return instead of falling back.
    """
    return bi.get(prev1) or uni

In [32]:
# 3) Draft a single token with the small model
def draft_one(prev1, T_draft):
    """Sample the next token from the drafter's counts for `prev1` at temperature `T_draft`."""
    drafter_counts = small_model_dist(prev1)
    return sample_from_counts(drafter_counts, T_draft)

In [33]:
# 4) Draft k tokens; the small model conditions on its own previous proposal
def make_draft(context, k=5, T_draft=0.9):
    """
    Propose `k` tokens starting from the last two tokens of `context`.

    Returns (draft, trace): `draft` is the list of proposed tokens and `trace`
    holds one (prev2, prev1, token) tuple per proposal, recording the context
    window at the moment the token was drawn.
    """
    older, newer = context[-2], context[-1]
    trace = []
    for _ in range(k):
        proposal = draft_one(newer, T_draft)
        trace.append((older, newer, proposal))
        older, newer = newer, proposal
    draft = [proposal for _, _, proposal in trace]
    return draft, trace

In [34]:
# 5) Large-model prediction: most frequent token under the backoff counts
def predict_next_argmax(prev2, prev1):
    """Return the highest-count next token for (prev2, prev1) using tri->bi->uni backoff."""
    return argmax_from_counts(get_counts(prev2, prev1))

In [35]:
# 6) Check one drafted token against the large model
def compare_step(prev2, prev1, draft_t):
    """
    Compare a drafted token with the verifier's prediction for (prev2, prev1).

    Returns (chosen, matched, verify_token): `chosen` is the drafted token
    when it equals the verifier's prediction, otherwise the prediction itself.
    """
    verify_t = predict_next_argmax(prev2, prev1)
    matched = draft_t == verify_t
    if matched:
        chosen = draft_t
    else:
        chosen = verify_t
    return chosen, matched, verify_t

In [36]:
# 7) Verify the draft left to right, keeping the matching prefix
def verify_prefix_accept(context, draft):
    """
    Walk the draft from the last two tokens of `context`.

    Every drafted token that matches the verifier is kept and becomes part of
    the context for the next check. At the first mismatch the verifier's token
    is appended in its place and checking stops.

    Returns (accepted_tokens, verify_log) where each log entry is
    (prev2, prev1, drafted, verifier_token, matched).
    """
    older, newer = context[-2], context[-1]
    accepted = []
    verify_log = []
    for proposal in draft:
        token, matched, expected = compare_step(older, newer, proposal)
        verify_log.append((older, newer, proposal, expected, matched))
        accepted.append(token)
        if not matched:
            break
        older, newer = newer, token
    return accepted, verify_log


In [37]:
# 8) Orchestrator (with optional trace printing)
# NOTE: a later cell defines `speculative_step` again; whichever cell ran last is the one in effect.
def speculative_step(prompt_tokens, k=5, T_draft=0.9, trace=True):
    """
    One speculative step: draft k tokens, then verify them (prefix-accept).

      - Drafter: bigram (else unigram) counts, sampled at temperature T_draft
      - Verifier: tri->bi->uni backoff counts, argmax
      - Rule: first mismatch => replace with the verifier's token & STOP

    Args:
        prompt_tokens (sequence): prompt tokens; any sequence, not modified.
        k (int): number of tokens to draft.
        T_draft (float): drafter sampling temperature.
        trace (bool): print the draft and verify logs when True.

    Returns:
        (draft, accepted, final_sequence), where final_sequence is a list of
        the prompt tokens followed by the accepted tokens.
    """
    context = ensure_two_token_context(prompt_tokens)
    draft, draft_trace = make_draft(context, k=k, T_draft=T_draft)
    accepted, verify_log = verify_prefix_accept(context, draft)

    # list(...) so tuple (or other non-list) prompts can be concatenated too.
    final = list(prompt_tokens) + accepted

    if trace:
        print("=== Draft stage (small model) ===")
        for i, (p2, p1, t) in enumerate(draft_trace, 1):
            print(f"[D{i}] prev2='{p2}' prev1='{p1}' -> draft='{t}'")
        print("\n=== Verify stage (large model, prefix-accept) ===")
        for i, (p2, p1, t, v, ok) in enumerate(verify_log, 1):
            status = "ACCEPT" if ok else "REPLACE+STOP"
            print(f"[V{i}] prev2='{p2}' prev1='{p1}'  draft='{t}'  verify='{v}'  ->  {status}")
        print("\nDraft   :", draft)
        print("Accepted:", accepted)
        print("Final   :", " ".join(final))

    return draft, accepted, final


In [13]:
def speculative_step(prompt_tokens, k=5, T_draft=0.9, trace=True):
    """
    One speculative step, written inline: draft k tokens, then verify them.

    NOTE: this redefines `speculative_step` and shadows the helper-based
    version defined in an earlier cell.

      - Drafter (small model): bigram counts (unigram if none), sampled at
        the higher temperature T_draft -> diversity.
      - Verifier (large model): trigram -> bigram -> unigram backoff with
        argmax -> deterministic.
      - Rule: compare left to right; at the first mismatch, substitute the
        verifier's token and stop.

    Args:
        prompt_tokens (sequence): prompt tokens; any sequence, not modified.
        k (int): number of tokens to draft.
        T_draft (float): drafter sampling temperature.
        trace (bool): print the verify log and the results when True.

    Returns:
        (draft, accepted, final_sequence), where final_sequence is a list of
        the prompt tokens followed by the accepted tokens.
    """
    prompt = list(prompt_tokens)

    # Two-token starting context. A one-token prompt is duplicated; an empty
    # prompt uses None, which is not a key in the tables, so lookups fall
    # through to the unigram counts.
    if len(prompt) >= 2:
        ctx2, ctx1 = prompt[-2], prompt[-1]
    elif prompt:
        ctx2 = ctx1 = prompt[0]
    else:
        ctx2 = ctx1 = None

    # Drafter = small model: bigram first, unigram when there is no usable
    # entry. `or` also covers an empty Counter that a stray `bi[x]` read may
    # have inserted into the defaultdict.
    def draft_next(prev1):
        dist_small = bi.get(prev1) or uni
        return sample_from_counts(dist_small, T_draft)

    # 1) Build the draft: the drafter advances on its own proposals.
    draft = []
    prev1 = ctx1
    for _ in range(k):
        t = draft_next(prev1)
        draft.append(t)
        prev1 = t

    # 2) Verify: start again from the real prompt context.
    accepted = []
    log = []
    prev2, prev1 = ctx2, ctx1

    for t in draft:
        # Deterministic prediction of the large model.
        v = argmax_from_counts(get_counts(prev2, prev1))
        ok = (t == v)
        log.append((prev2, prev1, t, v, ok))
        if ok:
            accepted.append(t)
            prev2, prev1 = prev1, t  # extend the context with the accepted token
        else:
            accepted.append(v)       # substitute the verifier's token, then stop
            break

    final = prompt + accepted

    if trace:
        for i, (p2, p1, t, v, ok) in enumerate(log, 1):
            status = "ACCEPT" if ok else "REPLACE+STOP"
            print(f"[{i}] prev2='{p2}' prev1='{p1}'  draft='{t}'  verify='{v}'  ->  {status}")
        print("Draft   :", draft)
        print("Accepted:", accepted)
        print("Final   :", " ".join(final))

    return draft, accepted, final


In [14]:
#5) 한 번 실행해서 느낌 잡기

In [15]:
prompt = ["the", "wolf", "ran"]

# Baseline: sample 5 tokens one at a time from the backoff model.
print("---- Baseline (T=0.7), next 5 tokens ----")
print("Baseline:", " ".join(generate_baseline(prompt, steps=5, T=0.7)))

# Speculative: draft 5 tokens, verify, and print the trace.
print("\n---- Speculative (k=5, T_draft=0.9) ----")
# Trailing ';' hides the returned tuple without rebinding `_`, which IPython
# uses for the last cell output.
speculative_step(prompt, k=5, T_draft=0.9, trace=True);

---- Baseline (T=0.7), next 5 tokens ----
Baseline: the wolf ran into the forest the forest

---- Speculative (k=5, T_draft=0.9) ----
[1] prev2='wolf' prev1='ran'  draft='into'  verify='into'  ->  ACCEPT
[2] prev2='ran' prev1='into'  draft='the'  verify='the'  ->  ACCEPT
[3] prev2='into' prev1='the'  draft='wolf'  verify='forest'  ->  REPLACE+STOP
Draft   : ['into', 'the', 'wolf', 'ran', 'into']
Accepted: ['into', 'the', 'forest']
Final   : the wolf ran into the forest


In [16]:
# Repeat the speculative step and average how many tokens each step emits.
trials = 100
k_draft = 5  # draft length; also the maximum reported in the summary line
total = 0
# Named loop/unpack targets instead of `_`, so IPython's last-output
# variable is not rebound at notebook top level.
for trial in range(trials):
    draft_tokens, acc, final_tokens = speculative_step(
        ["the", "wolf", "ran"], k=k_draft, T_draft=0.9, trace=False
    )
    # len(acc) counts the matched draft prefix plus, on a mismatch, the
    # verifier's replacement token.
    total += len(acc)
print(f"Average accepted length over {trials} trials: {total/trials:.2f} / {k_draft}")


Average accepted length over 100 trials: 3.72 / 5
