In [None]:
# Install required packages
!pip install -q google-generativeai python-dotenv tqdm orjson pandas

In [None]:
import os
from pathlib import Path
import time
import random
import re
from typing import Dict, List, Any
from tqdm import tqdm
import google.generativeai as genai

# --- Configuration ---
# Preferred: export GENAI_API_KEY (or GOOGLE_API_KEY) in the environment so the
# secret never lands in the notebook. Pasting the key into the literal below
# still works and takes precedence, but must not be committed to a public repo.
GENAI_API_KEY = ""
if not GENAI_API_KEY.strip():
    GENAI_API_KEY = (
        os.environ.get("GENAI_API_KEY")
        or os.environ.get("GOOGLE_API_KEY")
        or ""
    )
# Drop stray whitespace/newlines that often come along with a pasted key.
GENAI_API_KEY = GENAI_API_KEY.strip()
if not GENAI_API_KEY:
    raise RuntimeError(
        "Please set GENAI_API_KEY to your Gemini API key in Cell 2 "
        "or via the GENAI_API_KEY / GOOGLE_API_KEY environment variable."
    )

genai.configure(api_key=GENAI_API_KEY)

# Retry policy read by call_with_retry: maximum number of attempts per request
# and the multiplier applied to the wait between consecutive attempts.
MAX_RETRIES = 5
RETRY_BASE = 1.5

# ---- System prompts (concise style) ----
# Each prompt is handed to make_model() as that model's system instruction.
# The "Format strictly" line asks the model to emit its own speaker prefix; the
# conversation loop still adds the prefix when the model leaves it out.

# Interviewer persona: one short, quantitative question per turn.
INTERVIEWER_SYSTEM = """
You are a senior system design interviewer.
Ask one short, focused question per turn. Stay concrete (QPS, latency, storage). No lists.
Format strictly: Interviewer: <question>
"""

# Candidate persona: brief answers with occasional hesitation and self-correction.
# The hesitation frequency requested here is additionally post-processed by
# control_hesitation(), so the prompt alone does not determine the final rate.
CANDIDATE_SYSTEM = """
You are a system design candidate.
Answer briefly (2–4 sentences). Be realistic and mention trade-offs.
Occasionally (~40%) begin with a brief hesitation like "Hmm…" or "Okay…" — do not do this every time.
Occasionally make a small mistake and correct it in the same reply.
Format strictly: Candidate: <answer>
"""

MODEL_NAME = "gemini-2.5-flash"


def make_model(system_prompt: str):
    """Build a Gemini model handle that uses *system_prompt* as its system instruction."""
    model = genai.GenerativeModel(MODEL_NAME, system_instruction=system_prompt)
    return model


# One model per role, created in the same order as before: interviewer first,
# then candidate, each with its own persona prompt.
interviewer, candidate = [
    make_model(persona) for persona in (INTERVIEWER_SYSTEM, CANDIDATE_SYSTEM)
]

# --- Helpers ---

def call_with_retry(
    model,
    contents: Any,
    *,
    max_retries: "int | None" = None,
    retry_base: "float | None" = None,
) -> str:
    """Call ``model.generate_content`` with retries and exponential backoff.

    An attempt counts as failed when the call (or reading ``resp.text``)
    raises, or when the returned text is empty after stripping.

    Args:
        model: Object exposing ``generate_content(contents=...)`` whose result
            has a ``text`` attribute.
        contents: Payload forwarded unchanged to ``generate_content``.
        max_retries: Maximum number of attempts; defaults to ``MAX_RETRIES``.
        retry_base: Multiplier applied to the wait after each failed attempt;
            defaults to ``RETRY_BASE``.

    Returns:
        The stripped response text, or ``""`` if every attempt produced empty
        text (or if ``max_retries`` is not positive).

    Raises:
        Exception: Whatever the final attempt raised. Exceptions from earlier
            attempts are retried and not reported.
    """
    attempts = MAX_RETRIES if max_retries is None else max_retries
    backoff = RETRY_BASE if retry_base is None else retry_base
    delay = 0.7  # initial wait in seconds; grows by `backoff` per failure

    for attempt in range(attempts):
        is_last = attempt == attempts - 1
        try:
            resp = model.generate_content(contents=contents)
            text = (resp.text or "").strip()
        except Exception:
            # Best-effort retry on any failure; only the last one is surfaced.
            if is_last:
                raise
        else:
            if text:
                return text
        if is_last:
            # Nothing left to retry, so do not waste time sleeping.
            break
        # Small random jitter keeps parallel callers from retrying in lockstep.
        time.sleep(delay + random.random() * 0.2)
        delay *= backoff
    return ""

HESITATION_RATE = 0.4

# A leading run of filler words ("Hmm…", "Okay,", "Uh -", "Well, um...") together
# with the punctuation/whitespace that trails each one.
#   * ``\b`` keeps real words such as "Okta" or "Wellness" from matching.
#   * ``(?!-\w)`` keeps hyphenated compounds such as "Well-known" or "Uh-huh".
#   * The separator class includes "…" because both the prompt and the injected
#     hesitations use that character.
_HESITATION_RE = re.compile(
    r"^(?:(?:h+m+|ok(?:ay)?|uh+|um+|well)\b(?!-\w)[\s,.!…\-–—]*)+",
    flags=re.IGNORECASE,
)


def control_hesitation(candidate_msg: str, rate: "float | None" = None) -> str:
    """Normalize a candidate reply and steer how often it opens with a hesitation.

    The reply is given a ``Candidate:`` prefix if it lacks one. Then a single
    random draw decides whether this reply should open with a hesitation:

    * reply already hesitates, draw says "no": the whole leading run of filler
      words is removed and the first remaining character is upper-cased;
    * reply does not hesitate, draw says "yes": one of "Hmm…", "Okay…", "Uh…"
      is prepended;
    * otherwise the reply body is left as is.

    Because the draw is independent of what the model produced, the share of
    hesitating replies tends towards ``rate`` regardless of the model's habit.

    Args:
        candidate_msg: Raw model reply, with or without the ``Candidate:`` prefix.
        rate: Probability in [0, 1] that the result opens with a hesitation;
            defaults to ``HESITATION_RATE``.

    Returns:
        The reply formatted as ``"Candidate: <body>"``.
    """
    prefix = "Candidate:"
    text = candidate_msg.strip()
    if text.startswith(prefix):
        text = text[len(prefix):]
    body = text.lstrip()

    target = HESITATION_RATE if rate is None else rate
    want_hesitation = random.random() < target
    filler = _HESITATION_RE.match(body)

    if filler and not want_hesitation:
        remainder = body[filler.end():]
        # A reply that is nothing but filler is kept rather than emptied.
        if remainder:
            body = remainder[0].upper() + remainder[1:]
    elif not filler and want_hesitation and body:
        inject = random.choice(["Hmm…", "Okay…", "Uh…"])
        body = f"{inject} {body}"

    return f"{prefix} {body}"

# Seed kickoff: the fixed interviewer line that the transcript history starts with.
KICKOFF = (
    "Interviewer: Let’s begin. How would you design a URL shortener like bit.ly?"
)

# Number of exchanges (Interviewer→Candidate is one exchange). Keep concise.
# The kickoff line already counts as the interviewer half of the first exchange.
NUM_EXCHANGES = 6

# --- Run one conversation ---
import warnings

# NOTE(review): each model is only shown the previous line (history[-1]), not
# the transcript so far, so later turns can lose the thread of the design being
# discussed — confirm this is intentional.
history: List[str] = [KICKOFF]
turn = "candidate"

# The kickoff is already the first interviewer line, so 2 * N - 1 further turns
# complete N Interviewer→Candidate exchanges.
for _ in range(NUM_EXCHANGES * 2 - 1):
    last = history[-1]
    speaker = candidate if turn == "candidate" else interviewer
    msg = call_with_retry(speaker, last)

    if not msg:
        # call_with_retry gave up with empty text. Recording a bare
        # "Candidate: " / "Interviewer: " line and prompting the other model
        # with it would corrupt the rest of the transcript, so stop here.
        warnings.warn(
            f"{turn} model returned no text; transcript stopped after "
            f"{len(history)} line(s)."
        )
        break

    if turn == "candidate":
        msg = control_hesitation(msg)
        turn = "interviewer"
    else:
        if not msg.startswith("Interviewer:"):
            msg = "Interviewer: " + msg.lstrip()
        turn = "candidate"
    history.append(msg)

# Print transcript in the requested format
print("\n".join(history))

Interviewer: Let’s begin. How would you design a URL shortener like bit.ly?
Candidate: Hmm… Okay, I'd design a URL shortener with two core services: a shortening service that generates a unique short code for a given long URL, storing the mapping in a high-throughput key-value store like Cassandra or DynamoDB. The short code could be a Base62 encoded sequential ID, or actually, a random string is often preferred for less predictable short URLs. If there's a collision, we'd retry generation. The redirection service would then efficiently look up the original URL from the short code, needing to be extremely fast and highly available, possibly leveraging a CDN and global read replicas.
Interviewer: How many new short URLs per second does your shortening service need to support?
Candidate: assuming a large-scale service with tens of millions of daily active users, where each user creates a small number of URLs, we might expect around 200-500 new short URLs per second during peak times. Thi