In [1]:
import os, re, sys, time, torch
from typing import Optional, Tuple
from transformers import AutoTokenizer, AutoModelForCausalLM

# ---------- Settings (edit if you like) ----------
# Model to load. The default is a light, open code-instruct model that runs
# on CPU/GPU:
BASE_MODEL = os.getenv("BASE_MODEL", "Qwen/Qwen2.5-Coder-0.5B-Instruct")

# Generation knobs; each one is read from the environment variable of the
# same name and falls back to the default shown here.
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
TOP_P = float(os.getenv("TOP_P", "0.95"))

# Saving is enabled only when AUTO_SAVE is exactly the string "1".
AUTO_SAVE = os.getenv("AUTO_SAVE", "1") == "1"
# Directory that receives the saved code files.
SAVE_DIR = os.getenv("SAVE_DIR", ".")
# ---------------------------------------------------

# Fenced code block: group 1 is the (possibly empty) language tag, group 2 is
# everything up to the next closing fence (DOTALL lets it span lines).
CODE_BLOCK_RE = re.compile(r"```([a-zA-Z0-9_+-]*)\s*\n(.*?)```", re.DOTALL)

# Fence language tag (lower-case) -> file extension without the dot.
EXT_MAP = {
    "py": "py",
    "python": "py",
    "js": "js",
    "javascript": "js",
    "ts": "ts",
    "typescript": "ts",
    "java": "java",
    "cpp": "cpp",
    "c": "c",
    "c++": "cpp",
    "cs": "cs",
    "csharp": "cs",
    "go": "go",
    "rs": "rs",
    "rust": "rs",
    "php": "php",
    "rb": "rb",
    "ruby": "rb",
    "sh": "sh",
    "bash": "sh",
    "ps1": "ps1",
    "powershell": "ps1",
    "kt": "kt",
    "kotlin": "kt",
    "swift": "swift",
    "sql": "sql",
    "html": "html",
    "css": "css",
}

def has_accelerate() -> bool:
    """Report whether the optional ``accelerate`` package can be imported.

    Any exception raised while importing it (not only ``ImportError``) is
    treated as "not available".
    """
    try:
        import accelerate  # noqa: F401
    except Exception:
        available = False
    else:
        available = True
    return available

def try_bitsandbytes_cfg():
    """Return a 4-bit NF4 ``BitsAndBytesConfig`` when it is usable; else None.

    ``None`` is returned when CUDA is not available, when the
    ``bitsandbytes`` package is not installed, or when building the config
    raises, so the caller can fall back to an unquantized load.
    """
    if not torch.cuda.is_available():
        return None

    # Imported locally so this helper does not depend on a file-level import.
    import importlib.util

    # Being able to import BitsAndBytesConfig from transformers does not by
    # itself show that the bitsandbytes backend is installed, so check for
    # the backend explicitly instead of failing later at model-load time.
    if importlib.util.find_spec("bitsandbytes") is None:
        return None

    try:
        from transformers import BitsAndBytesConfig

        return BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
    except Exception:
        # Deliberate best effort: quantization is optional.
        return None

def build_prompt(tokenizer, question: str) -> str:
    """Build the generation prompt for *question*.

    The tokenizer's chat template is tried first with a single user message
    and the generation prompt appended. If that call raises for any reason,
    a plain "### Instruction / ### Response" layout is returned instead.
    """
    instruction = (
        "You are a helpful coding assistant. "
        "Write correct, secure, and concise code for the following task.\n\n"
        f"Task:\n{question}\n\n"
        "Return ONLY code inside a triple-backtick block."
    )
    messages = [{"role": "user", "content": instruction}]
    try:
        templated = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    except Exception:
        # No usable chat template: fall back to a generic instruct format.
        return f"### Instruction:\n{instruction}\n\n### Response:\n"
    return templated

def extract_code(text: str) -> Tuple[Optional[str], Optional[str]]:
    """Pull the first fenced code block out of *text*.

    Returns ``(lang, code)`` where ``lang`` is the fence's language tag (or
    ``None`` when the tag is empty) and ``code`` is the block's content.
    Returns ``(None, None)`` when *text* contains no opening fence at all.

    A fence that is opened but never closed (for example because the text
    was cut off) is still accepted: everything after the opening line is
    taken as the code.
    """
    m = CODE_BLOCK_RE.search(text)
    if m is None:
        # Same opening as CODE_BLOCK_RE, but without requiring the closing
        # fence, so truncated output does not leak the fence line itself.
        m = re.search(r"```([a-zA-Z0-9_+-]*)\s*\n(.*)", text, re.DOTALL)
        if m is None:
            return None, None

    lang = (m.group(1) or "").strip() or None
    # Drop surrounding blank lines and trailing spaces, but keep the leading
    # spaces of the first line: stripping them would shift that line
    # relative to the rest of an indented block.
    code = m.group(2).lstrip("\r\n").rstrip("\n\r ")
    return lang, code

def suggest_ext(lang: Optional[str]) -> str:
    """Map a fence language tag to a file extension (without the dot).

    The lookup in ``EXT_MAP`` is case-insensitive; a missing, empty or
    unknown tag yields ``"txt"``.
    """
    return EXT_MAP.get(lang.lower(), "txt") if lang else "txt"

def unique_path(base_dir: str, base_name: str, ext: str) -> str:
    """Pick a path inside *base_dir* that does not exist yet.

    *base_dir* is created if needed. Candidates are tried in the order
    ``base_name.ext``, ``base_name_1.ext``, ``base_name_2.ext``, ... and the
    first one that does not exist is returned. The file itself is not
    created here.
    """
    os.makedirs(base_dir, exist_ok=True)
    counter = 0
    suffix = ""  # the first candidate carries no numeric suffix
    while True:
        candidate = os.path.join(base_dir, f"{base_name}{suffix}.{ext}")
        if not os.path.exists(candidate):
            return candidate
        counter += 1
        suffix = f"_{counter}"

def load_model_and_tokenizer():
    """Load the tokenizer and model for ``BASE_MODEL`` and return them.

    Returns a ``(tokenizer, model)`` tuple with the model in eval mode.
    The pair is loaded on the first call only and cached on the function
    object; later calls return the same objects, so callers that invoke
    this once per request do not reload the weights each time.
    """
    cached = getattr(load_model_and_tokenizer, "_cached", None)
    if cached is not None:
        return cached

    # NOTE(review): trust_remote_code=True lets the model repository run its
    # own Python code, and BASE_MODEL can be set through an environment
    # variable -- only point it at repositories you trust.
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, trust_remote_code=True)
    if tokenizer.pad_token is None:
        # Generation needs a pad token; reuse EOS when none is defined.
        tokenizer.pad_token = tokenizer.eos_token

    use_accelerate = has_accelerate()

    kwargs = {
        "torch_dtype": torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        "trust_remote_code": True,
    }

    # Quantized loading is only attempted together with accelerate: the
    # manual `.to(device)` fallback below is not meant for bitsandbytes-
    # quantized models, and transformers' bitsandbytes integration relies on
    # accelerate being installed.
    qcfg = try_bitsandbytes_cfg() if use_accelerate else None
    if qcfg is not None:
        kwargs["quantization_config"] = qcfg

    # device_map requires accelerate, so only pass it when accelerate exists.
    if use_accelerate:
        kwargs["device_map"] = "auto"

    model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **kwargs)

    # Without a device_map the model is placed on a single device by hand.
    if "device_map" not in kwargs:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)

    model.eval()

    load_model_and_tokenizer._cached = (tokenizer, model)
    return load_model_and_tokenizer._cached

def generate_code(question: str) -> Tuple[str, str]:
    """Generate code for *question*, print it, and optionally save it.

    Returns ``(code_text, saved_path)``; ``saved_path`` is ``""`` when
    ``AUTO_SAVE`` is off. When the model's answer contains no fenced code
    block, the whole stripped answer is used as the code and it is saved
    with the ``txt`` extension.
    """
    tokenizer, model = load_model_and_tokenizer()
    prompt = build_prompt(tokenizer, question)
    device = next(model.parameters()).device
    inputs = tokenizer([prompt], return_tensors="pt").to(device)

    gen_kwargs = {
        "max_new_tokens": MAX_NEW_TOKENS,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if TEMPERATURE > 0:
        gen_kwargs.update(do_sample=True, temperature=TEMPERATURE, top_p=TOP_P)
    else:
        # Sampling needs a strictly positive temperature; treat 0 (or less)
        # as a request for deterministic greedy decoding.
        gen_kwargs["do_sample"] = False

    with torch.no_grad():
        out = model.generate(**inputs, **gen_kwargs)

    # The returned sequence starts with the prompt tokens. Cut them off by
    # token count: comparing decoded text against `prompt` is unreliable
    # because skip_special_tokens removes the template's special tokens, so
    # the decoded text need not start with the prompt string.
    prompt_len = inputs["input_ids"].shape[1]
    full = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)

    lang, code = extract_code(full)
    if code is None:
        code, lang = full.strip(), None

    # Print to console
    print("\n===== GENERATED CODE =====\n")
    print(code)
    print("\n==========================\n")

    # Optional save
    save_path = ""
    if AUTO_SAVE:
        ext = suggest_ext(lang)
        # Timestamped base name; unique_path adds a numeric suffix on clash.
        base = f"generated_code_{int(time.time())}"
        save_path = unique_path(SAVE_DIR, base, ext)
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(code)
        print(f"[Saved] {save_path}")

    return code, save_path

def main():
    """Run the interactive prompt loop until the user quits.

    Each non-empty line is passed to ``generate_code``. The loop ends on
    ``exit``/``quit``/``q`` (case-insensitive), on Ctrl-C, or when standard
    input is closed (end of file).
    """
    print("Code Generator (type 'exit' to quit)")
    print(f"Model: {BASE_MODEL}")
    while True:
        try:
            question = input("\nEnter your coding question/task:\n> ").strip()
            if not question:
                continue
            if question.lower() in {"exit", "quit", "q"}:
                print("Bye!")
                break
            generate_code(question)
        except (KeyboardInterrupt, EOFError):
            # EOFError: input() hit end of stdin (piped input, Ctrl-D/Ctrl-Z);
            # leave cleanly instead of crashing with a traceback.
            print("\nBye!")
            break


if __name__ == "__main__":
    main()


Code Generator (type 'exit' to quit)
Model: Qwen/Qwen2.5-Coder-0.5B-Instruct



Enter your coding question/task:
>  Implement Fibonacci using memoization and return the n-th number.



===== GENERATED CODE =====

def fibonacci(n):
    # Base cases
    if n == 0:
        return 0
    elif n == 1:
        return 1

    # Recursive case
    return fibonacci(n-1) + fibonacci(n-2)


[Saved] .\generated_code_1757874407.py



Enter your coding question/task:
>  Write a Python function is_prime(n) that returns True if n is prime.



===== GENERATED CODE =====

def is_prime(n):
    """Check if a number is prime."""
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    i = 5
    while i * i <= n:
        if n % i == 0 or n % (i + 2) == 0:
            return False
        i += 6
    return True


[Saved] .\generated_code_1757874454.py



Enter your coding question/task:
>  exit


Bye!
