In [1]:
import os, re, sys, json, torch
from typing import Optional
from transformers import AutoTokenizer, AutoModelForCausalLM

# ---------------- USER KNOBS ----------------
BASE_MODEL = os.environ.get("BASE_MODEL", "Qwen/Qwen2.5-Coder-0.5B-Instruct")
MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "512"))
TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.2"))
TOP_P = float(os.environ.get("TOP_P", "0.95"))
# -------------------------------------------

def try_bitsandbytes_cfg():
    """Return a BitsAndBytesConfig if available and CUDA exists; else None."""
    if not torch.cuda.is_available():
        return None
    try:
        from transformers import BitsAndBytesConfig
        return BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
    except Exception:
        return None

def build_prompt(tokenizer, question: str) -> str:
    """Use chat template if the model has one; fallback to simple instruction format."""
    user = (
        "You are a helpful coding assistant. "
        "Write correct, secure, and concise code for the following task.\n\n"
        f"Task:\n{question}\n\n"
        "Return ONLY code inside a triple-backtick block."
    )
    try:
        return tokenizer.apply_chat_template(
            [{"role": "user", "content": user}],
            tokenize=False,
            add_generation_prompt=True
        )
    except Exception:
        return f"### Instruction:\n{user}\n\n### Response:\n"

CODE_BLOCK_RE = re.compile(
    r"```([a-zA-Z0-9_+-]*)\s*\n(.*?)```",
    re.DOTALL
)

def extract_code(text: str) -> (Optional[str], Optional[str]):
    """Return (language, code) from the first fenced block, if any."""
    m = CODE_BLOCK_RE.search(text)
    if not m:
        return None, None
    lang = (m.group(1) or "").strip() or None
    code = m.group(2).strip("\n\r ")
    return lang, code

def suggest_ext(lang: Optional[str]) -> str:
    mapping = {
        "py":"py","python":"py",
        "js":"js","javascript":"js","ts":"ts","typescript":"ts",
        "java":"java","cpp":"cpp","c":"c","c++":"cpp","cs":"cs","csharp":"cs",
        "go":"go","rs":"rs","rust":"rs","php":"php","rb":"rb","ruby":"rb",
        "sh":"sh","bash":"sh","ps1":"ps1","powershell":"ps1",
        "kt":"kt","kotlin":"kt","swift":"swift","sql":"sql","html":"html","css":"css"
    }
    if not lang: return "txt"
    return mapping.get(lang.lower(), "txt")

def load_model_and_tokenizer():
    qcfg = try_bitsandbytes_cfg()
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        quantization_config=qcfg,
        trust_remote_code=True,
    )
    model.eval()
    return tokenizer, model

def generate_code(question: str, save_to_file: bool = True) -> str:
    tokenizer, model = load_model_and_tokenizer()
    prompt = build_prompt(tokenizer, question)
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            pad_token_id=tokenizer.eos_token_id,
        )

    full = tokenizer.decode(out[0], skip_special_tokens=True)
    # Remove prompt prefix if present
    if full.startswith(prompt):
        full = full[len(prompt):]

    lang, code = extract_code(full)
    if code is None:
        # fallback: return raw completion if the model didn’t fence code
        code = full.strip()
        lang = None

    print("\n===== GENERATED CODE =====\n")
    print(code)
    print("\n==========================\n")

    if save_to_file:
        ext = suggest_ext(lang)
        fname = f"generated_code.{ext}"
        with open(fname, "w", encoding="utf-8") as f:
            f.write(code)
        print(f"[Saved] {fname}")

    return code

def main():
    if len(sys.argv) > 1:
        question = " ".join(sys.argv[1:]).strip()
    else:
        question = input("Enter your coding question/task:\n> ").strip()
    if not question:
        print("No question provided.")
        sys.exit(1)
    generate_code(question, save_to_file=True)

if __name__ == "__main__":
    main()


tokenizer_config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\sagni\AppData\Local\Programs\Python\Python311\Lib\site-packages\IPython\core\interactiveshell.py", line 3670, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\sagni\AppData\Local\Temp\ipykernel_31204\1497219379.py", line 136, in <module>
    main()
  File "C:\Users\sagni\AppData\Local\Temp\ipykernel_31204\1497219379.py", line 133, in main
    generate_code(question, save_to_file=True)
  File "C:\Users\sagni\AppData\Local\Temp\ipykernel_31204\1497219379.py", line 87, in generate_code
    tokenizer, model = load_model_and_tokenizer()
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\sagni\AppData\Local\Temp\ipykernel_31204\1497219379.py", line 76, in load_model_and_tokenizer
    model = AutoModelForCausalLM.from_pretrained(
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\sagni\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\models\auto\auto_fact