In [None]:
!pip install torch torchvision transformers accelerate gradio


In [2]:
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration, AutoTokenizer, AutoModelForCausalLM

# ----------- CONFIG -----------
# Hugging Face Hub id of the vision-language model used by describe_image().
caption_model_id = "llava-hf/llava-1.5-7b-hf"

# Hugging Face Hub id of the code model used by generate_code_only().
code_model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"

# The tokenizer for the code model is created once here, at module level,
# and reused by every generate_code_only() call.
tokenizer = AutoTokenizer.from_pretrained(
    code_model_id,
    trust_remote_code=True,
)

# ----------- IMAGE DESCRIPTION -----------
def describe_image(image):
    """Describe an image in natural language using the LLaVA captioning model.

    The processor and model are loaded on every call and released again
    before returning, so the captioning model does not stay resident on the
    accelerator between requests.

    Args:
        image: A PIL image (the Gradio ``Image`` input is configured with
            ``type="pil"``). It is converted to RGB before processing.

    Returns:
        The generated description as a string, without the prompt text.
        On any failure, the string ``"Error generating description: <reason>"``
        is returned instead of raising.
    """
    import gc

    # Pre-bind so the ``finally`` clause can always drop these references,
    # no matter where a failure happens.
    model = None
    inputs = None
    output = None
    try:
        processor = AutoProcessor.from_pretrained(caption_model_id)
        model = LlavaForConditionalGeneration.from_pretrained(
            caption_model_id, torch_dtype=torch.float16, device_map="auto"
        ).eval()

        # Chat template documented on the llava-1.5 HF model card; a bare
        # instruction without the USER/ASSISTANT markers is off-template.
        prompt = "USER: <image>\nDescribe the image. ASSISTANT:"
        image = image.convert("RGB")
        inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
        output = model.generate(**inputs, max_new_tokens=100)

        # generate() returns prompt tokens followed by the new tokens; keep
        # only the newly generated part so the caption does not echo the prompt.
        prompt_length = inputs["input_ids"].shape[1]
        generated = output[:, prompt_length:]
        caption = processor.batch_decode(generated, skip_special_tokens=True)[0]
        return caption.strip()
    except Exception as e:
        return f"Error generating description: {e}"
    finally:
        # Drop every reference to the model and its tensors *before* emptying
        # the CUDA cache; otherwise the memory is still in use and cannot be
        # returned. This runs on both the success and the error path.
        del model, inputs, output
        gc.collect()
        torch.cuda.empty_cache()

# ----------- CODE GENERATION -----------
def generate_code_only(task_description, temperature=0.7, top_p=0.9, max_tokens=150):
    """Generate Python code for a task description with the code model.

    The model is loaded on every call and released again before returning;
    the module-level ``tokenizer`` is reused.

    Args:
        task_description: Plain-text description of the function to write.
        temperature: Sampling temperature passed to ``generate``.
        top_p: Nucleus-sampling threshold passed to ``generate``.
        max_tokens: Maximum number of new tokens to generate. Coerced to
            ``int`` because UI sliders may deliver a float.

    Returns:
        The generated completion (prompt excluded), stripped of surrounding
        whitespace. On any failure, the string
        ``"Error generating code: <reason>"`` is returned instead of raising.
    """
    import gc

    # Pre-bind so the ``finally`` clause can always drop these references.
    model = None
    inputs = None
    outputs = None
    try:
        model = AutoModelForCausalLM.from_pretrained(
            code_model_id,
            trust_remote_code=True,
            torch_dtype=torch.float16,
            device_map="auto"
        ).eval()

        prompt = (
            "Write only the Python function that solves the following task. "
            "Do not add comments, explanations, or markdown.\n\n"
            f"Task: {task_description}\n\n"
            "Code:\n"
        )
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            do_sample=True
        )

        # Decode only the tokens produced after the prompt. Splitting the full
        # text on "Code:" would cut the result short whenever the task text or
        # the generated code itself contains that marker.
        prompt_length = inputs["input_ids"].shape[1]
        completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return completion.strip()
    except Exception as e:
        return f"Error generating code: {e}"
    finally:
        # Release the model and tensors before emptying the CUDA cache so the
        # memory can actually be returned, on success and on error alike.
        del model, inputs, outputs
        gc.collect()
        torch.cuda.empty_cache()

# ----------- COMBINED FUNCTION -----------
def generate_both(selected, image, task_prompt, temperature, top_p, max_tokens):
    """Run the selected tasks and return their results for the UI.

    Args:
        selected: Collection of task names chosen in the UI; recognised
            values are ``"Image Description"`` and ``"Python Code"``.
            ``None`` is treated as "nothing selected".
        image: Image to describe, or ``None`` when no image was provided.
        task_prompt: Task text for code generation; ``None`` or a
            whitespace-only string skips code generation.
        temperature: Sampling temperature forwarded to ``generate_code_only``.
        top_p: Nucleus-sampling threshold forwarded to ``generate_code_only``.
        max_tokens: Token budget forwarded to ``generate_code_only``.

    Returns:
        A ``(caption, code)`` tuple of strings. Each element is ``""`` when
        its task was not selected or its input was missing.
    """
    # Tolerate missing values from the caller instead of raising on
    # ``in None`` / ``None.strip()``.
    chosen = selected or ()
    has_prompt = bool((task_prompt or "").strip())

    caption, code = "", ""

    # Explicit None check: presence of an image, not its truthiness, decides.
    if "Image Description" in chosen and image is not None:
        caption = describe_image(image)

    if "Python Code" in chosen and has_prompt:
        code = generate_code_only(task_prompt, temperature, top_p, max_tokens)

    return caption, code

# ----------- GRADIO INTERFACE -----------
# Layout, top to bottom: task selection, inputs, sampling controls,
# the trigger button, and finally the two result boxes.
with gr.Blocks() as demo:
    gr.Markdown(value="## 🔄 Unified Image Description & Code Generator")

    # Which of the two tasks to run; image description is pre-selected.
    with gr.Row():
        task_selector = gr.CheckboxGroup(
            label="Select Tasks",
            choices=["Image Description", "Python Code"],
            value=["Image Description"],
        )

    # Inputs for the two tasks, side by side.
    with gr.Row():
        image_input = gr.Image(label="Upload Image", type="pil")
        task_prompt = gr.Textbox(
            label="Task Prompt (for code generation)",
            placeholder="E.g., Write a function to reverse a string.",
            lines=2,
        )

    # Sampling controls forwarded to the code generator.
    with gr.Row():
        temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p")
        max_tokens = gr.Slider(minimum=50, maximum=300, value=150, step=10, label="Max Tokens")

    with gr.Row():
        run_button = gr.Button(value="Generate")

    with gr.Row():
        caption_output = gr.Textbox(label="Image Description", lines=5)
        code_output = gr.Textbox(label="Generated Python Code", lines=15)

    # Order matches the positional parameters of generate_both and the
    # (caption, code) tuple it returns.
    generation_inputs = [
        task_selector,
        image_input,
        task_prompt,
        temperature,
        top_p,
        max_tokens,
    ]
    generation_outputs = [caption_output, code_output]
    run_button.click(
        fn=generate_both,
        inputs=generation_inputs,
        outputs=generation_outputs,
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.87k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d97cb1567f6846a5bd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
