In [None]:
import gradio as gr

def generate(input_text, slider_value):
    """Echo the prompt and the slider setting as a two-line string.

    Placeholder callback for the demo UI: no model is invoked here.

    Args:
        input_text: Text taken from the prompt box.
        slider_value: Value taken from the "Max new tokens" slider.

    Returns:
        A string with the prompt on the first line and the slider value
        on the second.
    """
    template = "Input: {input_text}\nMax new tokens: {slider_value}"
    return template.format(input_text=input_text, slider_value=slider_value)

# Stop any Gradio server started by an earlier run of this notebook. The
# launch below pins a fixed port, so a still-running previous demo would make
# a re-run of this cell fail with a "port in use" error.
gr.close_all()

# Wire the callback to a prompt box and a token-count slider, and show the
# returned string in a single output text box.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(label="Max new tokens", value=20, maximum=1024, minimum=1),
    ],
    outputs=gr.Textbox(label="Completion"),
)

# Serve on port 8080; share=True additionally requests a public share link.
demo.launch(share=True, server_port=8080)

In [None]:
import os
import sys
import gradio as gr
from transformers import pipeline

# Expose only GPU index 0 to CUDA for this process. This must run before any
# CUDA work starts; device placement itself is delegated to device_map="auto"
# when the pipeline is built.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

def create_generation_pipeline(model_id="meta-llama/Llama-3.2-1B-Instruct"):
    """Build a transformers text-generation pipeline for ``model_id``.

    Args:
        model_id: Model identifier handed to ``pipeline`` as ``model``.
            Defaults to "meta-llama/Llama-3.2-1B-Instruct".

    Returns:
        The object returned by ``transformers.pipeline`` for the
        "text-generation" task.
    """
    pipeline_options = {
        "model": model_id,
        # Let the library decide where to place the model weights.
        "device_map": "auto",
    }
    return pipeline("text-generation", **pipeline_options)

# Pick the model: use the first command-line argument when one is given,
# otherwise fall back to the default model.
#
# Inside a Jupyter kernel sys.argv looks like
# [launcher, "-f", "<connection file>"], so a plain sys.argv[1] lookup would
# yield "-f" and try to load a model with that name. Option-style arguments
# (anything starting with "-") are therefore ignored.
model_id = (
    sys.argv[1]
    if len(sys.argv) > 1 and not sys.argv[1].startswith("-")
    else "meta-llama/Llama-3.2-1B-Instruct"
)
pipe = create_generation_pipeline(model_id=model_id)

def generate(input_text, slider_value):
    """Generate text for a prompt with the module-level ``pipe``.

    Args:
        input_text: Prompt passed to the pipeline.
        slider_value: Upper bound on newly generated tokens, as delivered by
            the UI slider. It is coerced to ``int`` because Gradio documents
            slider values as floats, while a token count must be a whole
            number.

    Returns:
        A two-line string: the prompt, then the ``generated_text`` field of
        the first pipeline result.
    """
    results = pipe(input_text, max_new_tokens=int(slider_value))
    # The pipeline returns a list of results; only the first one is shown.
    # NOTE(review): text-generation pipelines appear to include the prompt in
    # "generated_text" by default, so the prompt may show up twice in the
    # output — confirm whether return_full_text=False is wanted.
    output_text = results[0]["generated_text"]
    return f"Input: {input_text}\nOutput: {output_text}"

# Stop any Gradio server that is still running in this kernel (for example
# the demo launched by an earlier cell). The launch below pins port 8080, and
# it fails with a "port in use" error if a previous demo still holds it.
gr.close_all()

# Build the UI: a prompt box and a token-count slider feed generate(), and the
# returned string is shown in a single output text box.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(label="Max new tokens", value=20, maximum=1024, minimum=1),
    ],
    outputs=gr.Textbox(label="Completion"),
)

# Serve on port 8080; share=True additionally requests a public share link.
demo.launch(share=True, server_port=8080)
