# 1. Install Dependencies

In [1]:
!pip install --upgrade --force-reinstall --no-cache-dir --no-deps \
    git+https://github.com/unslothai/unsloth-zoo.git \
    git+https://github.com/unslothai/unsloth.git --quiet
!pip install --no-deps xformers trl peft accelerate bitsandbytes --quiet
!pip install gradio --quiet

Collecting git+https://github.com/unslothai/unsloth-zoo.git
  Cloning https://github.com/unslothai/unsloth-zoo.git to /tmp/pip-req-build-0ww7gunl
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth-zoo.git /tmp/pip-req-build-0ww7gunl
  Resolved https://github.com/unslothai/unsloth-zoo.git to commit b77f10172913a7fd878504185772d3cc0cb64697
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-fl71m1kx
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-fl71m1kx
  Resolved https://github.com/unslothai/unsloth.git to commit 17dfc9f93ed7130678f4e361627b7ba122447ae2
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to buil

# 2. Imports

In [2]:
import gradio as gr
import torch
from unsloth import FastLanguageModel

ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
ü¶• Unsloth Zoo will now patch everything to make training faster!
Unsloth: Could not find `steps_per_generation` in grpo_trainer
Unsloth: Could not find `generation_batch_size` in grpo_trainer


# 3. Load the Model

In [3]:
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "mohamed-hassaneen/arabic-poetry-gemma-3-4b",
    max_seq_length = 256,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model)

==((====))==  Unsloth 2026.2.1: Fast Gemma3 patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.563 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.6.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


model.safetensors:   0%|          | 0.00/4.38G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/525M [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Gemma3ForConditionalGeneration(
      (model): Gemma3Model(
        (vision_tower): SiglipVisionModel(
          (vision_model): SiglipVisionTransformer(
            (embeddings): SiglipVisionEmbeddings(
              (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
              (position_embedding): Embedding(4096, 1152)
            )
            (encoder): SiglipEncoder(
              (layers): ModuleList(
                (0-26): 27 x SiglipEncoderLayer(
                  (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
                  (self_attn): SiglipAttention(
                    (k_proj): lora.Linear(
                      (base_layer): Linear(in_features=1152, out_features=1152, bias=True)
                      (lora_dropout): ModuleDict(
                        (default): Identity()
                      )
                      (lora_A): ModuleDict(
  

# 4. Generation Logic

In [10]:
from transformers import TextIteratorStreamer
from threading import Thread

def generate_poetry(prompt):
    if not prompt or not prompt.strip():
        yield "Ÿäÿ±ÿ¨Ÿâ ÿ•ÿØÿÆÿßŸÑ ÿµÿØÿ± ÿßŸÑÿ®Ÿäÿ™..."
        return

    inputs = tokenizer(text=[prompt], return_tensors="pt").to("cuda")

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=100,
        temperature=0.2,
        repetition_penalty=1.3,
        use_cache=True,
        eos_token_id=tokenizer.eos_token_id
    )

    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text

# 5. Gradio UI and Launch

In [14]:
theme = gr.themes.Soft(primary_hue="emerald")

with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# Arabic Poetry Generator")
    gr.Markdown("Fine-tuned Gemma-3-4B for structural poetry generation.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                lines=3,
                placeholder="ÿ£ÿØÿÆŸÑ ÿµÿØÿ± ÿßŸÑÿ®Ÿäÿ™ ŸáŸÜÿß...",
                label="Input Verse",
                rtl=True
            )
            submit_btn = gr.Button("Generate Rest of Poem", variant="primary")

        with gr.Column():
            output_text = gr.Textbox(
                lines=6,
                label="Model Output",
                interactive=False,
                rtl=True
            )

    gr.Examples(
        examples=["ÿßŸÑÿπŸÑŸÖ ÿ≤ŸäŸÜ Ÿàÿ™ÿ¥ÿ±ŸäŸÅ ŸÑÿµÿßÿ≠ÿ®Ÿá", "Ÿàÿ•ÿ∞ÿß ÿ£ÿ™ÿ™ŸÉ ŸÖÿ∞ŸÖÿ™Ÿä ŸÖŸÜ ŸÜÿßŸÇÿµ"],
        inputs=input_text
    )

    submit_btn.click(fn=generate_poetry, inputs=input_text, outputs=output_text)

gr.close_all()
demo.queue().launch(share=True, debug=True)

  with gr.Blocks(theme=theme) as demo:


Closing server running on port: 7860
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://be7dfa2a3e0d213104.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://be7dfa2a3e0d213104.gradio.live


