In [1]:
!pip install transformers datasets peft accelerate bitsandbytes --quiet

In [None]:

# Install necessary packages (run in Colab or local virtual environment)


# 1. Imports
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, TaskType
import torch

# 2. Load your dataset (replace with your path or use `load_dataset` with a script or file)
dataset = load_dataset("json", data_files="/content/gfg_html_css_articles_concurrency.json", split="train")

# Keep only relevant fields
dataset = dataset.map(lambda x: {
    "text": f"{x['title']}\n<start_html>\n{x['html_code']}\n<end_html>\n<start_css>\n{x['css_code']}\n<end_css>"
})

# 3. Load tokenizer and model (using Gemma 2B as an example; substitute with a 4-bit-supported model if needed)
model_name = "google/gemma-2b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Pad with eos

# Load model with 4-bit quantization
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

model = prepare_model_for_kbit_training(model)

# 4. PEFT QLoRA config
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, peft_config)

# 5. Tokenize data
def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=1024)

tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])

# 6. Training arguments
training_args = TrainingArguments(
    output_dir="./qlora-html-css",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=50,
    max_steps=1000,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=100,
    save_total_limit=2
)

# 7. Data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# 8. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 9. Train
trainer.train()

# 10. Save model
trainer.save_model("./qlora-html-css")
tokenizer.save_pretrained("./qlora-html-css")

# 11. Inference example
from transformers import pipeline
pipe = pipeline("text-generation", model="./qlora-html-css", tokenizer=tokenizer, device=0)
pipe("responsive login page with form")  # Will output HTML+CSS format


tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Map:   0%|          | 0/705 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mrajdesai1517[0m ([33mrajdesai1517-charusat-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,0.7258
20,0.7227
30,0.6937
40,0.6823
50,0.5978
60,0.5686
70,0.527
80,0.4896
90,0.5787
100,0.4818


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


[{'generated_text': 'responsive login page with form using html and css  \n<link href="https://cdn.materialdesignicons.com/'}]

In [None]:
# chat = [
#     {"role": "system", "content": "You are a smart html and css generator."},
#     {"role": "user", "content": "Generate a responsive login page with form"}
# ]
# pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto")
response = pipe("Design a Google Chrome Page Template using HTML and CSS. Format the code properly and if you don't know reply, I don't know",max_new_tokens=1024)
print(response)

# pipe("responsive login page with form")  # Will output HTML+CSS format

[{'generated_text': 'Design a Google Chrome Page Template using HTML and CSS. Format the code properly and if you don\'t know reply, I don\'t know how to use the code properly.\n<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css" rel="stylesheet"/>\n<div class="google-chrome">\n        <div class="google-chrome-title">\n                <i class="fa fa-chrome" aria-hidden="true">\n                </i>\n                Google Chrome\n        </div>\n        <div class="google-chrome-bar">\n                <i class="fa fa-home" aria-hidden="true">\n                </i>\n                Home\n                <i class="fa fa-user" aria-hidden="true">\n                </i>\n                Profile\n                <i class="fa fa-cog" aria-hidden="true">\n                </i>\n                Settings\n        </div>\n</div>\n<div class="google-chrome-window">\n        <div class="google-chrome-tab">\n                <i class="fa fa-book" aria-hidd

In [3]:
from transformers import pipeline, AutoTokenizer, GenerationConfig, AutoModelForCausalLM

model_path = "rajdesai1510/small_fine_tuned_gemma"  # path to your fine-tuned QLoRA model
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Optional: attach generation config
generation_config = GenerationConfig(
    max_new_tokens=1024,
    temperature=0.5,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.2,
    no_repeat_ngram_size=4,
    early_stopping=True,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id
)

pipe = pipeline(
    "text-generation",
    model=model_path,
    tokenizer=tokenizer,
    device=0,
    generation_config=generation_config
)

def format_prompt(title):
    return f"Title: {title}\nOutput:\n"

# Example usage
title = "Design a Google Chrome Page Template using HTML and CSS. Format Properly."
result = pipe(format_prompt(title))[0]['generated_text']
generated_code = result.split("Output:\n", 1)[-1].strip()
print(generated_code)


tokenizer_config.json:   0%|          | 0.00/40.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/522 [00:00<?, ?B/s]



adapter_config.json:   0%|          | 0.00/770 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/3.70M [00:00<?, ?B/s]

Device set to use cpu


KeyboardInterrupt: 

In [2]:
from transformers import pipeline, AutoTokenizer, GenerationConfig, AutoModelForCausalLM
from transformers import BitsAndBytesConfig

model_path = "rajdesai1510/small_fine_tuned_gemma"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Load model in 4-bit using bitsandbytes

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",  # Use "fp4" or "nf4"
    bnb_4bit_compute_dtype="float16"
)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map="auto",  # Automatically allocates across devices
    trust_remote_code=True
)

# Optional: set generation config
generation_config = GenerationConfig(
    max_new_tokens=1024,
    temperature=0.5,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.2,
    no_repeat_ngram_size=4,
    early_stopping=True,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id
)

# Create pipeline with quantized model
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config
)

def format_prompt(title):
    return f"Title: {title}\nOutput:\n"

# Example inference
title = "Design a Google Chrome Page Template using HTML and CSS. Format Properly."
result = pipe(format_prompt(title))[0]['generated_text']
generated_code = result.split("Output:\n", 1)[-1].strip()
print(generated_code)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend

In [None]:
generated_code

'https://www.w3docs.com/tutorials/html/google-chrome-template-using-css-and-javascript.html\n \n<!DOCTYPE html>\n<html lang="en">\n <head>\n  <meta charset="utf-8"/>\n  <title>\n   Google Chrome Template\n  </title>\n  <!-- Set background color to black -->\n  <style>\n   body {\n            background: #000;\n            color: #fff;\n            padding: 25px;\n            font-family: "Arial", sans-serif;\n            margin: 10px;}\n  </style>\n </head>\n <body>\n  <h1>\n   GeeksforGeeks\n  </h1>\n  <h3>\n   A Google Chrome Template using CSS and JavaScript\n  </h3>\n  <p>\n   Set your browser background to black and white using the code given below.\n  </p>\n  <strong>\n   <a href="#" style="text-decoration: none;">\n    https://geeksforgeeks.org/set-your-browser-background-to-black-and-white-using-the-code-given-below/\n    </a>\n  </strong>\n </body>\n</html>\n \n \nHTML code: \n ~~~~~~~~~~~~~~~~~ \n \t<body>\n\t\t \t \t \n                <!-- Set background to black --> \n     

In [None]:
from huggingface_hub import HfApi

api = HfApi(token="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")
api.upload_folder(
    folder_path="/content/qlora-html-css",
    repo_id="rajdesai1510/small_fine_tuned_gemma",
    repo_type="model",
)


adapter_model.safetensors:   0%|          | 0.00/3.70M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/3.70M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/7.43M [00:00<?, ?B/s]

Upload 21 LFS files:   0%|          | 0/21 [00:00<?, ?it/s]

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/3.70M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/7.43M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

events.out.tfevents.1746090739.ed631c5f1106.591.0:   0%|          | 0.00/26.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/rajdesai1510/small_fine_tuned_gemma/commit/84a8d1520278a3deb423ceee6204da69cf1b014c', commit_message='Upload folder using huggingface_hub', commit_description='', oid='84a8d1520278a3deb423ceee6204da69cf1b014c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/rajdesai1510/small_fine_tuned_gemma', endpoint='https://huggingface.co', repo_type='model', repo_id='rajdesai1510/small_fine_tuned_gemma'), pr_revision=None, pr_num=None)

In [2]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…