In [None]:
%%capture
# Install or upgrade the libraries used by this notebook; %%capture hides pip's output.
# NOTE(review): every package is installed with -U and no version pin, so a later
# re-run may pull releases with different APIs — consider pinning a known-good set.
%pip install -U bitsandbytes
%pip install -U transformers
%pip install -U accelerate
%pip install -U peft
%pip install -U trl
%pip install -U datasets

In [None]:
import os
import torch
import wandb
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    logging
)
from peft import LoraConfig, get_peft_model
from huggingface_hub import login
from trl import SFTTrainer, setup_chat_format
import bitsandbytes as bnb
import getpass

In [None]:
# Authenticate against the Hugging Face Hub and Weights & Biases.
# `getpass`, `login` and `wandb` come from the notebook's imports cell, so they
# are not imported again here. Tokens are typed interactively via getpass so
# they are never stored in the notebook source.

# Hugging Face login
hf_token = getpass.getpass('hugging_face_token')
login(token=hf_token)

# Weights & Biases login
wb_token = getpass.getpass('wandb_token')
wandb.login(key=wb_token)

# Start the W&B run; the handle is kept in `run`.
run = wandb.init(
    project='Fine-tune Gemma-2-2b-it on react-code-instructions',
    job_type="training",
    anonymous="allow",
)


In [None]:
# Model configurations
base_model = "google/gemma-2-2b-it"
new_model = "Gemma-2-2b-it-ChatReact"
dataset_name = "cfahlgren1/react-code-instructions"

# Adjust precision and attention based on GPU
if torch.cuda.get_device_capability()[0] >= 8:
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
    !pip install -qqq flash-attn  # Install flash attention if supported
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

# BitsAndBytes configuration for memory-efficient model loading
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

In [None]:
# Load model with quantization and optimized attention.
# The checkpoint named by `base_model` is loaded with the 4-bit `bnb_config`
# and the attention implementation selected in the configuration cell.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)
# The tokenizer is loaded from the same checkpoint as the model.
# NOTE(review): trust_remote_code=True is passed for the tokenizer only, not
# for the model — confirm it is actually required.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

# Efficient LoRA fine-tuning configuration
def find_all_linear_names(model):
    """Return the unique leaf names of every 4-bit linear layer in ``model``.

    Iterates ``model.named_modules()``, keeps the modules that are instances
    of ``bnb.nn.Linear4bit`` and reduces each dotted module path to its final
    component. ``lm_head`` is removed from the result. The names come back as
    a list built from a set, so their order is not defined.
    """
    quantized_linear = bnb.nn.Linear4bit
    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, layer in model.named_modules()
        if isinstance(layer, quantized_linear)
    }
    # Exclude lm_head for 16-bit
    leaf_names -= {'lm_head'}
    return list(leaf_names)

# Names of the quantized linear layers; used below as the LoRA target_modules.
modules = find_all_linear_names(model)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/838 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/47.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

In [None]:
# LoRA adapter settings (rank 16, alpha 32, dropout 0.05) applied to the
# module names collected by find_all_linear_names(model).
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)

tokenizer.chat_template = None # Reset the chat template to prevent duplication error

# setup_chat_format returns an updated (model, tokenizer) pair; it runs before
# get_peft_model so the adapters wrap the updated model.
# NOTE(review): this cell's output reports new embeddings being initialized, so
# this step appears to extend the vocabulary — confirm those new rows are
# trained/saved as intended, since only `modules` are LoRA targets.
model, tokenizer = setup_chat_format(model, tokenizer)
# Wrap the model with the LoRA adapters described by peft_config.
model = get_peft_model(model, peft_config)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [None]:
# Load the train split of the dataset configured in `dataset_name` (set in the
# model-configuration cell) rather than repeating the repository id here.
# `load_dataset` is already imported in the notebook's imports cell.
dataset = load_dataset(dataset_name, split="train")


# Example: Accessing the first data point
example = dataset[0]
messages = example['messages']


# Display the roles and contents
for message in messages:
    print(f"{message['role'].capitalize()}: {message['content']}\n")


README.md:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


(…)7e68fb-44fe-47fc-b603-0279f2f8a7ca.jsonl:   0%|          | 0.00/221M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


(…)26ed2f-5aef-4f09-951e-a27efa6f185c.jsonl:   0%|          | 0.00/70.2M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


(…)bb7306-deff-4b78-85b0-03d776381591.jsonl:   0%|          | 0.00/203M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


data_uniques.jsonl:   0%|          | 0.00/478M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/74428 [00:00<?, ? examples/s]

System: 
You are an expert React, TypeScript, and TailwindCSS developer with a keen eye for modern, aesthetically pleasing design.

Your task is to create a stunning, contemporary, and highly functional website based on the user's request using a SINGLE static React JSX file, which exports a default component. This code will go directly into the App.tsx file and will be used to render the website.

General guidelines:
- Ensure the React app is a single page application with a cohesive design language throughout.
- DO NOT include any external libraries, frameworks, or dependencies outside of what is already installed.
- For icons, create simple, elegant SVG icons. DO NOT use any icon libraries.
- Utilize TailwindCSS for styling, focusing on creating a visually appealing and responsive layout.
- Avoid using arbitrary values (e.g., `h-[600px]`). Stick to Tailwind's predefined classes for consistency.
- Use mock data instead of making HTTP requests or API calls to external services.
- Impl

In [None]:
# Shuffle with a fixed seed (42) and keep the first 3000 rows as the working subset.
small = dataset.shuffle(seed=42).select(range(3000))
def extract_msgs(ex):
    """Flatten a three-message conversation into separate text fields.

    ``ex["messages"]`` must unpack into exactly three entries, which are taken
    positionally as the system, user and assistant messages; any other length
    raises ``ValueError`` from the unpacking. Returns a dict holding each
    message's ``"content"`` under the keys ``"system"``, ``"user"`` and
    ``"assistant"``.
    """
    first, second, third = ex["messages"]
    columns = ("system", "user", "assistant")
    return {
        column: message["content"]
        for column, message in zip(columns, (first, second, third))
    }
# Replace the nested "messages" column with flat system/user/assistant columns.
small = small.map(
    function=extract_msgs,
    num_proc=4,
    remove_columns=["messages"],
)
def build_prompt(ex):
    """Render one flattened example as a single training string.

    Joins the ``"system"``, ``"user"`` and ``"assistant"`` fields of ``ex``
    under ``[SYSTEM]``, ``[USER]`` and ``[ASSISTANT]`` headers, separated by
    blank lines, and returns the result as ``{"text": ...}``.

    NOTE(review): this bracket-tag layout is not the format produced by
    ``tokenizer.apply_chat_template`` in the inference cell — confirm the
    mismatch between training and inference prompts is intended.
    """
    pieces = [
        "[SYSTEM]\n", ex["system"],
        "\n\n[USER]\n", ex["user"],
        "\n\n[ASSISTANT]\n", ex["assistant"],
    ]
    rendered = "".join(pieces)
    return {"text": rendered}
# Collapse the three text columns into the single "text" column built by build_prompt.
ds = small.map(build_prompt, remove_columns=["system","user","assistant"], num_proc=4)


# 2. Split
# Hold out 10% of the rows for evaluation. The fixed seed makes the train/test
# partition reproducible across kernel restarts, so eval losses from different
# runs are measured on the same examples.
ds_splits = ds.train_test_split(test_size=0.1, seed=42)


Map (num_proc=4):   0%|          | 0/3000 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/3000 [00:00<?, ? examples/s]

In [None]:
# Display the training split's summary (features and row count).
ds_splits['train']

Dataset({
    features: ['created_at', 'model', 'recommended', 'upvoted', 'text'],
    num_rows: 2700
})

In [None]:
from trl import SFTConfig, SFTTrainer, DataCollatorForCompletionOnlyLM
from transformers import AutoTokenizer, AutoModelForCausalLM, EarlyStoppingCallback,DataCollatorForLanguageModeling
import torch


# Collator handed to the SFTTrainer below to batch tokenized examples.
# mlm=False selects causal language modeling rather than masked-LM.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

In [None]:
# 1. Define SFTConfig with label_names
sft_config = SFTConfig(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,

    # Evaluate every 200 steps and checkpoint every 400 steps; the checkpoint
    # with the lowest eval_loss is reloaded when training ends.
    eval_strategy="steps",
    eval_steps=200,
    save_steps=400,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,

    logging_steps=1,
    warmup_steps=10,
    learning_rate=2e-4,
    # Mixed precision follows `torch_dtype` from the model-configuration cell so
    # it agrees with bnb_4bit_compute_dtype, instead of always forcing fp16
    # (which contradicted the bfloat16 choice made for capability >= 8 GPUs).
    fp16=(torch_dtype == torch.float16),
    bf16=(torch_dtype == torch.bfloat16),
    group_by_length=True,
    report_to="wandb",

    # Train on the pre-rendered "text" column, one example per sequence.
    # NOTE(review): examples longer than max_seq_length tokens are truncated;
    # the system prompt printed earlier looks long, so check how much of each
    # assistant answer survives a 512-token limit.
    dataset_text_field="text",
    max_seq_length=512,
    packing=False,

    # label_names
    label_names=["labels"],
)

# 2. Prepare model
torch.cuda.empty_cache()
# Disabled for training; switched back on in the cell that calls wandb.finish().
model.config.use_cache = False


In [None]:

# 3. Instantiate trainer
# `model` was already wrapped with the LoRA adapters by get_peft_model(...) in
# an earlier cell, so `peft_config` is not passed again: handing the trainer a
# PeftModel together with a peft_config is redundant, and its handling differs
# between TRL releases.
trainer = SFTTrainer(
    model=model,
    train_dataset=ds_splits["train"],
    eval_dataset=ds_splits["test"],
    args=sft_config,
    processing_class=tokenizer,
    data_collator=data_collator,
    # Stop early once the monitored metric (eval_loss, per sft_config) has not
    # improved for 3 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

# 4. Train
trainer.train()


Applying chat template to train dataset:   0%|          | 0/2700 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2700 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2700 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss
200,0.3518,0.056923
400,0.1108,0.053615
600,0.0015,0.051424
800,0.1156,0.049951
1000,0.0026,0.049114
1200,0.002,0.048028




TrainOutput(global_step=1350, training_loss=0.08669622331400643, metrics={'train_runtime': 2318.0528, 'train_samples_per_second': 1.165, 'train_steps_per_second': 0.582, 'total_flos': 1.69631814818304e+16, 'train_loss': 0.08669622331400643})

In [None]:
# Close the W&B run that was opened with wandb.init(...).
wandb.finish()
# Re-enable use_cache, which was set to False before training.
model.config.use_cache = True

0,1
eval/loss,█▅▄▃▂▁
eval/mean_token_accuracy,▁▄▅▆▇█
eval/num_tokens,▁▂▄▅▇█
eval/runtime,▁▁▃▇▄█
eval/samples_per_second,██▆▃▅▁
eval/steps_per_second,██▆▃▅▁
train/epoch,▁▁▁▁▂▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇███
train/global_step,▁▁▁▁▁▁▂▂▂▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,█▆▂▃▁▁▁▁▁▁▃▂▂▁▁▁▂▃▁▁▁▁▁▃▁▂▃▁▂▁▂▁▂▁▃▁▁▁▁▁
train/learning_rate,███▇▇▇▇█▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁

0,1
eval/loss,0.04803
eval/mean_token_accuracy,0.98982
eval/num_tokens,1228731.0
eval/runtime,79.4312
eval/samples_per_second,3.777
eval/steps_per_second,3.777
total_flos,1.69631814818304e+16
train/epoch,1.0
train/global_step,1350.0
train/grad_norm,0.28904


In [None]:
# Save the trained model to the local directory named by `new_model`.
trainer.model.save_pretrained(new_model)
# Upload it to the Hugging Face Hub under the same name.
# NOTE(review): use_temp_dir=False presumably makes the upload reuse the local
# `new_model` directory written above — confirm against the hub docs.
trainer.model.push_to_hub(new_model, use_temp_dir=False)



adapter_model.safetensors:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/shreyan25G/Gemma-2-2b-it-ChatReact/commit/ee00d1f28cf978dad5f44d11860c95b147e1a648', commit_message='Upload model', commit_description='', oid='ee00d1f28cf978dad5f44d11860c95b147e1a648', pr_url=None, repo_url=RepoUrl('https://huggingface.co/shreyan25G/Gemma-2-2b-it-ChatReact', endpoint='https://huggingface.co', repo_type='model', repo_id='shreyan25G/Gemma-2-2b-it-ChatReact'), pr_revision=None, pr_num=None)

In [None]:

# Build a single-turn chat prompt and render it with the tokenizer's chat template.
messages = [{"role": "user", "content": "You are an expert React, TypeScript, and TailwindCSS developer with a keen eye for modern, aesthetically pleasing design.Create Calculator App"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Place the inputs on whatever device the model lives on instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to(model.device)

# Generation with tuned sampling strategies
outputs = model.generate(
    **inputs,
    max_length=1000,  # Upper bound on total length (prompt + generated tokens)
    num_return_sequences=1,
    do_sample=True,  # Without this, top_k / top_p / temperature are ignored (greedy decoding)
    top_k=50,
    top_p=0.85,  # Narrow top-p for more deterministic output
    temperature=0.3,  # Low temperature keeps sampling close to the most likely tokens
    no_repeat_ngram_size=3,
)

# Decode and clean up the output
# `text` is the full decoded sequence (prompt included), kept for inspection.
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
# The response is everything generated after the prompt tokens. Slicing by
# prompt length avoids splitting on the word "assistant", which fails when the
# word is missing and cuts the answer short when the answer itself contains it.
prompt_length = inputs["input_ids"].shape[-1]
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

print(response)

Certainly! Below is a modern, visually appealing calculator app built with React, Typescript, and styled with Tailwind CSS. This app includes a simple interface with a focus on usability and aesthetics.

```tsx
import React, { useState } from 'react';

const App: React.FC = () => {
  const [input, setInput] = useState<string>('');
  // Add a button to clear the input
  let clearInput = () =>{
    setInput('');
    // Add an input to the input field
    const inputField = document.getElementById('input')
    inputField.value = '';
  }
  
  return (
    <div className="bg-gray-100 dark:bg-blue-900">
      <div
        className="flex flex-col justify-center items-center p-12"
        id="calculator"
      >
        <div>
          <h1 className="text-4xl font-bold text-gray text-center">
            Calculator
          </h1>
        </div>

        <input
          type="text"
          id="input"
         className="bg-[#f2f2fa] rounded-lg shadow-md p-4 w-full"
           value={input}