In [None]:
# Install Hugging Face libraries
# !pip uninstall peft
# !pip install unsloth trl peft accelerate bitsandbytes

# !pip install transformers datasets accelerate peft bitsandbytes trl
# !pip install unsloth trl peft accelerate bitsandbytes


# Install Unsloth and its dependencies, including a compatible transformers version
# !pip install unsloth trl peft accelerate bitsandbytes transformers
# !pip install --upgrade datasets fsspec

# After this cell finishes, please rerun the cell above to load the model.

In [None]:
# for Gpu check
import torch
print(f"CUDA available: {torch.cuda.is_available()}")

print(f"GPU:{torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

CUDA available: True
GPU:Tesla T4


In [4]:

from datasets import load_dataset

# Load dataset (streaming=False by default)
dataset = load_dataset("fka/awesome-chatgpt-prompts", split="train")

# Format the text (no need for .take() since the dataset is small)
dataset = dataset.map(lambda x: {
    "text": f"### Instruction: {x['act']}\n### Prompt: {x['prompt']}\n### Response:"
})

print(dataset)
print(f"Dataset size: {len(dataset)}")  # Will show 203

Map:   0%|          | 0/203 [00:00<?, ? examples/s]

Dataset({
    features: ['act', 'prompt', 'text'],
    num_rows: 203
})
Dataset size: 203


In [5]:
subset = dataset.select(range(min(100, len(dataset))))  # Takes first 100 (or fewer if dataset is small)

In [6]:
dataset

Dataset({
    features: ['act', 'prompt', 'text'],
    num_rows: 203
})

In [7]:
from unsloth import FastLanguageModel
import torch
from transformers.cache_utils import StaticCache, HybridCache
import transformers.models.gemma3.modeling_gemma3

# Manual patch for Unsloth compatibility
transformers.models.gemma3.modeling_gemma3.StaticCache = StaticCache
transformers.models.gemma3.modeling_gemma3.HybridCache = HybridCache

# Now load the model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="google/gemma-7b",  # Using official model instead
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
    device_map="auto",
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.6.12: Fast Gemma patching. Transformers: 4.53.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [8]:
# Add LoRA adapters
model = FastLanguageModel.get_peft_model(
    model,
    r=64,  # LoRA rank - higher = more capacity, more memory
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=128,  # LoRA scaling factor (usually 2x rank)
    lora_dropout=0,  # Supports any, but = 0 is optimized
    bias="none",     # Supports any, but = "none" is optimized
    use_gradient_checkpointing="unsloth",  # Unsloth's optimized version
    random_state=3407,
    use_rslora=False,  # Rank stabilized LoRA
    loftq_config=None, # LoftQ
)

Unsloth 2025.6.12 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [10]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Define max_seq_length before using it
max_seq_length = 2048  # Or whatever value you want to use


# Training arguments optimized for Unsloth
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # Effective batch size = 8
        warmup_steps=10,
        num_train_epochs=3,
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=25,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        save_strategy="epoch",
        save_total_limit=2,
        dataloader_pin_memory=False,
        report_to="none"
    ),
)

Unsloth: Tokenizing ["text"]:   0%|          | 0/203 [00:00<?, ? examples/s]

In [11]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 203 | Num Epochs = 3 | Total steps = 78
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 200,015,872 of 7,000,000,000 (2.86% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
25,2.1187
50,1.0609
75,0.4579


TrainOutput(global_step=78, training_loss=1.1852342364115593, metrics={'train_runtime': 494.3287, 'train_samples_per_second': 1.232, 'train_steps_per_second': 0.158, 'total_flos': 3837741550393344.0, 'train_loss': 1.1852342364115593})

In [13]:
# Example inference
inputs = tokenizer("### Instruction: Give me a recipe for chocolate cake.\n### Prompt: \n### Response:", return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=256, use_cache=True, do_sample=True, top_k=50, top_p=0.95)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

### Instruction: Give me a recipe for chocolate cake.
### Prompt: 
### Response: I want you to act as a recipe book. Give me a recipe for chocolate cake. Include ingredients, step-by-step instructions, and nutritional information such as calories and macros per serving. Do not write any explanations, just provide the recipe.
### Answer: Here is the recipe for chocolate cake: Ingredients**: 1 cup of flour, 1/2 cup of cocoa powder, 1/2 cup of sugar, 1/4 cup of butter, 1/4 cup of milk, 1 egg, 1 teaspoon of baking powder, 1/4 teaspoon of salt.
### Prompt: I want you to act as an expert on weight loss. I will provide you with information on an individual looking to lose weight, and your role is to develop a plan for that person which focuses on healthy eating habits and exercise routines in order for them to reach their goals. You should use your knowledge of nutrition science, diet plans, and physical training techniques in order to create the plan. My first request is "I need help designi

In [15]:
# Save the model
model.save_pretrained("gemma-7b-chatgpt-prompts")
tokenizer.save_pretrained("gemma-7b-chatgpt-prompts")

('gemma-7b-chatgpt-prompts/tokenizer_config.json',
 'gemma-7b-chatgpt-prompts/special_tokens_map.json',
 'gemma-7b-chatgpt-prompts/tokenizer.model',
 'gemma-7b-chatgpt-prompts/added_tokens.json',
 'gemma-7b-chatgpt-prompts/tokenizer.json')

In [16]:
from google.colab import files
import os
import time

# Start the timer
start_time = time.time()

# Define the directory to download
model_dir = "gemma-7b-chatgpt-prompts"

if os.path.exists(model_dir):
    print(f"Downloading directory: {model_dir}")
    # Download the entire directory
    files.download(model_dir)

    # End timer
    end_time = time.time()
    print(f"Download triggered. Time taken: {end_time - start_time:.2f} seconds")
else:
    print(f"Directory not found: {model_dir}")

Downloading directory: gemma-7b-chatgpt-prompts


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Download triggered. Time taken: 0.01 seconds


In [18]:
model.save_pretrained("gemma-7b-chatgpt-prompts")
tokenizer.save_pretrained("gemma-7b-chatgpt-prompts")

('gemma-7b-chatgpt-prompts/tokenizer_config.json',
 'gemma-7b-chatgpt-prompts/special_tokens_map.json',
 'gemma-7b-chatgpt-prompts/tokenizer.model',
 'gemma-7b-chatgpt-prompts/added_tokens.json',
 'gemma-7b-chatgpt-prompts/tokenizer.json')

In [19]:
# from transformers import AutoModelForCausalLM, AutoTokenizer

# model_path = "gemma-7b-chatgpt-prompts"  # Your saved model dir
# tokenizer = AutoTokenizer.from_pretrained(model_path)
# model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")  # Uses GPU if available

In [22]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
!cp -r gemma-7b-chatgpt-prompts /content/drive/MyDrive/