In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# Install required libraries
!pip install -q transformers peft bitsandbytes accelerate datasets trl

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import load_dataset
from trl import SFTTrainer

# Load the Phi-2 model and tokenizer
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Configure quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Load the quantized model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Prepare the model for k-bit training
model = prepare_model_for_kbit_training(model)

# Configure LoRA
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Get the PEFT model
model = get_peft_model(model, peft_config)

# Print trainable parameters
model.print_trainable_parameters()

# Load the OASST1 dataset and select 10k samples
dataset = load_dataset("OpenAssistant/oasst1", split="train")
dataset = dataset.select(range(10000))  # Use only first 10,000 examples

# Function to format the data
def format_data(example):
    instruction = example['text']
    response = example.get('response', '')  # Adjust if needed based on actual dataset structure
    prompt = f"Instruction: {instruction}\nResponse:"
    example['text'] = f"{prompt} {response}"
    return example

# Apply the formatting function to the dataset
formatted_dataset = dataset.map(format_data)

# Define training arguments
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,  # Reduced from 3 to 1
    per_device_train_batch_size=8,  # Increased from 4 to 8
    gradient_accumulation_steps=4,  # Reduced from 8 to 4
    optim="paged_adamw_32bit",
    save_steps=500,  # Increased from 100 to 500
    logging_steps=50,  # Increased from 10 to 50
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
)

# Define SFT trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=formatted_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
)

# Train the model
trainer.train()

print("Training complete!")

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

trainable params: 18,350,080 || all params: 2,798,033,920 || trainable%: 0.6558


Downloading readme:   0%|          | 0.00/10.2k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/39.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.08M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/84437 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/4401 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
50,2.166
100,2.0789
150,2.1092
200,2.0685
250,2.055
300,2.0201


Training complete!


In [2]:
# Save the model
output_dir = "./phi-2-fine-tuned"
trainer.model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")

Model saved to ./phi-2-fine-tuned


In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

# Path to your saved fine-tuned model
MODEL_PATH = "./phi-2-fine-tuned"

# Load the saved tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Configure quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Load the base model
base_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Load the PEFT configuration
peft_config = PeftConfig.from_pretrained(MODEL_PATH)

# Load the PEFT model
model = PeftModel.from_pretrained(base_model, MODEL_PATH)

# Set the model to evaluation mode
model.eval()

def generate_response(instruction, max_length=512):
    # Format the input
    prompt = f"Instruction: {instruction}\nResponse:"
    
    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    # Generate the response
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
    
    # Decode and return the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("Response:")[1].strip()

# Example usage
if __name__ == "__main__":
    while True:
        user_input = input("Enter your instruction (or 'quit' to exit): ")
        if user_input.lower() == 'quit':
            break
        response = generate_response(user_input)
        print("Model Response:", response)
        print()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Enter your instruction (or 'quit' to exit):  What are the main causes of climate change?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Model Response: The main causes of climate change are human activities such as burning fossil fuels, deforestation, and industrial processes. These activities release greenhouse gases such as carbon dioxide, methane, and nitrous oxide into the atmosphere, which trap heat and cause the Earth's temperature to rise. Climate change also has natural causes such as changes in the Earth's orbit and solar radiation, but these are not significant compared to the human-caused effects.



Enter your instruction (or 'quit' to exit):  How can I improve my time management skills?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Model Response: 



Enter your instruction (or 'quit' to exit):  Write a Python function to check if a number is prime.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Model Response: def is_prime(n):
    if n <= 1:
        return False
    for i in range(2, int(n**0.5) + 1):
        if n % i == 0:
            return False
    return True

This function takes a number as input and returns True if it is prime and False otherwise. It first checks if the number is less than or equal to 1, in which case it is not prime. Then, it iterates over all numbers from 2 to the square root of the input number and checks if any of them divide the input number evenly. If any of them do, the input number is not prime. Otherwise, it is prime.



KeyboardInterrupt: Interrupted by user

In [5]:
# First, make sure you have the latest huggingface_hub library installed
!pip install --upgrade huggingface_hub

# Import necessary libraries
from huggingface_hub import HfApi
from getpass import getpass
import os

# Set your Hugging Face credentials
# It's better to input your token this way instead of hardcoding it
hf_token = getpass("Enter your Hugging Face token: ")

# Set the path to your saved model
local_model_path = "./phi-2-fine-tuned"  # Replace with your actual path if different

# Your specific repository ID
repo_id = "sagar007/phi2_finetune"

# Initialize the Hugging Face API
api = HfApi()

# Create the repository if it doesn't exist
api.create_repo(repo_id=repo_id, token=hf_token, exist_ok=True)

# Upload the model files
for root, _, files in os.walk(local_model_path):
    for file in files:
        file_path = os.path.join(root, file)
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_path.replace(local_model_path, "").lstrip("/"),
            repo_id=repo_id,
            token=hf_token
        )

print(f"Model successfully pushed to {repo_id}")

# You can now use this model in your Gradio app or elsewhere by referencing:
# MODEL_PATH = "sagar007/phi2_finetune"

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




Enter your Hugging Face token:  ········


adapter_model.safetensors:   0%|          | 0.00/73.4M [00:00<?, ?B/s]

Model successfully pushed to sagar007/phi2_finetune
