<a href="https://colab.research.google.com/github/srivarthinivelu/fine_tuning/blob/main/FineTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q -U transformers peft trl accelerate bitsandbytes datasets --quiet

#-U ==> upgrade to the latest versions
#transformers ==> Hugging Face library for AI models
#peft ==> Parameter-Efficient Fine-Tuning
#trl ==> training library for LLMs (just like we have the sklearn package to train ML models)
#accelerate ==> makes training faster on the GPU
#bitsandbytes ==> 4-bit compression (quantization) for models
#datasets ==> load/create training data


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments #AutoTokenizer ==> converts text -> numbers (tokens); AutoModelForCausalLM ==> loads the language model
#TrainingArguments ==> settings for training
from peft import LoraConfig, get_peft_model #LoraConfig ==> settings for the LoRA adapters; get_peft_model ==> attaches LoRA adapters to the model
from trl import SFTTrainer #trainer class for fine-tuning (imported here; the training cell below uses transformers.Trainer instead)
from datasets import Dataset #creates datasets

# Simple marker that every import above succeeded.
print("Done!")

Done!


In [None]:
from google.colab import userdata
import os

# Copy the Hugging Face token from the Colab secret named 'hf_token' into the
# HF_TOKEN environment variable so that later cells can read it from os.environ.
os.environ.update(HF_TOKEN=userdata.get('hf_token'))
print("Token loaded!")

# This plays the same role as the usual local .env workflow, for example:
#   load_dotenv()
#   groq_api_key = os.getenv('GROQ_API_KEY')

Token loaded!


In [None]:
# Load the base model (4-bit quantized) together with its tokenizer
from transformers import BitsAndBytesConfig

model_id = "google/gemma-2b"  # which model to download
hf_token = os.environ['HF_TOKEN']  # our Hugging Face token, set in the token cell

# 4-bit storage = 0.5 bytes per number (1 byte = 8 bits):
#   compared with 16-bit (2 bytes per number) -> 4x less memory
#   compared with 32-bit (4 bytes per number) -> 8x less memory
quantization = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization,
    device_map="auto",  # automatic device placement (cuda/GPU)
    token=hf_token,
)

# The tokenizer converts text ==> numbers; pad on the right-hand side of each sequence.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
tokenizer.padding_side = 'right'

print("✅ Model loaded!")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Model loaded!


In [None]:
# Simple sentiment training data.
# Every example is a single string in the form "Review: <text>\nSentiment: <LABEL>".
training_texts = [
    "Review: I love this product!\nSentiment: POSITIVE",
    "Review: Amazing quality, highly recommend!\nSentiment: POSITIVE",
    "Review: Best purchase ever!\nSentiment: POSITIVE",
    "Review: Terrible, waste of money.\nSentiment: NEGATIVE",
    "Review: Very disappointed, broke quickly.\nSentiment: NEGATIVE",
    # Disabled examples, kept for reference:
    # "Review: Awful experience, never again.\nSentiment: NEGATIVE",
    # "Review: It's okay, nothing special.\nSentiment: NEUTRAL",
    "Review: Average product, does the job.\nSentiment: NEUTRAL",
]

# Wrap each string in the {"text": ...} record layout used by the rest of the notebook.
training_data = [{"text": text} for text in training_texts]

dataset = Dataset.from_list(training_data)
print(f"✅ Created {len(training_data)} training examples!")

✅ Created 6 training examples!


In [None]:
# LoRA = train a small set of adapter weights instead of the whole model.
# The original weights are frozen (won't change); only the adapter weights are trained.
lora_settings = {
    "r": 8,                     # adapter size: higher = more capacity, but slower
    "lora_alpha": 16,           # scaling factor
    "lora_dropout": 0.05,       # randomly turns off 5% of connections to limit overfitting (memorizing instead of learning)
    "task_type": "CAUSAL_LM",   # text generation task - predicts the next word
}
lora_config = LoraConfig(**lora_settings)

# Attach the LoRA adapters.
# NOTE: this rebinds `model`, so re-running this cell without reloading the base
# model would call get_peft_model on the already-wrapped model.
model = get_peft_model(model, lora_config)

print("✅ LoRA adapters added!")
model.print_trainable_parameters()  # prints how small the trainable fraction is

✅ LoRA adapters added!
trainable params: 921,600 || all params: 2,507,094,016 || trainable%: 0.0368


In [None]:
def ask_model(text, max_new_tokens=5):
    """Ask the model for the sentiment of a single review.

    Args:
        text: The review text to classify.
        max_new_tokens: Maximum number of tokens to generate after the prompt.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        The decoded prompt followed by the generated continuation, with
        special tokens removed.
    """
    prompt = f"Review: {text}\nSentiment:"  # same format as the training data
    # tokenizer(prompt) converts the text to numbers; return_tensors="pt" returns
    # PyTorch tensors; .to(model.device) moves them to the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # The model may still be in training mode (e.g. after trainer.train()), which
    # keeps the LoRA dropout active. Generate in eval mode so outputs are not
    # perturbed by dropout, then restore whatever mode the model was in.
    was_training = model.training
    model.eval()
    try:
        output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    finally:
        model.train(was_training)

    # decode converts the numbers back to text
    return tokenizer.decode(output[0], skip_special_tokens=True)

print("BEFORE training:")
print(ask_model("I love this product!"))

BEFORE training:
Review: I love this product!
Sentiment: I love this product!


In [None]:
from transformers import Trainer, DataCollatorForLanguageModeling

# Tokenize the data
def tokenize(example):
    """Tokenize one training example for causal-LM fine-tuning.

    Args:
        example: A dataset record with a "text" field.

    Returns:
        The tokenizer output for the text, truncated/padded to 128 tokens.
    """
    # Append the end-of-sequence token so the model learns to stop right after
    # the sentiment label; without it, generation keeps going past the label.
    # NOTE(review): assumes the tokenizer does not already append EOS by itself
    # and that its pad token differs from EOS (otherwise the collator would mask
    # the EOS label) — confirm for the tokenizer in use.
    eos = tokenizer.eos_token or ""
    return tokenizer(
        example["text"] + eos,
        truncation=True,        # cut if longer than max_length
        max_length=128,         # maximum 128 tokens
        padding="max_length",   # pad shorter text to 128 tokens
    )

# Apply the tokenize function to every example
tokenized_dataset = dataset.map(tokenize)

# Collator = groups examples into batches.
# mlm=False ==> not masked language modelling (we are doing causal LM)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Training settings
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,  # process 2 examples at a time
    max_steps=50,                   # train for 50 steps in total
    learning_rate=2e-4,             # how big each learning step is
    logging_steps=10,               # log the training loss every 10 steps
    fp16=True,
)

# Trainer: our LoRA-wrapped model + our tokenized training data
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

print("✅ Starting training...")
trainer.train()


Map:   0%|          | 0/6 [00:00<?, ? examples/s]

✅ Starting training...


Step,Training Loss
10,4.5061
20,3.2158
30,2.3739
40,2.0032
50,1.8368


TrainOutput(global_step=50, training_loss=2.7871756744384766, metrics={'train_runtime': 28.0706, 'train_samples_per_second': 3.562, 'train_steps_per_second': 1.781, 'total_flos': 152279502028800.0, 'train_loss': 2.7871756744384766, 'epoch': 16.666666666666668})

In [None]:
# Check the model again AFTER training, on a few sample reviews
print("AFTER training:")
for review in (
    "I love this product!",
    "Terrible, waste of money.",
    "It's okay, average.",
):
    print(ask_model(review))

AFTER training:
Review: I love this product!
Sentiment: POSITIVE
 besonderheiten:
Review: Terrible, waste of money.
Sentiment: NEGATIVE
Effectiveness: POOR
Review: It's okay, average.
Sentiment: NEUTRAL
Bedding


In [None]:
# Save the fine-tuned model
import shutil
from google.colab import files

output_dir = "my_sentiment_model"
model.save_pretrained(output_dir)      # saves the LoRA adapter weights (not the full base model)
tokenizer.save_pretrained(output_dir)  # save the tokenizer too (needed to use the model later)
print("✅ Model saved!")

# Download as zip.
# make_archive writes a fresh sentiment_model.zip on every run; `zip -r` would
# update an existing archive in place and could keep stale files from earlier runs.
shutil.make_archive("sentiment_model", "zip", root_dir=".", base_dir=output_dir)
files.download("sentiment_model.zip")

✅ Model saved!
updating: my_sentiment_model/ (stored 0%)
updating: my_sentiment_model/adapter_model.safetensors (deflated 8%)
updating: my_sentiment_model/special_tokens_map.json (deflated 76%)
updating: my_sentiment_model/adapter_config.json (deflated 57%)
updating: my_sentiment_model/tokenizer_config.json (deflated 96%)
updating: my_sentiment_model/README.md (deflated 65%)
updating: my_sentiment_model/tokenizer.model (deflated 51%)
updating: my_sentiment_model/tokenizer.json (deflated 84%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!pip install huggingface-hub



In [None]:
from huggingface_hub import login
# Interactive Hugging Face login (renders a token prompt in the notebook).
# NOTE(review): the message shown after this cell ran says the HF_TOKEN environment
# variable is set and stays the active token regardless of the token entered here —
# confirm that HF_TOKEN itself has the permissions needed by the upload cell.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:
from huggingface_hub import HfApi

# Target repository on the Hugging Face Hub, in "<namespace>/<repo name>" form.
# Defined once so create_repo and upload_folder cannot drift apart.
repo_id = "srivarthini/finetuning-sentiment-analyzer"

api = HfApi()

# Create the repo (exist_ok=True makes re-runs safe) and upload the saved folder.
# NOTE(review): a 403 "You don't have the rights to create a model under the
# namespace" response means the active token cannot write to this namespace —
# check the token's permissions and that the namespace matches the account.
api.create_repo(repo_id=repo_id, exist_ok=True)

api.upload_folder(
    folder_path="my_sentiment_model",
    repo_id=repo_id,
)

HfHubHTTPError: (Request ID: Root=1-6950d441-0be03c19616e6c3e3026748e;8136f22c-00b8-45f6-90a4-13e1ea69d794)

403 Forbidden: You don't have the rights to create a model under the namespace "srivarthini".
Cannot access content at: https://huggingface.co/api/repos/create.
Make sure your token has the correct permissions.