In [1]:
from google.colab import files

uploaded = files.upload()  # This will open a file upload prompt


Saving dataset.md to dataset.md
Saving deepseekv3-cost-explained.md to deepseekv3-cost-explained.md
Saving deepseekv3-explained.md to deepseekv3-explained.md
Saving design-notes-3fs.md to design-notes-3fs.md
Saving open-source-week.md to open-source-week.md


In [2]:
import os
import shutil

# Create a directory if it doesn't exist
data_dir = "/content/data"
os.makedirs(data_dir, exist_ok=True)

# Move uploaded files to the new directory
for filename in uploaded.keys():
    shutil.move(filename, os.path.join(data_dir, filename))

In [3]:
# Install dependencies
!pip install transformers peft torch accelerate datasets trl bitsandbytes ragas sentence-transformers faiss-cpu llama-cpp-python nltk

import os
import torch
import random
import numpy as np
import faiss
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model
from trl import DPOTrainer
from nltk.translate.bleu_score import sentence_bleu
from sentence_transformers import SentenceTransformer

# === 1️⃣ Load and Process Dataset ===
dataset_files = [
    "/content/data/dataset.md",
    "/content/data/deepseekv3-cost-explained.md",
    "/content/data/deepseekv3-explained.md",
    "/content/data/design-notes-3fs.md",
    "/content/data/open-source-week.md"
]

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting trl
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting ragas
  Downloading ragas-0.2.14-py3-none-any.whl.metadata (8.5 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.7.tar.gz (66.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.7/66.7 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-non

In [4]:
data_entries = []

for file in dataset_files:
    with open(file, "r", encoding="utf-8") as f:
        content = f.readlines()

    for line in content:
        if ":" in line:  # Extract key-value pairs
            parts = line.strip().split(":", 1)
            if len(parts) == 2:
                question, answer = parts
                data_entries.append({"question": question.strip(), "answer": answer.strip()})

# Convert into Hugging Face dataset
dataset = Dataset.from_list(data_entries)

# Split dataset
train_size = int(0.8 * len(dataset))
valid_size = int(0.1 * len(dataset))
train_dataset = dataset.select(range(train_size))
valid_dataset = dataset.select(range(train_size, train_size + valid_size))
test_dataset = dataset.select(range(train_size + valid_size))

In [5]:
# === 2️⃣ Load Model & Tokenizer ===
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.6k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

In [6]:
from transformers import Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
import torch

# Define the LoRA configuration
lora_config = LoraConfig(
    r=8, lora_alpha=16, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
)

# Apply LoRA configuration to model
model = get_peft_model(model, lora_config)

# Tokenization function
def tokenize_function(example):
    inputs = tokenizer(example["question"], truncation=True, padding="max_length", max_length=32)
    labels = tokenizer(example["answer"], truncation=True, padding="max_length", max_length=32)
    inputs["labels"] = labels["input_ids"]  # Add labels for loss computation
    return inputs

# Apply tokenization to datasets
train_dataset = train_dataset.map(tokenize_function, batched=True)
valid_dataset = valid_dataset.map(tokenize_function, batched=True)

# Define TrainingArguments without model_init_kwargs
training_args = TrainingArguments(
    output_dir="./qwen_finetuned",
    learning_rate=5e-4,  # Increased learning rate for faster convergence
    per_device_train_batch_size=8,  # Increased batch size
    per_device_eval_batch_size=8,
    num_train_epochs=1,  # Keep single epoch to reduce training time
    eval_strategy="steps",  # Evaluate more frequently
    save_strategy="steps",
    save_steps=500,  # Save model more frequently
    logging_dir="./logs",
    save_total_limit=1,  # Keep only the latest checkpoint
    fp16=True,
    gradient_accumulation_steps=2,  # Simulating larger batch size without increasing memory
    load_best_model_at_end=True
)

# Create Trainer and begin training
trainer = Trainer(
    model=model, args=training_args, train_dataset=train_dataset, eval_dataset=valid_dataset
)

trainer.train()


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

Map:   0%|          | 0/7 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mabeywickramairumi[0m ([33mabeywickramairumi-irumi[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss


TrainOutput(global_step=4, training_loss=12.650873184204102, metrics={'train_runtime': 118.7085, 'train_samples_per_second': 0.497, 'train_steps_per_second': 0.034, 'total_flos': 31453516922880.0, 'train_loss': 12.650873184204102, 'epoch': 1.0})

In [7]:
quant_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True
)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=quant_config)

model.save_pretrained("qwen_finetuned_gguf", use_safetensors=True)
tokenizer.save_pretrained("qwen_finetuned_gguf")


`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

('qwen_finetuned_gguf/tokenizer_config.json',
 'qwen_finetuned_gguf/special_tokens_map.json',
 'qwen_finetuned_gguf/vocab.json',
 'qwen_finetuned_gguf/merges.txt',
 'qwen_finetuned_gguf/added_tokens.json',
 'qwen_finetuned_gguf/tokenizer.json')

In [8]:
%%writefile inference.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load fine-tuned model
model_path = "qwen_finetuned"  # Ensure this folder is included in submission

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, device_map="auto")

def generate_answer(question):
    input_ids = tokenizer(question, return_tensors="pt").input_ids.to("cuda")
    output = model.generate(input_ids, max_length=200)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example usage
if __name__ == "__main__":
    question = "How does DeepSeek-V3 optimize training?"
    print(generate_answer(question))


Writing inference.py


In [9]:
quant_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True
)

# Load the model with the quantization config
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=quant_config)

# Save the quantized model in gguf format
model.save_pretrained("qwen_finetuned_gguf", use_safetensors=True)
tokenizer.save_pretrained("qwen_finetuned_gguf")


`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

('qwen_finetuned_gguf/tokenizer_config.json',
 'qwen_finetuned_gguf/special_tokens_map.json',
 'qwen_finetuned_gguf/vocab.json',
 'qwen_finetuned_gguf/merges.txt',
 'qwen_finetuned_gguf/added_tokens.json',
 'qwen_finetuned_gguf/tokenizer.json')

In [13]:
from transformers import AutoModelForCausalLM
from bitsandbytes import BitsAndBytesConfig

# Set up quantization configuration
quant_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True
)

# Load the model with the quantization config
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=quant_config)

# Save the quantized model in .gguf format
model.save_pretrained("qwen_finetuned_gguf", use_safetensors=True)

# Save the tokenizer for the model
tokenizer.save_pretrained("qwen_finetuned_gguf")



ImportError: cannot import name 'BitsAndBytesConfig' from 'bitsandbytes' (/usr/local/lib/python3.11/dist-packages/bitsandbytes/__init__.py)

In [11]:
!pip install huggingface_hub
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: write).
The token `task3` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate

In [12]:
from huggingface_hub import upload_folder

# Path to your model folder
model_path = "/content/qwen_finetuned"  # Adjust this if your model path differs

# Replace with your Hugging Face repo ID
repo_id = "irumiTh/TeamName_codeRed"  # This will be the name of your repository

# Upload the folder to Hugging Face
upload_folder(
    repo_id=repo_id,
    folder_path=model_path
)


optimizer.pt:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/7.39M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/irumiTh/TeamName_codeRed/commit/b5221109cf4e30dad74068fdf22fa938f48a032a', commit_message='Upload folder using huggingface_hub', commit_description='', oid='b5221109cf4e30dad74068fdf22fa938f48a032a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/irumiTh/TeamName_codeRed', endpoint='https://huggingface.co', repo_type='model', repo_id='irumiTh/TeamName_codeRed'), pr_revision=None, pr_num=None)

In [15]:
mv model.safetensors TeamName_codeRed.gguf


mv: cannot stat 'model.safetensors': No such file or directory
