# Part 1: Inference

In [1]:
# Install the required libraries for transformers (Hugging Face library for NLP)
!pip install transformers



In [3]:
from transformers import pipeline

# Specify the model to use (pretrained from Hugging Face)
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" # You can use other models or your own fine-tuned one model here
task = "text-generation"
prompt = "Explain how neural networks work in simple terms"

# Create a text-generation pipeline
generator = pipeline(task, model=model_name)

# Generate text based on the given prompt
results = generator(prompt, max_length=100, truncation=True, num_return_sequences=1)

# Print the generated output
text = results[0].get("generated_text")  # Extract generated text
print(f"Generated text:\n{text}")

config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Device set to use cuda:0


Generated text:
Explain how neural networks work in simple terms.
<think>
Okay, so I need to explain how neural networks work in simple terms. I remember that neural networks are a big part of machine learning, but I'm not exactly sure how they work. Let me think about what I know.

First, neural networks are inspired by the human brain. The brain has neurons that communicate through synapses. I think each neuron receives input, does some processing, and then sends output signals to other neurons.


# Part 2: Fine-Tuning

In [4]:
# Install the required libraries for transformers, dataset handling, and logging
!pip install -q transformers datasets wandb

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m491.5/491.5 kB[0m [31m20.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/193.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [13]:
from google.colab import userdata
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
)
import wandb

# Log in to Weights & Biases (W&B) using Colab secrets
wandb.login(key=userdata.get('WANDB_API_KEY'))

# Define the model for fine-tuning
model_name = "Qwen/Qwen3-0.6B"

# Create a small custom dataset for training and validation
train_data = {
    "text": [
        "Gustteligence I love gust gust gust gust",
        "Machine learning algorithms learn patterns from data to make predictions and improve over time.",
        "Neural networks, inspired by the human brain, are the backbone of modern deep learning systems.",
        "Self-driving cars rely on AI to navigate roads, detect obstacles, and make real-time decisions.",
    ]
}

val_data = {
    "text": [
        "AI has the potential to solve some of the world's most pressing challenges, from healthcare to climate change and gust.",
        "The collaboration between humans and AI will define the next era of technological innovation."
    ]
}



In [14]:
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
# For models like GPT-2 that don't have a pad token, assign the eos token as pad_token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the model
model = AutoModelForCausalLM.from_pretrained(model_name)

# Convert dataset into Hugging Face Dataset format
train_dataset = Dataset.from_dict(train_data)
val_dataset = Dataset.from_dict(val_data)

# Define tokenization function with truncation and padding
max_length = 64  # Maximum sequence length

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)

# Apply tokenization to datasets
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_val = val_dataset.map(tokenize_function, batched=True)

# Add labels by copying input_ids
# The labels are simply the input_ids because the model learns to predict the next token.
tokenized_train = tokenized_train.map(lambda examples: {"labels": examples["input_ids"]})
tokenized_val = tokenized_val.map(lambda examples: {"labels": examples["input_ids"]})

tokenizer_config.json:   0%|          | 0.00/9.68k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [15]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="gust-tuned",  # Directory to save model checkpoints
    learning_rate=0.0001,  # Learning rate for fine-tuning
    per_device_train_batch_size=1,  # Reduce batch size for low memory usage
    per_device_eval_batch_size=1,
    eval_strategy="steps",  # Evaluate periodically during training
    num_train_epochs=1,  # Number of training epochs
    eval_steps=2,  # Perform evaluation every 2 steps
    logging_steps=2,  # Log training details every 2 steps
    save_steps=2,  # Save model checkpoints every 2 steps
    load_best_model_at_end=True,  # Load best model after training
    report_to=["wandb"],  # Log metrics to Weights & Biases
    push_to_hub=True,  # Upload model to Hugging Face Hub
)

# Initialize the Trainer class
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    processing_class=tokenizer,
)

# Fine-tune the model (this will log training metrics to W&B and evaluate on the validation set)
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 594.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 66.12 MiB is free. Process 3745 has 14.67 GiB memory in use. Of the allocated memory 14.33 GiB is allocated by PyTorch, and 229.16 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)