In [2]:
!pip3 install torch transformers huggingface_hub bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.44.1


In [3]:
# Required packages (install with):
# pip3 install torch transformers bitsandbytes huggingface_hub

# Hugging Face model ids, one per topic the router can dispatch to
MATH_MODEL_NAME = "Qwen/Qwen2.5-Math-1.5B-Instruct"
CODING_MODEL_NAME = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
LIFE_ADVICE_MODEL_NAME = "OpenAssistant/oasst-sft-1-pythia-12b"
GENERAL_MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"

# Zero-shot classifier settings: model id, candidate topic labels, and the
# minimum top score required before a topic-specific model is used
CLASSIFICATION_MODEL = "facebook/bart-large-mnli"
CLASSIFICATION_LABELS = [
    "math",
    "coding",
    "life advice",
    "general",
]
CLASSIFICATION_THRESHOLD = 0.4


In [4]:
# Authenticate in huggingface to download models
import os
from getpass import getpass

from huggingface_hub import login

# Never paste the token into the notebook source: it would be saved (and shared)
# with the file. Prefer the HF_TOKEN environment variable and fall back to an
# interactive prompt whose input is not echoed.
HUGGINGFACE_API_KEY = os.environ.get("HF_TOKEN") or getpass("Enter your Hugging Face token: ")

# Log in to Hugging Face using API key
login(HUGGINGFACE_API_KEY)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [5]:
# Define a function for fetching the model (download if necessary)
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


def fetch_model(model_name, quantize_bits=4):
    """
    Load the tokenizer and causal language model for ``model_name``.

    ``from_pretrained`` downloads the files on first use and serves them from
    the local Hugging Face cache afterwards.

    Args:
        model_name (str): The model name to fetch.
        quantize_bits (int | None): 4 or 8 to load the model with bitsandbytes
            quantization at that width; None to load it without quantization.

    Returns:
        tokenizer (transformers.AutoTokenizer): The tokenizer for the model.
        model (transformers.AutoModelForCausalLM): The model.

    Raises:
        ValueError: If ``quantize_bits`` is not 4, 8 or None.
    """
    # Reject unsupported widths up front: otherwise both load_in_* flags would
    # be False and the request would be silently ignored or misinterpreted.
    if quantize_bits not in (4, 8, None):
        raise ValueError(
            f"quantize_bits must be 4, 8 or None, got {quantize_bits!r}"
        )

    # Only build a quantization config when quantization was actually requested
    quant_config = None
    if quantize_bits is not None:
        quant_config = BitsAndBytesConfig(
            load_in_4bit=(quantize_bits == 4),
            load_in_8bit=(quantize_bits == 8),
        )

    print(f"Model {model_name} is being loaded...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quant_config,
    )

    return tokenizer, model

In [6]:
# Define a generic handler class that answers questions with a topic-specific model; it can wrap any causal LM

class TopicModelHandler:
    """
    Base class for topic model handlers, which provide answers to specific topics.

    Each instance owns one tokenizer/model pair obtained from ``fetch_model``.
    """

    def __init__(self, model_name):
        """Load the tokenizer and the 4-bit quantized model for ``model_name``."""
        self.tokenizer, self.model = fetch_model(model_name, quantize_bits=4)

    def answer(self, question, max_new_tokens=100):
        """
        Generate a text completion for ``question``.

        Args:
            question (str): The prompt passed to the model as-is.
            max_new_tokens (int): Upper bound on the number of generated tokens.

        Returns:
            str: The decoded first output sequence with special tokens removed.
                The whole sequence returned by ``generate`` is decoded, so the
                text may start with the prompt itself.
        """
        # The tokenizer builds tensors on CPU; move them to wherever the model
        # lives so generation does not depend on implicit device handling.
        inputs = self.tokenizer(question, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)


# Lazy factories: each one builds (and therefore loads) its handler only when
# called, so no model is downloaded until a query is routed to that topic.
def _make_math_handler():
    return TopicModelHandler(MATH_MODEL_NAME)


def _make_coding_handler():
    return TopicModelHandler(CODING_MODEL_NAME)


def _make_life_advice_handler():
    return TopicModelHandler(LIFE_ADVICE_MODEL_NAME)


def _make_general_handler():
    return TopicModelHandler(GENERAL_MODEL_NAME)


# Topic label -> zero-argument factory returning the handler for that topic
MODELS = dict(
    [
        ("math", _make_math_handler),
        ("coding", _make_coding_handler),
        ("life advice", _make_life_advice_handler),
        ("general", _make_general_handler),
    ]
)

In [7]:
# Build the topic classifier
import torch
from transformers import pipeline


class ZeroShotClassifier:
    """
    Thin wrapper around the Hugging Face zero-shot-classification pipeline.

    Example usage:
        classifier = ZeroShotClassifier()
        result = classifier.classify("How to learn Python?")

    ``classify`` returns the pipeline's result unchanged, a dict of the form:
        {
            'sequence': the query that was classified,
            'labels': the candidate labels,
            'scores': one score per entry in 'labels'
        }
    Callers in this notebook treat ``labels[0]`` / ``scores[0]`` as the
    winning prediction.
    """

    def __init__(self):
        # pipeline() expects a device index: 0 for the first GPU, -1 for CPU
        gpu_available = torch.cuda.is_available()
        self.device = 0 if gpu_available else -1
        print(f"Using device: {'GPU' if self.device == 0 else 'CPU'}")

        # Pin the model explicitly instead of relying on the pipeline default
        zero_shot = pipeline(
            "zero-shot-classification",
            model=CLASSIFICATION_MODEL,
            device=self.device,
        )
        self.classifier = zero_shot

    def classify(self, query, candidate_labels=CLASSIFICATION_LABELS):
        """Score ``query`` against ``candidate_labels`` and return the raw pipeline result."""
        prediction = self.classifier(query, candidate_labels)
        return prediction

In [8]:
# **OPTIONAL** Smoke-test the classifier on one interactively entered query
classifier = ZeroShotClassifier()

# Read the text to classify from the user
query = input("Enter your query: ")

# Run the classifier; the first label/score pair is taken as the prediction
result = classifier.classify(query)
winner_label = result["labels"][0]
winner_score = result["scores"][0]

# Report the predicted topic together with its score
print(f"Query: '{query}' | Predicted Topic: {winner_label}, Score: {winner_score:.2f}")

Using device: GPU


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



Enter your query: what is the derivative of x^2?
Query: 'what is the derivative of x^2?' | Predicted Topic: math, Score: 0.92


In [9]:
# Finally wrap up and create a function to answer questions
_shared_classifier = None  # ZeroShotClassifier built on first use, then reused


def handle_query(query, classifier=None):
    """
    Classify ``query`` by topic and answer it with the matching model.

    Args:
        query (str): The user's question.
        classifier: Optional object with a ``classify(query)`` method returning
            a dict with ``"labels"`` and ``"scores"`` lists. When omitted, a
            single ``ZeroShotClassifier`` is created on the first call and
            reused afterwards, so the classification model is not reloaded
            for every query.

    Returns:
        str: The text produced by the selected handler's ``answer`` method.
    """
    global _shared_classifier

    if classifier is None:
        if _shared_classifier is None:
            _shared_classifier = ZeroShotClassifier()
        classifier = _shared_classifier

    result = classifier.classify(query)
    result_label, confidence = result["labels"][0], result["scores"][0]

    # check threshold to decide if we should use topic model or general model
    if confidence < CLASSIFICATION_THRESHOLD:
        result_label = "general"

    # Log only after the fallback decision so the message names the model
    # that is actually used.
    print(f"Using model: {result_label}, Confidence: {confidence:.2f}")

    handler_api = MODELS[result_label]()

    return handler_api.answer(query)

In [10]:
# End-to-end check: read a query from the user, let handle_query classify it and
# pick a model, then print the answer that model produced.
user_query = input("Enter your query: ")
response = handle_query(user_query)
print("Response:", response)

Enter your query: what is the derivative of x^2?
Using device: GPU
Using model: math, Confidence: 0.92
Model Qwen/Qwen2.5-Math-1.5B-Instruct is being loaded...


tokenizer_config.json:   0%|          | 0.00/7.32k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/656 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



Response: what is the derivative of x^2? The derivative of \( x^2 \) is \( 2x \). This is obtained using the power rule of differentiation, which states that if \( f(x) = x^n \), then \( f'(x) = nx^{n-1} \). Here, \( n = 2 \), so the derivative of \( x^2 \) is \( 2x \).
