# RQTL Prompt Classification - Examples of how to classify prompts by Request vs Question

In [1]:
from transformers import pipeline, AutoTokenizer, TFAutoModelForSequenceClassification, TFTrainingArguments, AdamWeightDecay
from IPython.display import clear_output
from datasets import Dataset
import tensorflow as tf # Used for fine-tuning the model




### Zero-shot-classification pipeline with typeform/distilbert-base-uncased-mnli

In [2]:
zs_classifier = pipeline("zero-shot-classification", model='typeform/distilbert-base-uncased-mnli')
candidate_labels = ["question", "request"]
sentence = ["Annie are you OK?"]
result = zs_classifier(sentence, candidate_labels)
clear_output(wait=True) # remove library warnings
print(f'Sentence: "{result[0]["sequence"]}"')
print(f'Label: {result[0]["labels"][0]} (score: {result[0]["scores"][0]:.2f})')


Sentence: "Annie are you OK?"
Label: question (score: 0.90)


In [3]:
sentence = ["Pass butter"]
result = zs_classifier(sentence, candidate_labels)
clear_output(wait=True) # remove library warnings
print(f'Sentence: "{result[0]["sequence"]}"')
print(f'Label: {result[0]["labels"][0]} (score: {result[0]["scores"][0]:.2f})')

Sentence: "Pass butter"
Label: request (score: 0.57)


### Few-shot tuning of Distilbert

In [4]:
# Manually labeled data
labeled_data = [
    {"text": "Are you OK?", "label": "question"},
    {"text": "Are you OK Annie", "label": "question"},
    {"text": "Be OK", "label": "request"},
    {"text": "Be OK Annie", "label": "request"},
    {"text": "You must be OK", "label": "request"},
    {"text": "You must be OK, right", "label": "question"},
    {"text": "Does this ever cause you any lack of confidence", "label": "question"},
    {"text": "Give me five", "label": "request"},
    {"text": "This is an order", "label": "request"},
    {"text": "Is this an order", "label": "question"},
    {"text": "Is this love or is it something else", "label": "question"},
    {"text": "This is love. Love me", "label": "request"},
    {"text": "This is an order", "label": "request"},
    {"text": "What is your name?", "label": "question"},
    {"text": "Please submit your report", "label": "request"},
    {"text": "Pass butter", "label": "request"},
    {"text": "Pass me the butter", "label": "request"},
    {"text": "Can you pass butter", "label": "question"},
    {"text": "Open the doors", "label": "request"},
    {"text": "Open the POD bay doors HAL", "label": "request"},
    {"text": "This is an order", "label": "request"},
    {"text": "How do I sort an array in python?", "label": "question"},
    {"text": "How do I sort an array", "label": "question"},
    {"text": "give me 5 sentences that end with the word apple", "label": "request"},
    {"text": "Hello, give me an example of something interesting you can do", "label": "request"},
    {"text": "Am I tall", "label": "question"},
    {"text": "Tell me if I am tall", "label": "request"},
    {"text": "Am I tall?", "label": "question"},
    {"text": "how to delete kcptun on server", "label": "question"},
    {"text": "how to cook paella", "label": "question"},
    {"text": "Are you tall", "label": "question"},
    {"text": "Calculate my height", "label": "request"},
    {"text": "How's the weather", "label": "question"},
    {"text": "If an individual used a large language model for sexual arousal, could it considered porn dependency?", "label": "question"},
    {"text": "It a user use an ai tex generation with custom characters for masturbate him  could be considered porn dependency?", "label": "question"},
    {"text": "Roleplay and act as a human Japanese woman teacher", "label": "request"},
    {"text": "You are a mediator in a heated political debate between two opposing parties.", "label": "request"},
    {"text": "Given a passage and some supplementary information, you are required to correct and output the refined passage in a fluent and natural style", "label": "request"},
    {"text": "Give me the opening scene to a sitcom", "label": "request"},
    {"text": "What programming language is used by the PlayStation", "label": "question"},
    {"text": "tell me how to make an llm agent", "label": "request"},
    {"text": "tell me a joke containing Tiger and Mobile phone?", "label": "request"},
    {"text": "Answer the query based on the given context. Do not make assumptions.Context: Nikhil is my brother. Query: Who likes Oranges?", "label": "request"},
    {"text": "Act as a writer. This plot takes places in an atmospheric and stylish retro-futuristic, 1960s-inspired setting. It features Loretta Miller, a beautiful, elegant, assertive and rich young woman who is a quadriplegic, paralyzed from her neck down.", "label": "question"},
    {"text": "Write long, interesting, artistic and imaginative scene with vivid, detailed and creative descriptions.", "label": "question"},
    {"text": "What's the best first move in tic-tac-toe?, Tell me more about tic-tac-toe strategies", "label": "question"},
    {"text": "From now, you *always* have to talk as if you are a cute girl who likes to use owo and similar slangs a lot. Hello! Tell me who you are.,What's your favorite food?", "label": "request"}
]

# Convert to Transformers Dataset format
texts = [item["text"] for item in labeled_data]
labels = [1 if item["label"] == "request" else 0 for item in labeled_data]
dataset = Dataset.from_dict({"text": texts, "label": labels})

In [5]:
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
model = TFAutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)

def tokenize_function(examples, tokenizer):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(lambda x: tokenize_function(x, tokenizer), batched=True)

training_args = TFTrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=0.0001,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=4, # We want the model to learn the examples, but we don't want to overfit
    weight_decay=0.01,
)

train_dataset = tokenized_dataset.to_tf_dataset(
    columns=["attention_mask", "input_ids"],
    label_cols=["label"],
    shuffle=True,
    batch_size=training_args.per_device_train_batch_size,
)

optimizer = AdamWeightDecay(learning_rate=training_args.learning_rate)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss)
model.fit(
    train_dataset,
    epochs=training_args.num_train_epochs
)




Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFDistilBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should 

Map:   0%|          | 0/47 [00:00<?, ? examples/s]

Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor)  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor)  
New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor})  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) 


Epoch 1/4

Epoch 2/4
Epoch 3/4
Epoch 4/4


<tf_keras.src.callbacks.History at 0x157cf517210>

Save the model you just fine-tuned and load it:

In [6]:
model.save_pretrained("fine-tuned-distilbert-rq")
tokenizer.save_pretrained("fine-tuned-distilbert-rq")
classifier = pipeline("text-classification", model="fine-tuned-distilbert-rq", tokenizer="fine-tuned-distilbert-rq")

Some layers from the model checkpoint at fine-tuned-distilbert-rq were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at fine-tuned-distilbert-rq and are newly initialized: ['dropout_39']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Hardware accelerator e.g. GPU is available in the environm

In [7]:
texts = ["Annie are you OK?", "Are you OK Annie", "Be OK Annie", "You must be OK Annie", "You must be OK Annie, aren't you?",
         "Does this ever cause you any lack of confidence", "Give me five", "Open the pod bay doors HAL",
         "This is an order", "Is this an order", "Could this perhaps be an order?", "How old are you?", "Pass butter",
         "It a user use an ai tex generation with custom characters for masturbate him  could be considered porn dependency?",
         "give me 5 sentences that end with the word apple", "How do I sort an array in python?",
         "Hello, give me an example of something interesting you can do.", "What assembly language is used by the GameCube",
         "Pass the butter", "Am I tall", "Are you tall", "Who's taller?",
         "write the lyrics to a rap song about some dude called phogos",
         "I have three oranges today, I ate an orange yesterday. How many oranges do I have?",
          "From what song did Red Garland quote in order to tease miles davis in 1958?"
         ]
results = classifier(texts)
label_map = {0: "question", 1: "request"}

print("### Classification with fine-tuned distilbert-base-uncased ###")
for text, result in zip(texts, results):
    label_str = label_map[int(result['label'].split('_')[-1])]
    prob = result['score']
    print(f"{text} -> {label_str} ({prob:.3f})")

### Classification with fine-tuned distilbert-base-uncased ###
Annie are you OK? -> question (0.969)
Are you OK Annie -> question (0.972)
Be OK Annie -> request (0.978)
You must be OK Annie -> question (0.641)
You must be OK Annie, aren't you? -> question (0.888)
Does this ever cause you any lack of confidence -> question (0.970)
Give me five -> request (0.986)
Open the pod bay doors HAL -> request (0.986)
This is an order -> request (0.975)
Is this an order -> question (0.966)
Could this perhaps be an order? -> question (0.968)
How old are you? -> question (0.971)
Pass butter -> request (0.986)
It a user use an ai tex generation with custom characters for masturbate him  could be considered porn dependency? -> question (0.967)
give me 5 sentences that end with the word apple -> request (0.986)
How do I sort an array in python? -> question (0.971)
Hello, give me an example of something interesting you can do. -> request (0.986)
What assembly language is used by the GameCube -> question

... adjust the dataset, adding or removing examples, and retrain until satisfied.

### Zero-shot classification with fine-tuned model available on Kaggle

You can also download the model I uploaded to Kaggle (https://www.kaggle.com/models/davidgromero/fine-tuned-distilbert-rq/transformers/default/1) using the Kagglehub library: 

In [18]:
import kagglehub
kaggle_paht = "davidgromero/fine-tuned-distilbert-rq/transformers/default/1"
kaggle_model = kagglehub.model_download(kaggle_paht)
print(f'Model downloaded at:\n{kaggle_model}')

Model downloaded at:
C:\Users\david\.cache\kagglehub\models\davidgromero\fine-tuned-distilbert-rq\transformers\default\1


In [17]:
K_PATH = f"{kaggle_model}/fine-tuned-distilbert-rq"
classifier = pipeline("text-classification", model=K_PATH, tokenizer=K_PATH)

Some layers from the model checkpoint at C:\Users\david\.cache\kagglehub\models\davidgromero\fine-tuned-distilbert-rq\transformers\default\1/fine-tuned-distilbert-rq were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at C:\Users\david\.cache\kagglehub\models\davidgromero\fine-tuned-distilbert-rq\transformers\default\1/fine-tuned-distilbert-rq and are new

In [19]:
texts = ["Annie are you OK?", "Are you OK Annie", "Be OK Annie", "You must be OK Annie", "You must be OK Annie, aren't you?",
         "Does this ever cause you any lack of confidence", "Give me five", "Open the pod bay doors HAL",
         "This is an order", "Is this an order", "Could this perhaps be an order?", "How old are you?", "Pass butter",
         "It a user use an ai tex generation with custom characters for masturbate him  could be considered porn dependency?",
         "give me 5 sentences that end with the word apple", "How do I sort an array in python?",
         "Hello, give me an example of something interesting you can do.", "What assembly language is used by the GameCube",
         "Pass the butter", "Am I tall", "Are you tall", "Who's taller?",
         "write the lyrics to a rap song about some dude called phogos",
         "I have three oranges today, I ate an orange yesterday. How many oranges do I have?",
          "From what song did Red Garland quote in order to tease miles davis in 1958?"
         ]
results = classifier(texts)
label_map = {0: "question", 1: "request"}

print("### Zero/shot classification with davidgromero/fine-tuned-distilbert-rq ###")
for text, result in zip(texts, results):
    label_str = label_map[int(result['label'].split('_')[-1])]
    prob = result['score']
    print(f"{text} -> {label_str} ({prob:.3f})")

### Zero/shot classification with davidgromero/fine-tuned-distilbert-rq ###
Annie are you OK? -> question (0.965)
Are you OK Annie -> question (0.969)
Be OK Annie -> request (0.977)
You must be OK Annie -> request (0.925)
You must be OK Annie, aren't you? -> question (0.954)
Does this ever cause you any lack of confidence -> question (0.968)
Give me five -> request (0.980)
Open the pod bay doors HAL -> request (0.979)
This is an order -> request (0.973)
Is this an order -> question (0.967)
Could this perhaps be an order? -> question (0.968)
How old are you? -> question (0.966)
Pass butter -> request (0.977)
It a user use an ai tex generation with custom characters for masturbate him  could be considered porn dependency? -> question (0.957)
give me 5 sentences that end with the word apple -> request (0.979)
How do I sort an array in python? -> question (0.967)
Hello, give me an example of something interesting you can do. -> request (0.979)
What assembly language is used by the GameCube