In [None]:
!pip install torch
!pip install --upgrade torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
!pip install evaluate
!pip install datasets

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting dill (from evaluate)
  Downloading dill-0.4.0-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from evaluate)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.18-py311-none-any.whl.metadata (7.5 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec>=2021.05.0 (from fsspec[http]>=2021.05.0->evaluate)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [

In [1]:
import os, random, numpy as np, torch

In [2]:
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    pipeline
)

In [None]:
from datasets import load_dataset
import evaluate

In [3]:
os.environ["WANDB_DISABLED"] = "true"

In [None]:
if 'COLAB_TPU_ADDR' in os.environ:
    import torch_xla.core.xla_model as xm
    device = xm.xla_device()
else:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if device.type == 'cuda':
    torch.cuda.manual_seed_all(seed)

In [None]:
data_files = {'train': 'https://drive.google.com/file/d/1wLWIbgDvqYskgUqgYn7ZgtQtbq0E5XUu/view?usp=drive_link', 'validation': 'https://drive.google.com/file/d/1d6aB8AOGy-pFPyTgmzVBQz_3ZM82QYyq/view?usp=drive_link'}
raw_datasets = load_dataset('csv', data_files=data_files)

In [None]:
label_names = list(set(raw_datasets['train']['label']))
label_names.sort()
label2id = {label: idx for idx, label in enumerate(label_names)}
id2label = {idx: label for label, idx in label2id.items()}

num_labels = len(label2id)
print("Label to ID Mapping:", label2id)

Label to ID Mapping: {'code_generation': 0, 'content_creation': 1, 'essay_writing': 2, 'idea_generation': 3, 'image_generation': 4, 'student_learning': 5}


In [None]:
cls_model_name = "microsoft/MiniLM-L12-H384-uncased"
tokenizer_cls = AutoTokenizer.from_pretrained(cls_model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
def preprocess_fn(examples):
    result = tokenizer_cls(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=128
    )
    result["labels"] = [ label2id[label] for label in examples["label"] ]
    return result

tokenized_train = raw_datasets["train"].map(
    preprocess_fn,
    batched=True,
    remove_columns=["text", "label"]
)
tokenized_valid = raw_datasets["validation"].map(
    preprocess_fn,
    batched=True,
    remove_columns=["text", "label"]
)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
model_cls = AutoModelForSequenceClassification.from_pretrained(
    cls_model_name,
    num_labels=num_labels
).to(device)

metric = evaluate.load('f1')

def compute_metrics(p):
    preds = p.predictions.argmax(axis=-1)
    return metric.compute(predictions=preds, references=p.label_ids, average='macro')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer_cls)

In [None]:
training_args = TrainingArguments(
    output_dir="./miniLM_classifier",
    eval_strategy="epoch",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=30,
    learning_rate=1e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    logging_dir='./logs',
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    #bf16=True  # uses bfloat16 on TPU
    fp16=True, # Enable if your GPU supports fp16, otherwise, remove this line
    save_strategy="epoch"  # Save the model at the end of each epoch
)

trainer = Trainer(
    model=model_cls,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    tokenizer=tokenizer_cls,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss,F1
1,No log,1.791436,0.045977
2,1.789900,1.783672,0.06721
3,1.789900,1.717715,0.508036
4,1.747100,1.391397,0.564984
5,1.747100,1.12873,0.77551
6,1.341100,0.948696,1.0
7,0.977700,0.816084,1.0
8,0.977700,0.7171,1.0
9,0.766300,0.638787,1.0
10,0.766300,0.574595,1.0


TrainOutput(global_step=870, training_loss=0.6231790389137707, metrics={'train_runtime': 145.8111, 'train_samples_per_second': 185.171, 'train_steps_per_second': 5.967, 'total_flos': 444672861696000.0, 'train_loss': 0.6231790389137707, 'epoch': 30.0})

In [None]:
trainer.save_model('/content/drive/MyDrive/saved_models/miniLM_classifier_best')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
classifier = pipeline(
    "text-classification",
    model='/content/drive/MyDrive/models/miniLM_classifier_best',
    tokenizer=tokenizer_cls,
    device=0 if device.type=='cuda' else -1
)

Device set to use cuda:0


In [None]:
import gc
gc.collect()
import torch
torch.cuda.empty_cache()

In [4]:
!pip install huggingface_hub
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
The token `Promptpilot` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `Promptpilot`


In [5]:
!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import BitsAndBytesConfig

import torch

gen_model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# 4-bit quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)

# Load tokenizer
tokenizer_gen = AutoTokenizer.from_pretrained(gen_model_name, trust_remote_code=True)

# Load model in 4-bit (saves huge memory)
model_gen = AutoModelForCausalLM.from_pretrained(
    gen_model_name,
    quantization_config=bnb_config,
    device_map="auto",  # auto-distributes layers to GPU
    trust_remote_code=True
)

# Create pipeline
generator = pipeline(
    "text-generation",
    model=model_gen,
    tokenizer=tokenizer_gen,
    device_map="auto",
    max_new_tokens=128,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.7,
    num_beams=4
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Device set to use cuda:0


In [2]:
# Test it
output = generator("Generate a creative idea for a new project", max_length=50)
print(output)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Both `max_new_tokens` (=128) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': 'Generate a creative idea for a new project or product.\n\nOne possible creative idea for a new project or product could be a "Smart Wardrobe Assistant". This could be an AI-powered device or app that helps individuals organize and manage their clothing collection, making it easier to find the perfect outfit for any occasion. The Smart Wardrobe Assistant could use image recognition technology to catalog all of the clothing items in a user\'s wardrobe, and then suggest outfits based on factors such as the weather, the user\'s schedule, and their personal style preferences. Additionally, the device could provide recommendations for new clothing items to add to the user\'s collection based'}]


In [3]:
!pip install flask flask-ngrok flask-cors

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting flask-cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Downloading flask_cors-5.0.1-py3-none-any.whl (11 kB)
Installing collected packages: flask-ngrok, flask-cors
Successfully installed flask-cors-5.0.1 flask-ngrok-0.0.25


In [4]:
!wget -q -O ngrok.zip https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.zip
!unzip -o ngrok.zip

Archive:  ngrok.zip
  inflating: ngrok                   


In [5]:
!cp ngrok /usr/local/bin # or wherever you prefer

In [6]:
!./ngrok config add-authtoken 2wwAH8pzF1snhlIhcySUMeVV56o_4jednruFMqycYXf175eqF

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [16]:
from flask import Flask, request, jsonify
from flask_cors import CORS
from flask_ngrok import run_with_ngrok
from threading import Thread
import nest_asyncio
import os

app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": ["http://localhost:3000", "*"]}})
nest_asyncio.apply()

@app.route('/generate', methods=['POST'])
def generate():
    data = request.json
    prompt = data.get("prompt", "")

    # Generate improved prompt
    result = generator(f"Improve this prompt: {prompt}", max_length=50, num_return_sequences=1)

    improved = result[0]['generated_text'].replace(f"Improve this prompt: ", "")
    return jsonify({"response": improved})

def run_app():
    app.run(port=5001)

def start_ngrok():
    os.system("nohup ./ngrok http 5001 &")

# Start Flask and Ngrok in background
Thread(target=run_app).start()
start_ngrok()


 * Serving Flask app '__main__'
 * Debug mode: off


In [18]:
!./ngrok http 5001

INFO:werkzeug:127.0.0.1 - - [13/May/2025 10:15:31] "OPTIONS /generate HTTP/1.1" 200 -
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Both `max_new_tokens` (=128) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
INFO:werkzeug:127.0.0.1 - - [13/May/2025 10:15:41] "POST /generate HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [13/May/2025 10:18:51] "OPTIONS /generate HTTP/1.1" 200 -
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Both `max_new_tokens` (=128) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
INFO:werkzeug:127.0.0.1 - - [13/May/2025 10:19:02] "POST /generate HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - 

In [None]:
def optimize_prompt(prompt: str) -> dict:
    """
    Classifies the prompt, then rewrites it for clarity, detail, and effectiveness.
    Returns a dict: {'category': str, 'improved_prompt': str}
    """
    cls_res = classifier(prompt)[0]
    category = cls_res['label']

    instruction = (
        f"Improve the following {category.lower()} prompt for clarity, detail, and effectiveness:\n"
        f"\"{prompt}\""
    )
    gen_res = generator(instruction)[0]['generated_text']
    return {"category": category, "improved_prompt": gen_res}

In [None]:
if __name__ == "__main__":
    test_prompt = "generate a bus"
    result = optimize_prompt(test_prompt)
    print(f"Category: {result['category']}\n")
    print(f"Improved Prompt:\n{result['improved_prompt']}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Category: LABEL_0

Improved Prompt:
Improve the following label_0 prompt for clarity, detail, and effectiveness:
"generate a bus"
Answer: "design a comfortable and energy-efficient bus for public transportation."

I hope these examples and exercises have helped you understand the importance of using clear and specific labels in the field of computer science and programming. Remember, just like in economics, clear communication is key to success in any
