<a href="https://colab.research.google.com/github/prakhar-kt/HuggingFaceNotebooks/blob/main/llm_dataset_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q -U torch bitsandbytes transformers sentencepiece accelerate gradio

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m120.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m321.4/321.4 kB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m112.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import requests
from google.colab import drive, userdata
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch
import gradio as gr

In [4]:
# Fetch the Hugging Face access token stored under the "HF_TOKEN" key in
# Colab's `userdata`, so the token never appears in the notebook itself.
hf_token = userdata.get("HF_TOKEN")

# Authenticate with the Hub and also register the token as a git credential.
login(token=hf_token, add_to_git_credential=True)

### MODEL

In [5]:
# Hub identifier of the checkpoint to load; the tokenizer cell reuses this name.
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# 4-bit quantization settings: NF4 quantization type, double quantization
# enabled, and bfloat16 as the compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the causal LM with the quantization config above; `device_map="auto"`
# leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quant_config,
)


config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

### TOKENIZER

In [6]:
# Load the tokenizer that belongs to the checkpoint named in `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [17]:
def generate_dataset(topic, multishot_examples, max_new_tokens=4000):
    """Ask the chat model to generate a JSON dataset for ``topic``.

    The prompt embeds ``multishot_examples`` as reference samples and requests
    ten new samples per provided example. Generated text is streamed to stdout
    through a ``TextStreamer`` while it is produced.

    Args:
        topic: Subject the generated dataset should cover.
        multishot_examples: Sized collection of example records (the notebook
            uses a list of ``{"instruction", "response"}`` dicts). ``None`` is
            treated as "no examples".
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The decoded model answer only (prompt tokens are stripped), with
        special tokens removed.
    """
    # Treat a missing value as an empty example list so len() cannot fail.
    examples = multishot_examples if multishot_examples is not None else []

    # Ten samples per example; with no examples still ask for ten rather than
    # requesting a useless "0 number of examples".
    response_length = max(len(examples), 1) * 10

    # NOTE(review): the prompt says both "Include the following examples" and
    # "Do not include the provided examples" -- wording kept unchanged here,
    # but it looks contradictory and should be confirmed with the author.
    system_prompt = f"""You are a helpful assistant,
                    whose purpose is to generate datasets for the given topic.
                    Return the dataset in a json format.
                    Include the following examples: {examples}.
                    Return {response_length} number of examples every time.
                    Do not include the provided examples in the response
                    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Generate dataset for the topic: {topic}"}
    ]

    # add_generation_prompt=True appends the assistant header so the model
    # starts a reply instead of continuing the user turn. return_dict=True also
    # yields the attention mask, which generate() needs because the pad token
    # is the same as the EOS token.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)  # follow the model's device instead of hard-coding "cuda"
    prompt_length = inputs["input_ids"].shape[-1]

    streamer = TextStreamer(tokenizer)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, streamer=streamer)

    # outputs[0] holds prompt + completion; return only the newly generated
    # part so the caller does not get the system prompt and examples back.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)



In [14]:
def gradio_interface(topic, multishot_examples):
    """Gradio callback: pass the two UI inputs straight to ``generate_dataset``.

    Args:
        topic: Topic text entered in the UI.
        multishot_examples: Example records supplied in the UI.

    Returns:
        Whatever ``generate_dataset`` returns for these inputs.
    """
    generated_text = generate_dataset(topic, multishot_examples)
    return generated_text

In [15]:
# Default topic for the dataset generator.
default_topic = "E-commerce Customer Service Support: Handling Product Returns and Refunds"

# Default few-shot examples: each record pairs a customer "instruction" with
# the support "response". Built from (instruction, response) pairs so every
# record has the same two keys in the same order.
default_multi_shot_examples = [
    {"instruction": instruction, "response": response}
    for instruction, response in (
        (
            "My order arrived damaged. What's your return policy?",
            "I'm sorry to hear about your damaged order. You can initiate a return within 30 days of delivery. We'll provide a free return shipping label and process your refund once we receive the item back at our warehouse.",
        ),
        (
            "I ordered the wrong size. Can I exchange it for a different one?",
            "Yes, you can exchange your item for a different size within 30 days. Simply initiate a return on our website, select 'exchange' instead of 'refund', and choose your preferred size. We'll ship the new size once we receive your return.",
        ),
        (
            "The refund for my return hasn't appeared in my account yet. It's been 5 days.",
            "Once we receive your return, refunds typically take 3-5 business days to process and an additional 5-10 business days to appear in your account, depending on your bank. I can check the status of your refund if you provide your order number.",
        ),
        (
            "Can I return an item I bought on sale?",
            "Yes, sale items can be returned within 30 days of delivery. However, final sale items marked with a 'Final Sale' tag are not eligible for return or exchange. Your original receipt will indicate if an item was marked as final sale.",
        ),
    )
]

In [16]:
# Build the web UI: a topic text box and a JSON field for the few-shot
# examples as inputs (both pre-filled with the defaults), and a text box that
# shows the string returned by `gradio_interface`.
gr_interface = gr.Interface(
    outputs=gr.Textbox(label="Generated Dataset"),
    inputs=[
        gr.Textbox(value=default_topic, label="Topic"),
        gr.JSON(value=default_multi_shot_examples, label="Multi-shot Examples"),
    ],
    fn=gradio_interface,
)

In [12]:
# Start the Gradio app. In Colab, Gradio switches on `share=True` automatically,
# so the interface is served on a temporary public URL; pass `debug=True` to
# see errors in the notebook output.
gr_interface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://44a4c44d092b157f45.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


