# Synthetic Demo

In [None]:
# !pip install "distilabel[hf-transformers,outlines,instructor]"



In [None]:
from distilabel.llms import TransformersLLM
from distilabel.steps.tasks import TextGeneration
from huggingface_hub import login

# GET YOUR OWN HUGGING FACE KEY FROM https://huggingface.co/settings/tokens!!!!!!!
# Login to Hugging Face to access mode
# login("")

  from distilabel.llms import TransformersLLM
  from .autonotebook import tqdm as notebook_tqdm


In [3]:

# Configuration for consistent generation settings across all LLMs
GENERATION_CONFIG = {
    "max_new_tokens": 5000,  # Increase from default (usually 128)
}

# HuggingFaceTB/SmolLM2-135M-Instruct
# HuggingFaceTB/SmolLM2-360M-Instruct
# HuggingFaceTB/SmolLM2-1.7B-Instruct
# The <think/> part can be removed!!
# HuggingFaceTB/SmolLM3-3B
# Qwen/Qwen2.5-1.5B-Instruct
# Qwen/Qwen3-4B-Instruct-2507
# Qwen/Qwen2.5-0.5B-Instruct
llm_model = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
llm = TransformersLLM(
    model=llm_model,
    generation_kwargs=GENERATION_CONFIG
)
gen = TextGeneration(llm=llm)
gen.load()

Step 'None' hasn't received a pipeline, and it hasn't been created within a `Pipeline` context. Please, use `with Pipeline() as pipeline:` and create the step within the context.
Device set to use cuda:0


# SelfInstruct

Instruction tuning!

In [None]:
# SELF-INSTRUCT

# original seed prompt
prompt_for_instruction_tune = """
Only Generate prompt instructions, nothing else. No Comments.
Generate prompt instructions to make sure that the following are respected:
- When a user requests a flowchart, diagram, or chart to be created, do not send any intermediate, planning, or step-listing messages. Do not ask the user to confirm or modify steps unless they specifically request it. Only send a single final message after the diagram or elements have been created, such as 'I have created the flowchart as requested
- When a user requests a flowchart, diagram, or chart, DO NOT provide any explanations, plans, or step lists. Instead, IMMEDIATELY take the action and ONLY reply after it's done with the result
"""

## This generates a set of instructions ###
# We will now use the llm to generate a prompt for *instruction tuning*.
result_prompt_for_instruction_tune = next(gen.process([{"instruction": prompt_for_instruction_tune}]))
print("Generated prompt:\n", result_prompt_for_instruction_tune[0]["generation"], "\n\n\n")
self_instruct_result = result_prompt_for_instruction_tune[0]["generation"]

Generated prompt:
 When a user requests a flowchart, diagram, or chart, do not send any intermediate, planning, or step-listing messages. Do not ask the user to confirm or modify steps unless they specifically request it. Only send a single final message after the diagram or elements have been created, such as "I have created the flowchart as requested." 





# EvolInstruct
**EvolInstruct** is a prompting technique that takes an input instruction and evolves it into a better version of the same instruction. This better version is defined according to a set of criteria and adds constraints, deepening, concretizing, reasoning or complications to the original instruction. The process can be repeated multiple times to create various evolutions of the same instruction, ideally leading to a better version of the original instruction.

In [5]:
from distilabel.steps.tasks import EvolInstruct

total_evolutions=5
evol_instruct = EvolInstruct(llm=llm, num_evolutions=total_evolutions)
evol_instruct.load()


evol_instruct_result = next(evol_instruct.process([{"instruction": prompt_for_instruction_tune}]))
# What is the process of generating synthetic data through manual prompting?
# Example Output - And, how does the artificial intelligence system, GPT4, use machine learning algorithms to manipulate the input data into synthetic data?
print("Before EvolInstruct instruction:\n", prompt_for_instruction_tune, "\n\n\n")
print("After EvolInstruct instruction:\n", evol_instruct_result[0]["evolved_instruction"], "\n\n\n")
evol_instruct_result_final = evol_instruct_result[0]["evolved_instruction"]

# The instruction is now more complex but has lost some of the original meaning. So, take into account that evolving can be a double-edged sword and we need to be careful with the quality of the data we generate.


Step 'None' hasn't received a pipeline, and it hasn't been created within a `Pipeline` context. Please, use `with Pipeline() as pipeline:` and create the step within the context.
Device set to use cuda:0


Before EvolInstruct instruction:
 
Only Generate prompt instructions, nothing else. No Comments.
Generate prompt instructions to make sure that the following are respected:
- When a user requests a flowchart, diagram, or chart to be created, do not send any intermediate, planning, or step-listing messages. Do not ask the user to confirm or modify steps unless they specifically request it. Only send a single final message after the diagram or elements have been created, such as 'I have created the flowchart as requested
- When a user requests a flowchart, diagram, or chart, DO NOT provide any explanations, plans, or step lists. Instead, IMMEDIATELY take the action and ONLY reply after it's done with the result
 



After EvolInstruct instruction:
 "Create a sophisticated and multifaceted plan for organizing a large-scale international charity gala that incorporates cutting-edge financial forecasting tools, avant-garde marketing strategies, diverse volunteer engagement approaches, and in

# Evaluate completions with UltraFeedback

In [6]:
# EVALUATE WITH ULTRAFEEDBACK

from distilabel.llms import TransformersLLM
from distilabel.steps.tasks import UltraFeedback

llm = TransformersLLM(model="Qwen/Qwen3-4B-Instruct-2507", generation_kwargs=GENERATION_CONFIG)
ultrafeedback = UltraFeedback(llm=llm)
ultrafeedback.load()

instruction = prompt_for_instruction_tune
completion_a = self_instruct_result
completion_b = evol_instruct_result_final

feedback_results = next(ultrafeedback.process([{
    "instruction": instruction,
    "generations": [completion_a, completion_b]
}]))

# Print the results nicely
print("="*80)
print("ULTRAFEEDBACK EVALUATION RESULTS")
print("="*80)

result = feedback_results[0]  # Get the first (and only) result

print(f"📝 INSTRUCTION: {result['instruction']}")
print("\n" + "─"*60)

# Loop through generations and their evaluations
for i, (generation, rating, rationale) in enumerate(zip(
    result['generations'],
    result['ratings'],
    result['rationales']
), 1):

    print(f"\n🤖 GENERATION {i}:")
    print(f"   Text: \"{generation}\"")
    print(f"   ⭐ Rating: {rating}/5")
    print(f"   💭 Rationale: {rationale}")

print("\n" + "="*80)
print(f"🏆 WINNER: Generation {result['ratings'].index(max(result['ratings'])) + 1} (Rating: {max(result['ratings'])})")
print("="*80)
# Example Output
# [
#     {
#         'ratings': [4, 5],
#         'rationales': ['could have been more specific', 'good definition'],
#     }
# ]

Step 'None' hasn't received a pipeline, and it hasn't been created within a `Pipeline` context. Please, use `with Pipeline() as pipeline:` and create the step within the context.
Fetching 3 files: 100%|██████████| 3/3 [00:00<00:00, 15.00it/s]
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00, 26.55it/s]
Device set to use cuda:0


ULTRAFEEDBACK EVALUATION RESULTS
📝 INSTRUCTION: 
Only Generate prompt instructions, nothing else. No Comments.
Generate prompt instructions to make sure that the following are respected:
- When a user requests a flowchart, diagram, or chart to be created, do not send any intermediate, planning, or step-listing messages. Do not ask the user to confirm or modify steps unless they specifically request it. Only send a single final message after the diagram or elements have been created, such as 'I have created the flowchart as requested
- When a user requests a flowchart, diagram, or chart, DO NOT provide any explanations, plans, or step lists. Instead, IMMEDIATELY take the action and ONLY reply after it's done with the result


────────────────────────────────────────────────────────────

🤖 GENERATION 1:
   Text: "When a user requests a flowchart, diagram, or chart, do not send any intermediate, planning, or step-listing messages. Do not ask the user to confirm or modify steps unless they