## Start vLLM server

In [1]:
import os
import sys

# Configure the environment BEFORE importing unsloth. The Hugging Face libraries
# resolve their cache location from these variables when they are first
# imported, so setting them after the unsloth import (which presumably pulls in
# the Hugging Face stack -- TODO confirm) can leave the in-process cache
# pointing at the default location.

# Add vllm directory to PATH (important if vllm is installed in a custom location)
vllm_path = "/work/users/s/m/smerrill/.conda/envs/synthetic-data/bin"
current_path = os.environ.get("PATH", "")
if current_path.split(os.pathsep)[0] != vllm_path:
    # Prepend only when it is not already first, so re-running this cell is
    # idempotent and PATH does not grow on every execution.
    os.environ["PATH"] = vllm_path + (os.pathsep + current_path if current_path else "")

# Optional: Add Hugging Face cache if needed
os.environ["HF_HOME"] = "/work/users/s/m/smerrill/.cache/huggingface"
# NOTE(review): TRANSFORMERS_CACHE is kept for older transformers releases;
# newer releases prefer HF_HOME -- confirm against the installed version.
os.environ["TRANSFORMERS_CACHE"] = "/work/users/s/m/smerrill/.cache/huggingface"

import requests
from unsloth.dataprep import SyntheticDataKit

# Creates the data-generation helper; per the recorded cell output this also
# launches a vLLM API server for the chosen model.
generator = SyntheticDataKit.from_pretrained(
    # Choose any model from https://huggingface.co/unsloth
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = 2048, # Longer sequence lengths will be slower!
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 05-21 16:38:34 [importing.py:53] Triton module has been replaced with a placeholder.
INFO 05-21 16:38:34 [__init__.py:239] Automatically detected platform cuda.


2025-05-21 16:38:36,469	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


Unsloth: Your GPU cannot handle sequence lengths of 256 due to limited GPU memory.
Unsloth: Your GPU can only handle approximately the maximum sequence length of 256.
Unsloth: Using dtype = torch.bfloat16 for vLLM.
Unsloth: vLLM loading unsloth/Llama-3.2-3B-Instruct with actual GPU utilization = 7.88%
Unsloth: Your GPU has CUDA compute capability 8.0 with VRAM = 39.49 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 256. Num Sequences = 128.
Unsloth: vLLM's KV Cache can use up to 0.0 GB. Also swap space = 6 GB.
vLLM STDOUT: INFO 05-21 16:38:47 [__init__.py:239] Automatically detected platform cuda.
vLLM STDOUT: INFO 05-21 16:38:55 [api_server.py:1043] vLLM API server version 0.8.5.post1
vLLM STDOUT: INFO 05-21 16:38:55 [api_server.py:1044] args: Namespace(subparser='serve', model_tag='unsloth/Llama-3.2-3B-Instruct', config='', host=None, port=8000, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_meth

## Generate QA Pairs + Auto clean data

In [2]:
# Options for synthetic QA generation, gathered in one place so they are easy
# to tune before the call.
qa_generation_options = {
    "output_folder": "data",       # output location of the synthetic data
    "temperature": 0.7,            # higher temperature gives more diverse datasets
    "top_p": 0.95,
    "overlap": 64,                 # overlap portion during chunking
    "max_generation_tokens": 512,  # can be increased for longer QA pairs
}
generator.prepare_qa_generation(**qa_generation_options)

### Sanity Checks

In [3]:
# Sanity check via the CLI: per the recorded output, this reports whether the
# vLLM server is running and lists the models it serves.
!synthetic-data-kit system-check

[?25l[32m VLLM server is running at [0m[4;94mhttp://localhost:8000/v1[0m
[2KAvailable models: [1m{[0m[32m'object'[0m: [32m'list'[0m, [32m'data'[0m: [1m[[0m[1m{[0m[32m'id'[0m: 
[32m'unsloth/Llama-3.2-3B-Instruct'[0m, [32m'object'[0m: [32m'model'[0m, [32m'created'[0m: [1;36m1747859970[0m, 
[32m'owned_by'[0m: [32m'vllm'[0m, [32m'root'[0m: [32m'unsloth/Llama-3.2-3B-Instruct'[0m, [32m'parent'[0m: [3;35mNone[0m, 
[32m'max_model_len'[0m: [1;36m2048[0m, [32m'permission'[0m: [1m[[0m[1m{[0m[32m'id'[0m: 
[32m'modelperm-e1b76278cdc74aae9a2fb06d336c0654'[0m, [32m'object'[0m: [32m'model_permission'[0m, 
[32m'created'[0m: [1;36m1747859970[0m, [32m'allow_create_engine'[0m: [3;91mFalse[0m, [32m'allow_sampling'[0m: [3;92mTrue[0m, 
[32m'allow_logprobs'[0m: [3;92mTrue[0m, [32m'allow_search_indices'[0m: [3;91mFalse[0m, [32m'allow_view'[0m: [3;92mTrue[0m, 
[32m'allow_fine_tuning'[0m: [3;91mFalse[0m, [32m'organization'[

In [5]:
# Smoke test: send one chat request to the local OpenAI-compatible endpoint and
# print either the model's reply or the HTTP error.
url = "http://localhost:8000/v1/chat/completions"

payload = {
    "model": "unsloth/Llama-3.2-3B-Instruct",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"}
    ],
    "temperature": 0.7,
    "max_tokens": 100
}

# requests has no default timeout; without one a stalled server would block
# this cell (and the kernel) indefinitely.
response = requests.post(url, json=payload, timeout=60)

# Pretty-print the result
if response.status_code == 200:
    print(response.json()["choices"][0]["message"]["content"])
else:
    print(f"Error {response.status_code}: {response.text}")


The capital of France is Paris.


### Ingest

In [7]:
!synthetic-data-kit ingest /nas/longleaf/home/smerrill/notebooks/LLM/pdfs/acuff.pdf
!synthetic-data-kit ingest /nas/longleaf/home/smerrill/notebooks/LLM/pdfs/le.pdf
!synthetic-data-kit ingest /nas/longleaf/home/smerrill/notebooks/LLM/pdfs/paige.pdf
!synthetic-data-kit ingest /nas/longleaf/home/smerrill/notebooks/LLM/pdfs/osborne.pdf

!synthetic-data-kit ingest /nas/longleaf/home/smerrill/notebooks/LLM/pdfs/YT_acuff.pdf
!synthetic-data-kit ingest /nas/longleaf/home/smerrill/notebooks/LLM/pdfs/YT_le.pdf
!synthetic-data-kit ingest /nas/longleaf/home/smerrill/notebooks/LLM/pdfs/YT_paige.pdf
!synthetic-data-kit ingest /nas/longleaf/home/smerrill/notebooks/LLM/pdfs/YT_osborne.pdf

[2K[32m⠙[0m Processing /nas/longleaf/home/smerrill/notebooks/LLM/pdfs/acuff.pdf.....
[1A[2K[32m Text successfully extracted to [0m[1;32mdata/output/acuff.txt[0m


### Generate

#### Custom Bios text

In [9]:
!synthetic-data-kit \
        -c synthetic_data_kit_config.yaml \
        create /nas/longleaf/home/smerrill/notebooks/LLM/data/output/acuff.txt  \
        --num-pairs 25 \
        --type "qa"
!synthetic-data-kit \
        -c synthetic_data_kit_config.yaml \
        create /nas/longleaf/home/smerrill/notebooks/LLM/data/output/le.txt  \
        --num-pairs 25 \
        --type "qa"

[?25l[32m⠋[0m Generating qa content from 
[2K[1A[2K[32m⠙[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠹[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠸[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠼[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠴[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠦[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠧[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠇[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠏[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠋[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠙[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠹[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠸[0m Generating qa content from put/acuff.txt...
[2K[1A[2K[32m⠼[0m Generating qa content from put/acuff.txt...
[2K[1A[2KProce

In [10]:
!synthetic-data-kit \
        -c synthetic_data_kit_config.yaml \
        create /nas/longleaf/home/smerrill/notebooks/LLM/data/output/paige.txt  \
        --num-pairs 25 \
        --type "qa"
!synthetic-data-kit \
        -c synthetic_data_kit_config.yaml \
        create /nas/longleaf/home/smerrill/notebooks/LLM/data/output/osborne.txt  \
        --num-pairs 25 \
        --type "qa"

[?25l[32m⠋[0m Generating qa content from 
[2K[1A[2K[32m⠙[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠹[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠸[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠼[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠴[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠦[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠧[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠇[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠏[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠋[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠙[0m Generating qa content from put/paige.txt...
[2K[1A[2K[32m⠹[0m Generating qa content from put/paige.txt...
[2K[1A[2KProcessing 2 chunks to generate QA pairs...aige.txt...
[32m⠹[0m Generating qa content from 
[2K[1A[2K[32m⠸[0m Generating qa content 

#### YouTube Transcripts

In [5]:
!synthetic-data-kit \
        -c synthetic_data_kit_config.yaml \
        create /nas/longleaf/home/smerrill/notebooks/LLM/data/output/YT_acuff.txt  \
        --num-pairs 25 \
        --type "qa"
!synthetic-data-kit \
        -c synthetic_data_kit_config.yaml \
        create /nas/longleaf/home/smerrill/notebooks/LLM/data/output/YT_le.txt  \
        --num-pairs 25 \
        --type "qa"

[?25l[32m⠋[0m Generating qa content from 
[2K[1A[2K[32m⠙[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠹[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠸[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠼[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠴[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠦[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠧[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠇[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠏[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠋[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠙[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2K[32m⠹[0m Generating qa content from put/YT_acuff.txt...
[2K[1A[2KProcessing 3 chunks to generate QA pairs...T_acuff.txt...
[32m⠹[0m Generating qa content from 
[2K[

In [4]:
!synthetic-data-kit \
        -c synthetic_data_kit_config.yaml \
        create /nas/longleaf/home/smerrill/notebooks/LLM/data/output/YT_paige.txt  \
        --num-pairs 25 \
        --type "qa"
!synthetic-data-kit \
        -c synthetic_data_kit_config.yaml \
        create /nas/longleaf/home/smerrill/notebooks/LLM/data/output/YT_osborne.txt  \
        --num-pairs 25 \
        --type "qa"

[?25l[32m⠋[0m Generating qa content from 
[2K[1A[2K[32m⠙[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠹[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠸[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠼[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠴[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠦[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠧[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠇[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠏[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠋[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠙[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠹[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2K[32m⠸[0m Generating qa content from put/YT_paige.txt...
[2K[1A[2KProcessing 2 chunks to generate Q

### Convert to HF format

In [9]:
generated_path ='/nas/longleaf/home/smerrill/notebooks/LLM/data/generated'
qa_pairs_filenames = os.listdir(generated_path)
for filename in qa_pairs_filenames:
    filename = os.path.join(generated_path, filename)
    !synthetic-data-kit \
        -c synthetic_data_kit_config.yaml \
        save-as {filename} -f ft

[?25l[32m⠋[0m Converting 
/nas/longleaf/home/smerrill/notebooks/LLM/data/generated/test_write.json to ft 
format with json storage...
[1A[2K[1A[2K[1A[2K[31mL Error: Unrecognized data format - expected QA pairs or conversations[0m
[?25l[32m⠋[0m Converting 
/nas/longleaf/home/smerrill/notebooks/LLM/data/generated/acuff_qa_pairs.json to 
ft format with json storage...
[1A[2K[1A[2K[1A[2K[32m Converted to ft format and saved to [0m[1;32mdata/final/acuff_qa_pairs_ft.json[0m
[?25l[32m⠋[0m Converting 
/nas/longleaf/home/smerrill/notebooks/LLM/data/generated/le_qa_pairs.json to ft 
format with json storage...
[1A[2K[1A[2K[1A[2K[32m Converted to ft format and saved to [0m[1;32mdata/final/le_qa_pairs_ft.json[0m
[?25l[32m⠋[0m Converting 
/nas/longleaf/home/smerrill/notebooks/LLM/data/generated/paige_qa_pairs.json to 
ft format with json storage...
[1A[2K[1A[2K[1A[2K[32m Converted to ft format and saved to [0m[1;32mdata/final/paige_qa_pairs_ft.json[

In [None]:
# Left commented out so a "Run All" does not stop the server; per the recorded
# output, running this call terminates the vLLM server. Uncomment when finished.
#generator.cleanup()

Attempting to terminate the VLLM server gracefully...
Server terminated gracefully.
