#### DPO Data Generation with Levenshtein Distance

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from Levenshtein import distance as levenshtein_distance

def generate_variants(model, tokenizer, input_text, num_variants=2, temperature=0.7, top_k=50,
                      max_new_tokens=100):
    """Sample several independent completions of ``input_text`` from ``model``.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``; when it has a
            ``device`` attribute the prompt tensor is moved there first.
        tokenizer: Object exposing ``encode(text, return_tensors="pt")`` and
            ``decode(ids, skip_special_tokens=True)``.
        input_text: Prompt to complete.
        num_variants: Number of completions to sample (one ``generate`` call each).
        temperature: Sampling temperature forwarded to ``generate``.
        top_k: Top-k cutoff forwarded to ``generate``.
        max_new_tokens: Upper bound on tokens generated *after* the prompt.

    Returns:
        A list of ``num_variants`` decoded strings containing only the generated
        continuation (the prompt is not echoed back).
    """
    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    # Keep the prompt on the same device as the model, otherwise generate()
    # fails for a model that lives on an accelerator.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    # A single unpadded sequence attends to every position; passing the mask
    # explicitly avoids generate() having to guess it from the pad token.
    attention_mask = torch.ones_like(input_ids)
    prompt_length = input_ids.shape[-1]

    # Decoder-only models return prompt + continuation; encoder-decoder models
    # return only the continuation, so nothing must be sliced off for them.
    config = getattr(model, "config", None)
    is_encoder_decoder = getattr(config, "is_encoder_decoder", False)

    outputs = []
    for _ in range(num_variants):
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            # max_new_tokens (unlike max_length) does not count prompt tokens,
            # so long prompts still get a full generation budget.
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            num_return_sequences=1,
        )
        sequence = output[0]
        generated_ids = sequence if is_encoder_decoder else sequence[prompt_length:]
        outputs.append(tokenizer.decode(generated_ids, skip_special_tokens=True))

    return outputs

def create_dpo_dataset(data, model, tokenizer):
    """Build preference pairs by ranking two sampled generations per example.

    For every record the prompt is ``instruction`` optionally followed by a
    newline and ``input``. Two variants are sampled with ``generate_variants``
    and the one with the smaller Levenshtein distance to the reference
    ``output`` becomes ``chosen``; the other becomes ``rejected``. On a tie the
    first variant is chosen.

    Args:
        data: Iterable of dicts with keys ``'instruction'`` and ``'output'``,
            and optionally ``'input'``.
        model: Forwarded to ``generate_variants``.
        tokenizer: Forwarded to ``generate_variants``.

    Returns:
        A list of dicts with keys ``'prompt'``, ``'chosen'``, ``'rejected'``
        and ``'ground_truth'``, one per input record.

    Raises:
        KeyError: If a record lacks ``'instruction'`` or ``'output'``.
    """
    dpo_data = []

    for item in data:
        # 'input' is optional: a missing key is treated like an empty value
        # instead of raising KeyError.
        extra_input = item.get('input')
        input_text = item['instruction'] + ('\n' + extra_input if extra_input else '')
        ground_truth = item['output']

        variants = generate_variants(model, tokenizer, input_text)

        # Smaller edit distance to the reference means closer to the desired answer.
        distances = [levenshtein_distance(v, ground_truth) for v in variants]

        if distances[0] <= distances[1]:
            chosen, rejected = variants[0], variants[1]
        else:
            chosen, rejected = variants[1], variants[0]

        dpo_data.append({
            'prompt': input_text,
            'chosen': chosen,
            'rejected': rejected,
            'ground_truth': ground_truth,
        })

    return dpo_data