<a href="https://colab.research.google.com/github/kumar045/Assignment-For-Filed/blob/main/Unsloth.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import random
from typing import List, Dict
import tiktoken
import torch
from datasets import Dataset
from transformers import AutoTokenizer, TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel
import os
from huggingface_hub import HfApi, HfFolder
import subprocess
import gradio as gr

# Dataset generation functions

def generate_dataset(task_description: str, task_examples: List[Dict[str, str]], num_examples: int) -> List[Dict[str, str]]:
    """Build a dataset by sampling, with replacement, from the seed examples.

    Args:
        task_description: Instruction used for any seed example that has no
            ``instruction`` key of its own.
        task_examples: Seed examples. Missing ``input`` / ``output`` keys
            default to the empty string.
        num_examples: Number of records to draw.

    Returns:
        A list of ``num_examples`` newly created dicts, each with the keys
        ``instruction``, ``input`` and ``output``.
    """
    def _to_record(seed: Dict[str, str]) -> Dict[str, str]:
        # Copy the three fields into a fresh dict so records never alias seeds.
        return {
            "instruction": seed.get('instruction', task_description),
            "input": seed.get('input', ''),
            "output": seed.get('output', ''),
        }

    return [_to_record(random.choice(task_examples)) for _ in range(num_examples)]

def save_dataset(dataset: List[Dict[str, str]], filename: str) -> None:
    """Write ``dataset`` to ``filename`` as indented, UTF-8 encoded JSON.

    Non-ASCII characters are stored as-is instead of being escaped.
    """
    with open(filename, mode='w', encoding='utf-8') as handle:
        json.dump(dataset, handle, indent=2, ensure_ascii=False)

def load_dataset(filename: str) -> List[Dict[str, str]]:
    """Read the UTF-8 JSON file at ``filename`` and return the decoded object."""
    with open(filename, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def count_tokens(text: str) -> int:
    """Return the number of ``cl100k_base`` tokens that ``text`` encodes to.

    NOTE(review): ``cl100k_base`` is a tiktoken encoding, not the tokenizer
    of the model that gets fine-tuned, so this is presumably only an
    estimate of the training-time length -- confirm that is acceptable.
    """
    token_ids = tiktoken.get_encoding("cl100k_base").encode(text)
    return len(token_ids)

def validate_dataset(dataset: List[Dict[str, str]], max_tokens: int = 2048) -> List[Dict[str, str]]:
    """Keep only the examples whose combined token count fits the budget.

    The count is the sum of ``count_tokens`` over the ``instruction``,
    ``input`` and ``output`` fields; an example is kept when that sum is at
    most ``max_tokens``. The input order is preserved.
    """
    counted_fields = ("instruction", "input", "output")

    def _within_budget(record: Dict[str, str]) -> bool:
        used = sum(count_tokens(record[name]) for name in counted_fields)
        return used <= max_tokens

    return [record for record in dataset if _within_budget(record)]

# Training functions

def prepare_dataset_for_training(dataset: List[Dict[str, str]]) -> Dataset:
    """Convert the list of example dicts into a ``datasets.Dataset``."""
    hf_dataset = Dataset.from_list(dataset)
    return hf_dataset

def setup_model_and_tokenizer(model_name: str, max_seq_length: int):
    """Load a 4-bit model through Unsloth and attach LoRA adapters to it.

    Args:
        model_name: Model identifier handed to ``FastLanguageModel.from_pretrained``.
        max_seq_length: Maximum sequence length handed to the loader.

    Returns:
        ``(model, tokenizer)`` where ``model`` is the result of
        ``FastLanguageModel.get_peft_model``.
    """
    base_model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        load_in_4bit=True,
    )

    # Fixed LoRA configuration: rank 16 on the attention and MLP projections.
    lora_settings = dict(
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
    )
    peft_model = FastLanguageModel.get_peft_model(base_model, **lora_settings)
    return peft_model, tokenizer

def train_model(model, tokenizer, dataset, output_dir, num_train_epochs, per_device_train_batch_size):
    """Fine-tune ``model`` on ``dataset`` with TRL's ``SFTTrainer``.

    Args:
        model: Model to train (as returned by ``setup_model_and_tokenizer``).
        tokenizer: Tokenizer matching ``model``.
        dataset: A ``datasets.Dataset``. If it has no ``text`` column, one is
            built from its ``instruction`` / ``input`` / ``output`` columns.
        output_dir: Directory passed to ``TrainingArguments``.
        num_train_epochs: Number of training epochs.
        per_device_train_batch_size: Per-device batch size.

    Returns:
        The ``SFTTrainer`` instance after ``train()`` has completed.
    """
    # The trainer below reads the "text" column, but the pipeline's records
    # only carry instruction/input/output. Render them into one prompt string
    # using the same layout that generate_response sends at inference time.
    if "text" not in dataset.column_names:
        eos_token = getattr(tokenizer, "eos_token", None) or ""

        def _render_text(example):
            text = (
                "Below are some instructions that describe a task. "
                "Write a response that appropriately completes the request.\n\n"
                f"### Instruction:\n{example['instruction']}\n"
            )
            if example.get("input"):
                text += f"### Input:\n{example['input']}\n"
            # The EOS token marks where a completed response ends.
            text += f"### Response:\n{example['output']}{eos_token}"
            return {"text": text}

        dataset = dataset.map(_render_text)

    # Ask torch directly whether the GPU supports bfloat16.
    # NOTE(review): the original called FastLanguageModel.is_bfloat16_supported();
    # I could not confirm that attribute exists -- verify against the installed unsloth.
    use_bf16 = torch.cuda.is_bf16_supported()

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        learning_rate=2e-4,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=tokenizer.model_max_length,
        args=training_args,
    )

    trainer.train()
    return trainer

def save_model(trainer, output_dir):
    """Save the trainer's model into ``output_dir`` via ``trainer.save_model``."""
    persist = trainer.save_model
    persist(output_dir)

def push_to_hub(model, tokenizer, repo_name, hf_token=None):
    """Upload ``model`` and ``tokenizer`` to the Hugging Face Hub repo ``repo_name``.

    Args:
        model: Object exposing ``push_to_hub(repo_name, token=...)``.
        tokenizer: Object exposing ``push_to_hub(repo_name, token=...)``.
        repo_name: Target repository id, e.g. ``"username/model-name"``.
        hf_token: Optional access token. When empty or ``None``, ``token=None``
            is passed so the library falls back to its own stored credentials.

    Returns:
        A human-readable confirmation message.
    """
    # Pass the token per call rather than persisting a UI-supplied secret to
    # disk, and use ``token=`` instead of the deprecated ``use_auth_token``.
    token = hf_token or None
    for artifact in (model, tokenizer):
        artifact.push_to_hub(repo_name, token=token)

    return f"Model and tokenizer pushed to Hugging Face Hub: {repo_name}"

def quantize_model(model, tokenizer, output_dir, quantization_methods):
    """Export ``model`` to GGUF once per requested quantization method.

    Calls ``model.save_pretrained_gguf(output_dir, tokenizer,
    quantization_method=...)`` for every entry of ``quantization_methods``
    and returns the list of values those calls returned, in order.

    NOTE(review): callers treat the returned values as file paths; confirm
    that ``save_pretrained_gguf`` really returns the written path.
    """
    saved_paths = []
    for quant in quantization_methods:
        print(f"Quantizing model to {quant}...")
        saved_paths.append(
            model.save_pretrained_gguf(output_dir, tokenizer, quantization_method=quant)
        )
        print(f"Quantized model ({quant}) saved to {saved_paths[-1]}")
    return saved_paths

def create_ollama_modelfile(gguf_path, model_name):
    """Write an Ollama Modelfile next to ``gguf_path`` and return its path.

    Args:
        gguf_path: Path of the GGUF file referenced by the ``FROM`` line.
        model_name: Unused here; kept so existing callers keep working.

    Returns:
        Path of the written file, ``Modelfile_<gguf file name>`` in the same
        directory as ``gguf_path``.
    """
    # The Modelfile itself contains triple double quotes, so the Python
    # literal is delimited with triple single quotes. Doubled braces are
    # f-string escapes: "{{{{ .Prompt }}}}" is written as "{{ .Prompt }}".
    # NOTE(review): ".Input" is presumably not a variable Ollama provides to
    # templates -- confirm against the Ollama Modelfile documentation.
    modelfile_content = f'''
FROM {gguf_path}
TEMPLATE """Below are some instructions that describe a task. Write a response that appropriately completes the request.

### Instruction:
{{{{ .Prompt }}}}
{{{{ if .Input }}}}
### Input:
{{{{ .Input }}}}
{{{{ end }}}}
### Response:
{{{{ .Response }}}}"""
PARAMETER stop "<|start_header_id|>"
PARAMETER stop "<|eot_id|>"
PARAMETER stop "<|end_header_id|>"
PARAMETER stop "<|end_of_text|>"
PARAMETER stop "<|reserved_special_token_"
'''
    modelfile_path = os.path.join(os.path.dirname(gguf_path), f"Modelfile_{os.path.basename(gguf_path)}")
    with open(modelfile_path, 'w', encoding='utf-8') as f:
        f.write(modelfile_content)
    print(f"Ollama Modelfile created at {modelfile_path}")
    return modelfile_path

def convert_to_ollama(gguf_paths, model_name):
    """Register every GGUF file with Ollama and return the created model names.

    For each path a Modelfile is written with ``create_ollama_modelfile`` and
    ``ollama create <name> -f <modelfile>`` is run; a failing command raises
    ``subprocess.CalledProcessError`` because ``check=True`` is used.
    """
    registered_names = []
    for path in gguf_paths:
        modelfile = create_ollama_modelfile(path, model_name)

        # The quantization tag is the second dot-separated part of the file
        # name (assumes names shaped like <prefix>.<QUANT>.gguf -- TODO confirm).
        file_name = os.path.basename(path)
        quant_tag = file_name.split('.')[1].lower()
        normalized_model = model_name.replace('-', '_').lower()
        target_name = f"unsloth_{normalized_model}_{quant_tag}"

        print(f"Converting model to Ollama format with name: {target_name}")
        create_cmd = ["ollama", "create", target_name, "-f", modelfile]
        subprocess.run(create_cmd, check=True)
        print(f"Model converted and added to Ollama with name: {target_name}")
        registered_names.append(target_name)
    return registered_names

# Gradio UI functions

def generate_response(instruction, input_text, ollama_model_name):
    """Run one prompt through ``ollama run`` and return the model's reply.

    Args:
        instruction: Text placed under the ``### Instruction:`` heading.
        input_text: Optional text placed under ``### Input:``; the section is
            omitted when this is empty.
        ollama_model_name: Name of the Ollama model to run.

    Returns:
        The command's stripped standard output, or a string starting with
        ``"Error:"`` when the ``ollama`` executable is missing or exits with
        a non-zero status.
    """
    prompt = (
        "Below are some instructions that describe a task. "
        "Write a response that appropriately completes the request.\n\n"
        f"### Instruction:\n{instruction}\n"
    )
    if input_text:
        prompt += f"### Input:\n{input_text}\n"
    prompt += "### Response:"

    try:
        result = subprocess.run(
            ["ollama", "run", ollama_model_name, prompt],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        return "Error: the 'ollama' executable was not found on PATH."

    # Report failures to the UI instead of returning an empty answer.
    if result.returncode != 0:
        return f"Error: ollama exited with status {result.returncode}: {result.stderr.strip()}"
    return result.stdout.strip()

def process_pipeline(task_description, example1_input, example1_output, example2_input, example2_output,
                     task_examples, num_examples, max_tokens, model_name, num_train_epochs,
                     per_device_train_batch_size, output_dir, push_to_hub, hub_model_id, hub_token, quant_versions):
    """Run the full pipeline: dataset, training, optional upload, GGUF export, Ollama.

    Args:
        task_description: Instruction shared by the two inline examples.
        example1_input, example1_output: First seed example.
        example2_input, example2_output: Second seed example.
        task_examples: Optional JSON text holding further example objects.
        num_examples: Number of records to generate.
        max_tokens: Token budget per example; also used as the model's
            maximum sequence length.
        model_name: Pretrained model identifier.
        num_train_epochs: Number of training epochs.
        per_device_train_batch_size: Per-device batch size.
        output_dir: Directory for the fine-tuned model.
        push_to_hub: Whether to upload the result to the Hugging Face Hub.
        hub_model_id: Hub repository id used when uploading.
        hub_token: Hub access token used when uploading.
        quant_versions: Selected quantization labels (``"Q4"`` / ``"Q8"``).

    Returns:
        ``(status, ollama_model_names)``: a multi-line status text and the
        names of the models registered with Ollama (empty when the pipeline
        stopped early).
    """
    # UI sliders may deliver floats; these three values are used as integers.
    num_examples = int(num_examples)
    max_tokens = int(max_tokens)
    per_device_train_batch_size = int(per_device_train_batch_size)

    # Prepare examples
    examples = [
        {"instruction": task_description, "input": example1_input, "output": example1_output},
        {"instruction": task_description, "input": example2_input, "output": example2_output}
    ]

    if task_examples and task_examples.strip():
        try:
            extra_examples = json.loads(task_examples)
        except json.JSONDecodeError as err:
            return f"Error: Additional Task Examples is not valid JSON ({err}).\n", []
        # A single JSON object is accepted as a one-element list.
        if isinstance(extra_examples, dict):
            extra_examples = [extra_examples]
        examples.extend(extra_examples)

    # Generate dataset
    dataset = generate_dataset(task_description, examples, num_examples)
    valid_dataset = validate_dataset(dataset, max_tokens)

    output_file = f"{task_description.replace(' ', '_')[:30]}_dataset.json"
    save_dataset(valid_dataset, output_file)

    status = f"Dataset with {len(valid_dataset)} valid examples (out of {len(dataset)} generated) has been saved to {output_file}\n"
    status += f"Examples removed due to token limit: {len(dataset) - len(valid_dataset)}\n"

    if not valid_dataset:
        status += "Error: no examples fit within the token limit; training was skipped.\n"
        return status, []

    # Prepare dataset for training
    train_dataset = prepare_dataset_for_training(valid_dataset)

    # Setup model and tokenizer
    model, tokenizer = setup_model_and_tokenizer(model_name, max_tokens)

    # Train model
    trainer = train_model(model, tokenizer, train_dataset, output_dir, num_train_epochs, per_device_train_batch_size)

    # Save model locally
    save_model(trainer, output_dir)
    status += f"Fine-tuned model has been saved to {output_dir}\n"

    # Push to Hugging Face Hub if requested
    if push_to_hub:
        if not hub_model_id:
            status += "Error: Please provide a Hub Model ID to push to Hugging Face Hub.\n"
        else:
            # The boolean parameter ``push_to_hub`` hides the module-level
            # upload function of the same name, so fetch that function from
            # the module namespace instead of calling the boolean.
            upload = globals()["push_to_hub"]
            status += upload(model, tokenizer, hub_model_id, hub_token) + "\n"

    # Quantize model
    quant_output_dir = os.path.join(output_dir, "quantized")
    os.makedirs(quant_output_dir, exist_ok=True)

    method_by_label = {"Q4": "q4_k_m", "Q8": "q8_0"}
    selected_labels = quant_versions or []
    quantization_methods = [method for label, method in method_by_label.items() if label in selected_labels]

    gguf_paths = quantize_model(model, tokenizer, quant_output_dir, quantization_methods)

    # Convert to Ollama
    ollama_model_names = convert_to_ollama(gguf_paths, model_name.split('/')[-1])

    status += "Quantization and conversion completed.\n"
    for name in ollama_model_names:
        status += f"Model converted and added to Ollama with name: {name}\n"

    return status, ollama_model_names

def main_interface():
    """Build the Gradio UI (setup/training tab and chat tab) and launch it.

    The "Start Pipeline" button runs ``process_pipeline`` with the form
    values, shows its status text and refreshes the chat tab's model list
    from ``ollama list``. The "Generate Response" button sends the chat form
    to ``generate_response``.
    """
    model_choices = ["unsloth/llama-3-8b-bnb-4bit", "unsloth/mistral-7b-bnb-4bit"]

    with gr.Blocks() as demo:
        gr.Markdown("# Flexible LLM Fine-tuning Pipeline with Multiple Quantization Options")

        with gr.Tab("Setup and Training"):
            task_description = gr.Textbox(label="Task Description", placeholder="e.g., Simplify the given text, or Translate the text to French")

            with gr.Row():
                with gr.Column():
                    example1_input = gr.Textbox(label="Example 1 Input", placeholder="Enter the input for the first example")
                    example1_output = gr.Textbox(label="Example 1 Output", placeholder="Enter the expected output for the first example")
                with gr.Column():
                    example2_input = gr.Textbox(label="Example 2 Input", placeholder="Enter the input for the second example")
                    example2_output = gr.Textbox(label="Example 2 Output", placeholder="Enter the expected output for the second example")

            task_examples = gr.Textbox(label="Additional Task Examples (JSON format)", placeholder='''[
    {"instruction": "Simplify this text:", "input": "The quantum mechanical model is based on mathematics of waves and introduces the concept of the electron cloud.", "output": "The quantum model of atoms uses math about waves. It says electrons move around the atom like a cloud."},
    {"instruction": "Translate to French:", "input": "Hello, how are you?", "output": "Bonjour, comment allez-vous ?"}
]''')
            num_examples = gr.Slider(minimum=10, maximum=1000, step=10, label="Number of examples to generate", value=100)
            max_tokens = gr.Slider(minimum=128, maximum=4096, step=128, label="Maximum tokens per example", value=2048)
            # Preselect a model so the pipeline never receives None as the model name.
            model_name = gr.Dropdown(model_choices, label="Pretrained model name", value=model_choices[0])
            num_train_epochs = gr.Slider(minimum=1, maximum=10, step=1, label="Number of training epochs", value=3)
            per_device_train_batch_size = gr.Slider(minimum=1, maximum=32, step=1, label="Batch size per device", value=2)
            output_dir = gr.Textbox(label="Output directory for fine-tuned model", value="./fine_tuned_model")
            push_to_hub = gr.Checkbox(label="Push to Hugging Face Hub")
            hub_model_id = gr.Textbox(label="Hugging Face Hub Model ID (e.g., 'username/model-name')")
            hub_token = gr.Textbox(label="Hugging Face Hub API Token", type="password")
            quant_versions = gr.CheckboxGroup(["Q4", "Q8"], label="Quantization Versions", value=["Q4"])

            start_button = gr.Button("Start Pipeline")
            output = gr.Textbox(label="Pipeline Status")
            # This button has no handler; it is kept so the layout is unchanged.
            gr.Button("Open Chat Interface")

        with gr.Tab("Chat"):
            model_select = gr.Dropdown(label="Select Ollama Model")
            with gr.Row():
                instruction = gr.Textbox(label="Instruction", placeholder="Enter your instruction here...")
                input_text = gr.Textbox(label="Input (optional)", placeholder="Enter input text if needed...")
            chat_output = gr.Textbox(label="Model Output")
            generate_button = gr.Button("Generate Response")

        def update_model_list():
            result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
            rows = result.stdout.strip().split('\n')[1:]  # Skip header
            # Blank rows would make split()[0] fail, so they are ignored.
            models = [row.split()[0] for row in rows if row.strip()]
            # gr.update replaces the per-component .update() helpers.
            return gr.update(choices=models)

        def start_pipeline(*args):
            status, _ollama_model_names = process_pipeline(*args)
            return status, gr.update(interactive=True), update_model_list()

        start_button.click(start_pipeline,
                           inputs=[task_description, example1_input, example1_output, example2_input, example2_output,
                                   task_examples, num_examples, max_tokens, model_name, num_train_epochs,
                                   per_device_train_batch_size, output_dir, push_to_hub, hub_model_id, hub_token, quant_versions],
                           outputs=[output, generate_button, model_select])

        def chat(instruction, input_text, model_name):
            if not model_name:
                return "Please select a model first."
            return generate_response(instruction, input_text, model_name)

        generate_button.click(chat, inputs=[instruction, input_text, model_select], outputs=[chat_output])

    demo.launch()

# Build and launch the UI only when this code runs as the main program;
# importing it as a module does not start the interface.
if __name__ == "__main__":
    main_interface()

In [None]:
import json
import random
from typing import List, Dict
import tiktoken
import torch
from datasets import Dataset
from transformers import AutoTokenizer, TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel
import os
from huggingface_hub import HfApi, HfFolder
import subprocess
import gradio as gr
import anthropic
import google.generativeai as genai
import openai

# Configuration for AI models: API keys are read from the environment.
# os.getenv returns None for a variable that is not set.
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Initialize AI models: these run at import time, once per process.
claude = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
genai.configure(api_key=GOOGLE_API_KEY)
openai.api_key = OPENAI_API_KEY

# Unsloth model setup: short name -> model identifier. The short names are
# the options accepted by generate_dataset_with_ai; the identifiers are also
# offered as fine-tuning targets in the UI.
# NOTE(review): the "llama3.1" key points at a "llama-3-8b" identifier --
# confirm which model version is intended.
unsloth_models = {
    "llama3.1": "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "mistral": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"
}

# Dataset generation functions

def generate_dataset_with_ai(task_description: str, examples: List[Dict[str, str]], num_examples: int, ai_model: str) -> List[Dict[str, str]]:
    """Create ``num_examples`` records using the selected generator.

    Args:
        task_description: Task text handed to the generator; also the
            default instruction for sampled seed examples.
        examples: Seed examples.
        num_examples: Number of records to produce.
        ai_model: ``"claude"``, ``"gemini"``, ``"openai"``, ``"llama3.1"`` or
            ``"mistral"``. Any other value samples from ``examples``.

    Returns:
        A list of dicts with the keys ``instruction``, ``input`` and ``output``.
    """
    dataset = []

    for _ in range(num_examples):
        if ai_model == "claude":
            new_example = generate_with_claude(task_description, examples)
        elif ai_model == "gemini":
            new_example = generate_with_gemini(task_description, examples)
        elif ai_model == "openai":
            new_example = generate_with_openai(task_description, examples)
        elif ai_model in ["llama3.1", "mistral"]:
            new_example = generate_with_unsloth(task_description, examples, ai_model)
        else:
            # Fallback to random selection. Build a fresh, complete record so
            # user-supplied seeds that omit a key cannot break later steps and
            # dataset entries never alias the seed dicts.
            seed = random.choice(examples)
            new_example = {
                "instruction": seed.get("instruction", task_description),
                "input": seed.get("input", ""),
                "output": seed.get("output", ""),
            }

        dataset.append(new_example)

    return dataset

def generate_with_claude(task_description: str, examples: List[Dict[str, str]]) -> Dict[str, str]:
    """Ask Claude for one new example patterned on the first two seeds.

    Args:
        task_description: Task text; also used as the record's instruction.
        examples: Seed examples; only the first two are shown to the model.

    Returns:
        A dict with the keys ``instruction``, ``input`` and ``output``.

    Raises:
        ValueError: If no Input/Output pair can be read from the reply.
    """
    prompt = f"Task: {task_description}\n\nExamples:\n"
    for ex in examples[:2]:  # Use first two examples
        prompt += f"Input: {ex['input']}\nOutput: {ex['output']}\n\n"
    prompt += "Generate a new, unique example following the same pattern:"

    # The Text Completions endpoint expects the prompt to be wrapped in
    # Human/Assistant turn markers.
    # NOTE(review): "claude-2" and this endpoint are legacy; confirm they are
    # still available for the account in use.
    response = claude.completions.create(
        model="claude-2",
        prompt=f"\n\nHuman: {prompt}\n\nAssistant:",
        max_tokens_to_sample=300
    )

    # Parse Claude's response: prefer labelled lines, tolerate blank lines and
    # preamble, and fall back to the first two non-empty lines.
    text = response.completion
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    new_input = next((ln[len("Input:"):].strip() for ln in lines if ln.startswith("Input:")), None)
    new_output = next((ln[len("Output:"):].strip() for ln in lines if ln.startswith("Output:")), None)
    if new_input is None or new_output is None:
        if len(lines) < 2:
            raise ValueError(f"Could not parse an Input/Output pair from the model response: {text!r}")
        new_input = lines[0].replace("Input: ", "")
        new_output = lines[1].replace("Output: ", "")

    return {"instruction": task_description, "input": new_input, "output": new_output}

def generate_with_gemini(task_description: str, examples: List[Dict[str, str]]) -> Dict[str, str]:
    """Ask Gemini for one new example patterned on the first two seeds.

    Args:
        task_description: Task text; also used as the record's instruction.
        examples: Seed examples; only the first two are shown to the model.

    Returns:
        A dict with the keys ``instruction``, ``input`` and ``output``.

    Raises:
        ValueError: If no Input/Output pair can be read from the reply.
    """
    prompt = f"Task: {task_description}\n\nExamples:\n"
    for ex in examples[:2]:  # Use first two examples
        prompt += f"Input: {ex['input']}\nOutput: {ex['output']}\n\n"
    prompt += "Generate a new, unique example following the same pattern. Respond with just the Input and Output, no additional text."

    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)

    # Parse Gemini's response: prefer labelled lines, tolerate blank lines and
    # preamble, and fall back to the first two non-empty lines.
    text = response.text
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    new_input = next((ln[len("Input:"):].strip() for ln in lines if ln.startswith("Input:")), None)
    new_output = next((ln[len("Output:"):].strip() for ln in lines if ln.startswith("Output:")), None)
    if new_input is None or new_output is None:
        if len(lines) < 2:
            raise ValueError(f"Could not parse an Input/Output pair from the model response: {text!r}")
        new_input = lines[0].replace("Input: ", "")
        new_output = lines[1].replace("Output: ", "")

    return {"instruction": task_description, "input": new_input, "output": new_output}

def generate_with_openai(task_description: str, examples: List[Dict[str, str]]) -> Dict[str, str]:
    """Ask an OpenAI chat model for one new example patterned on the seeds.

    Args:
        task_description: Task text; also used as the record's instruction.
        examples: Seed examples; only the first two are shown to the model.

    Returns:
        A dict with the keys ``instruction``, ``input`` and ``output``.

    Raises:
        ValueError: If no Input/Output pair can be read from the reply.
    """
    prompt = f"Task: {task_description}\n\nExamples:\n"
    for ex in examples[:2]:  # Use first two examples
        prompt += f"Input: {ex['input']}\nOutput: {ex['output']}\n\n"
    prompt += "Generate a new, unique example following the same pattern. Respond with just the Input and Output, no additional text."

    messages = [{"role": "user", "content": prompt}]
    # openai>=1.0 exposes chat completions under ``openai.chat``; older SDKs
    # only provide ``openai.ChatCompletion``. Support both.
    if hasattr(openai, "chat"):
        response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    else:
        response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)

    # Parse OpenAI's response: prefer labelled lines, tolerate blank lines and
    # preamble, and fall back to the first two non-empty lines.
    text = response.choices[0].message.content or ""
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    new_input = next((ln[len("Input:"):].strip() for ln in lines if ln.startswith("Input:")), None)
    new_output = next((ln[len("Output:"):].strip() for ln in lines if ln.startswith("Output:")), None)
    if new_input is None or new_output is None:
        if len(lines) < 2:
            raise ValueError(f"Could not parse an Input/Output pair from the model response: {text!r}")
        new_input = lines[0].replace("Input: ", "")
        new_output = lines[1].replace("Output: ", "")

    return {"instruction": task_description, "input": new_input, "output": new_output}

def generate_with_unsloth(task_description: str, examples: List[Dict[str, str]], model_type: str) -> Dict[str, str]:
    """Generate one new example with a local Unsloth model.

    Args:
        task_description: Task text; also used as the record's instruction.
        examples: Seed examples; only the first two are put in the prompt.
        model_type: Key into ``unsloth_models`` selecting the model to load.

    Returns:
        A dict with the keys ``instruction``, ``input`` and ``output``.

    Raises:
        ValueError: If no Input/Output pair can be read from the generation.
    """
    # This function is called once per generated example, so the loaded model
    # is kept on the function object and reused instead of being reloaded.
    # NOTE(review): the cached model stays in memory after generation; clear
    # ``generate_with_unsloth._loaded`` if that memory is needed for training.
    loaded = generate_with_unsloth.__dict__.setdefault("_loaded", {})
    if model_type not in loaded:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=unsloth_models[model_type],
            max_seq_length=2048,
            load_in_4bit=True,
        )
        FastLanguageModel.for_inference(model)
        loaded[model_type] = (model, tokenizer)
    model, tokenizer = loaded[model_type]

    prompt = f"Task: {task_description}\n\nExamples:\n"
    for ex in examples[:2]:  # Use first two examples
        prompt += f"Input: {ex['input']}\nOutput: {ex['output']}\n\n"
    prompt += "Generate a new, unique example following the same pattern. Respond with just the Input and Output, no additional text."

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda()
    output = model.generate(input_ids, max_new_tokens=100)
    # Decode only the newly generated tokens so the seed examples in the
    # prompt are not mistaken for the model's answer.
    generated_ids = output[0][input_ids.shape[1]:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Parse Unsloth model's response: prefer labelled lines, otherwise fall
    # back to the last two non-empty lines.
    lines = [line.strip() for line in response.splitlines() if line.strip()]
    new_input = next((ln[len("Input:"):].strip() for ln in lines if ln.startswith("Input:")), None)
    new_output = next((ln[len("Output:"):].strip() for ln in lines if ln.startswith("Output:")), None)
    if new_input is None or new_output is None:
        if len(lines) < 2:
            raise ValueError(f"Could not parse an Input/Output pair from the model response: {response!r}")
        new_input = lines[-2].replace("Input: ", "")
        new_output = lines[-1].replace("Output: ", "")

    return {"instruction": task_description, "input": new_input, "output": new_output}

def save_dataset(dataset: List[Dict[str, str]], filename: str) -> None:
    """Write ``dataset`` to ``filename`` as indented, UTF-8 encoded JSON.

    Non-ASCII characters are stored as-is instead of being escaped.
    """
    with open(filename, mode='w', encoding='utf-8') as handle:
        json.dump(dataset, handle, indent=2, ensure_ascii=False)

def load_dataset(filename: str) -> List[Dict[str, str]]:
    """Read the UTF-8 JSON file at ``filename`` and return the decoded object."""
    with open(filename, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def count_tokens(text: str) -> int:
    """Return the number of ``cl100k_base`` tokens that ``text`` encodes to.

    NOTE(review): ``cl100k_base`` is a tiktoken encoding, not the tokenizer
    of the model that gets fine-tuned, so this is presumably only an
    estimate of the training-time length -- confirm that is acceptable.
    """
    token_ids = tiktoken.get_encoding("cl100k_base").encode(text)
    return len(token_ids)

def validate_dataset(dataset: List[Dict[str, str]], max_tokens: int = 2048) -> List[Dict[str, str]]:
    """Keep only the examples whose combined token count fits the budget.

    The count is the sum of ``count_tokens`` over the ``instruction``,
    ``input`` and ``output`` fields; an example is kept when that sum is at
    most ``max_tokens``. The input order is preserved.
    """
    counted_fields = ("instruction", "input", "output")

    def _within_budget(record: Dict[str, str]) -> bool:
        used = sum(count_tokens(record[name]) for name in counted_fields)
        return used <= max_tokens

    return [record for record in dataset if _within_budget(record)]

# Training functions

def prepare_dataset_for_training(dataset: List[Dict[str, str]]) -> Dataset:
    """Convert the list of example dicts into a ``datasets.Dataset``."""
    hf_dataset = Dataset.from_list(dataset)
    return hf_dataset

def setup_model_and_tokenizer(model_name: str, max_seq_length: int):
    """Load a 4-bit model through Unsloth and attach LoRA adapters to it.

    Args:
        model_name: Model identifier handed to ``FastLanguageModel.from_pretrained``.
        max_seq_length: Maximum sequence length handed to the loader.

    Returns:
        ``(model, tokenizer)`` where ``model`` is the result of
        ``FastLanguageModel.get_peft_model``.
    """
    base_model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        load_in_4bit=True,
    )

    # Fixed LoRA configuration: rank 16 on the attention and MLP projections.
    lora_settings = dict(
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
    )
    peft_model = FastLanguageModel.get_peft_model(base_model, **lora_settings)
    return peft_model, tokenizer

def train_model(model, tokenizer, dataset, output_dir, num_train_epochs, per_device_train_batch_size):
    """Fine-tune ``model`` on ``dataset`` with TRL's ``SFTTrainer``.

    Args:
        model: Model to train (as returned by ``setup_model_and_tokenizer``).
        tokenizer: Tokenizer matching ``model``.
        dataset: A ``datasets.Dataset``. If it has no ``text`` column, one is
            built from its ``instruction`` / ``input`` / ``output`` columns.
        output_dir: Directory passed to ``TrainingArguments``.
        num_train_epochs: Number of training epochs.
        per_device_train_batch_size: Per-device batch size.

    Returns:
        The ``SFTTrainer`` instance after ``train()`` has completed.
    """
    # The trainer below reads the "text" column, but the pipeline's records
    # only carry instruction/input/output. Render them into one prompt string
    # using the same layout that generate_response sends at inference time.
    if "text" not in dataset.column_names:
        eos_token = getattr(tokenizer, "eos_token", None) or ""

        def _render_text(example):
            text = (
                "Below are some instructions that describe a task. "
                "Write a response that appropriately completes the request.\n\n"
                f"### Instruction:\n{example['instruction']}\n"
            )
            if example.get("input"):
                text += f"### Input:\n{example['input']}\n"
            # The EOS token marks where a completed response ends.
            text += f"### Response:\n{example['output']}{eos_token}"
            return {"text": text}

        dataset = dataset.map(_render_text)

    # Ask torch directly whether the GPU supports bfloat16.
    # NOTE(review): the original called FastLanguageModel.is_bfloat16_supported();
    # I could not confirm that attribute exists -- verify against the installed unsloth.
    use_bf16 = torch.cuda.is_bf16_supported()

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        learning_rate=2e-4,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=tokenizer.model_max_length,
        args=training_args,
    )

    trainer.train()
    return trainer

def save_model(trainer, output_dir):
    """Save the trainer's model into ``output_dir`` via ``trainer.save_model``."""
    persist = trainer.save_model
    persist(output_dir)

def push_to_hub(model, tokenizer, repo_name, hf_token=None):
    """Upload ``model`` and ``tokenizer`` to the Hugging Face Hub repo ``repo_name``.

    Args:
        model: Object exposing ``push_to_hub(repo_name, token=...)``.
        tokenizer: Object exposing ``push_to_hub(repo_name, token=...)``.
        repo_name: Target repository id, e.g. ``"username/model-name"``.
        hf_token: Optional access token. When empty or ``None``, ``token=None``
            is passed so the library falls back to its own stored credentials.

    Returns:
        A human-readable confirmation message.
    """
    # Pass the token per call rather than persisting a UI-supplied secret to
    # disk, and use ``token=`` instead of the deprecated ``use_auth_token``.
    token = hf_token or None
    for artifact in (model, tokenizer):
        artifact.push_to_hub(repo_name, token=token)

    return f"Model and tokenizer pushed to Hugging Face Hub: {repo_name}"

def quantize_model(model, tokenizer, output_dir, quantization_methods):
    """Export ``model`` to GGUF once per requested quantization method.

    Calls ``model.save_pretrained_gguf(output_dir, tokenizer,
    quantization_method=...)`` for every entry of ``quantization_methods``
    and returns the list of values those calls returned, in order.

    NOTE(review): callers treat the returned values as file paths; confirm
    that ``save_pretrained_gguf`` really returns the written path.
    """
    saved_paths = []
    for quant in quantization_methods:
        print(f"Quantizing model to {quant}...")
        saved_paths.append(
            model.save_pretrained_gguf(output_dir, tokenizer, quantization_method=quant)
        )
        print(f"Quantized model ({quant}) saved to {saved_paths[-1]}")
    return saved_paths

def create_ollama_modelfile(gguf_path, model_name):
    """Write an Ollama Modelfile next to ``gguf_path`` and return its path.

    Args:
        gguf_path: Path of the GGUF file referenced by the ``FROM`` line.
        model_name: Unused here; kept so existing callers keep working.

    Returns:
        Path of the written file, ``Modelfile_<gguf file name>`` in the same
        directory as ``gguf_path``.
    """
    # The Modelfile itself contains triple double quotes, so the Python
    # literal is delimited with triple single quotes. Doubled braces are
    # f-string escapes: "{{{{ .Prompt }}}}" is written as "{{ .Prompt }}".
    # NOTE(review): ".Input" is presumably not a variable Ollama provides to
    # templates -- confirm against the Ollama Modelfile documentation.
    modelfile_content = f'''
FROM {gguf_path}
TEMPLATE """Below are some instructions that describe a task. Write a response that appropriately completes the request.

### Instruction:
{{{{ .Prompt }}}}
{{{{ if .Input }}}}
### Input:
{{{{ .Input }}}}
{{{{ end }}}}
### Response:
{{{{ .Response }}}}"""
PARAMETER stop "<|start_header_id|>"
PARAMETER stop "<|eot_id|>"
PARAMETER stop "<|end_header_id|>"
PARAMETER stop "<|end_of_text|>"
PARAMETER stop "<|reserved_special_token_"
'''
    modelfile_path = os.path.join(os.path.dirname(gguf_path), f"Modelfile_{os.path.basename(gguf_path)}")
    with open(modelfile_path, 'w', encoding='utf-8') as f:
        f.write(modelfile_content)
    print(f"Ollama Modelfile created at {modelfile_path}")
    return modelfile_path

def convert_to_ollama(gguf_paths, model_name):
    """Register every GGUF file with Ollama and return the created model names.

    For each path a Modelfile is written with ``create_ollama_modelfile`` and
    ``ollama create <name> -f <modelfile>`` is run; a failing command raises
    ``subprocess.CalledProcessError`` because ``check=True`` is used.
    """
    registered_names = []
    for path in gguf_paths:
        modelfile = create_ollama_modelfile(path, model_name)

        # The quantization tag is the second dot-separated part of the file
        # name (assumes names shaped like <prefix>.<QUANT>.gguf -- TODO confirm).
        file_name = os.path.basename(path)
        quant_tag = file_name.split('.')[1].lower()
        normalized_model = model_name.replace('-', '_').lower()
        target_name = f"unsloth_{normalized_model}_{quant_tag}"

        print(f"Converting model to Ollama format with name: {target_name}")
        create_cmd = ["ollama", "create", target_name, "-f", modelfile]
        subprocess.run(create_cmd, check=True)
        print(f"Model converted and added to Ollama with name: {target_name}")
        registered_names.append(target_name)
    return registered_names

# Gradio UI functions

def generate_response(instruction, input_text, ollama_model_name):
    """Run one prompt through ``ollama run`` and return the model's reply.

    Args:
        instruction: Text placed under the ``### Instruction:`` heading.
        input_text: Optional text placed under ``### Input:``; the section is
            omitted when this is empty.
        ollama_model_name: Name of the Ollama model to run.

    Returns:
        The command's stripped standard output, or a string starting with
        ``"Error:"`` when the ``ollama`` executable is missing or exits with
        a non-zero status.
    """
    prompt = (
        "Below are some instructions that describe a task. "
        "Write a response that appropriately completes the request.\n\n"
        f"### Instruction:\n{instruction}\n"
    )
    if input_text:
        prompt += f"### Input:\n{input_text}\n"
    prompt += "### Response:"

    try:
        result = subprocess.run(
            ["ollama", "run", ollama_model_name, prompt],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        return "Error: the 'ollama' executable was not found on PATH."

    # Report failures to the UI instead of returning an empty answer.
    if result.returncode != 0:
        return f"Error: ollama exited with status {result.returncode}: {result.stderr.strip()}"
    return result.stdout.strip()

def process_pipeline(task_description, example1_input, example1_output, example2_input, example2_output,
                     task_examples, num_examples, max_tokens, model_name, num_train_epochs,
                     per_device_train_batch_size, output_dir, push_to_hub, hub_model_id, hub_token,
                     quant_versions, dataset_generation_model):
    """Run the full pipeline: dataset, training, optional upload, GGUF export, Ollama.

    Args:
        task_description: Instruction shared by the two inline examples.
        example1_input, example1_output: First seed example.
        example2_input, example2_output: Second seed example.
        task_examples: Optional JSON text holding further example objects.
        num_examples: Number of records to generate.
        max_tokens: Token budget per example; also used as the model's
            maximum sequence length.
        model_name: Pretrained model identifier.
        num_train_epochs: Number of training epochs.
        per_device_train_batch_size: Per-device batch size.
        output_dir: Directory for the fine-tuned model.
        push_to_hub: Whether to upload the result to the Hugging Face Hub.
        hub_model_id: Hub repository id used when uploading.
        hub_token: Hub access token used when uploading.
        quant_versions: Selected quantization labels (``"Q4"`` / ``"Q8"``).
        dataset_generation_model: Generator name passed to
            ``generate_dataset_with_ai``.

    Returns:
        ``(status, ollama_model_names)``: a multi-line status text and the
        names of the models registered with Ollama (empty when the pipeline
        stopped early).
    """
    # UI sliders may deliver floats; these three values are used as integers.
    num_examples = int(num_examples)
    max_tokens = int(max_tokens)
    per_device_train_batch_size = int(per_device_train_batch_size)

    # Prepare examples
    examples = [
        {"instruction": task_description, "input": example1_input, "output": example1_output},
        {"instruction": task_description, "input": example2_input, "output": example2_output}
    ]

    if task_examples and task_examples.strip():
        try:
            extra_examples = json.loads(task_examples)
        except json.JSONDecodeError as err:
            return f"Error: Additional Task Examples is not valid JSON ({err}).\n", []
        # A single JSON object is accepted as a one-element list.
        if isinstance(extra_examples, dict):
            extra_examples = [extra_examples]
        examples.extend(extra_examples)

    # Generate dataset
    dataset = generate_dataset_with_ai(task_description, examples, num_examples, dataset_generation_model)

    valid_dataset = validate_dataset(dataset, max_tokens)

    output_file = f"{task_description.replace(' ', '_')[:30]}_dataset.json"
    save_dataset(valid_dataset, output_file)

    status = f"Dataset with {len(valid_dataset)} valid examples (out of {len(dataset)} generated) has been saved to {output_file}\n"
    status += f"Examples removed due to token limit: {len(dataset) - len(valid_dataset)}\n"

    if not valid_dataset:
        status += "Error: no examples fit within the token limit; training was skipped.\n"
        return status, []

    # Prepare dataset for training
    train_dataset = prepare_dataset_for_training(valid_dataset)

    # Setup model and tokenizer
    model, tokenizer = setup_model_and_tokenizer(model_name, max_tokens)

    # Train model
    trainer = train_model(model, tokenizer, train_dataset, output_dir, num_train_epochs, per_device_train_batch_size)

    # Save model locally
    save_model(trainer, output_dir)
    status += f"Fine-tuned model has been saved to {output_dir}\n"

    # Push to Hugging Face Hub if requested
    if push_to_hub:
        if not hub_model_id:
            status += "Error: Please provide a Hub Model ID to push to Hugging Face Hub.\n"
        else:
            # The boolean parameter ``push_to_hub`` hides the module-level
            # upload function of the same name, so fetch that function from
            # the module namespace instead of calling the boolean.
            upload = globals()["push_to_hub"]
            status += upload(model, tokenizer, hub_model_id, hub_token) + "\n"

    # Quantize model
    quant_output_dir = os.path.join(output_dir, "quantized")
    os.makedirs(quant_output_dir, exist_ok=True)

    method_by_label = {"Q4": "q4_k_m", "Q8": "q8_0"}
    selected_labels = quant_versions or []
    quantization_methods = [method for label, method in method_by_label.items() if label in selected_labels]

    gguf_paths = quantize_model(model, tokenizer, quant_output_dir, quantization_methods)

    # Convert to Ollama
    ollama_model_names = convert_to_ollama(gguf_paths, model_name.split('/')[-1])

    status += "Quantization and conversion completed.\n"
    for name in ollama_model_names:
        status += f"Model converted and added to Ollama with name: {name}\n"

    return status, ollama_model_names

def main_interface():
    """Build the Gradio UI (setup/training tab and chat tab) and launch it.

    The "Start Pipeline" button runs ``process_pipeline`` with the form
    values, shows its status text and refreshes the chat tab's model list
    from ``ollama list``. The "Generate Response" button sends the chat form
    to ``generate_response``.
    """
    model_choices = list(unsloth_models.values())

    with gr.Blocks() as demo:
        gr.Markdown("# Flexible LLM Fine-tuning Pipeline with AI-Assisted Dataset Generation")

        with gr.Tab("Setup and Training"):
            task_description = gr.Textbox(label="Task Description", placeholder="e.g., Simplify the given text, or Translate the text to French")

            with gr.Row():
                with gr.Column():
                    example1_input = gr.Textbox(label="Example 1 Input", placeholder="Enter the input for the first example")
                    example1_output = gr.Textbox(label="Example 1 Output", placeholder="Enter the expected output for the first example")
                with gr.Column():
                    example2_input = gr.Textbox(label="Example 2 Input", placeholder="Enter the input for the second example")
                    example2_output = gr.Textbox(label="Example 2 Output", placeholder="Enter the expected output for the second example")

            task_examples = gr.Textbox(label="Additional Task Examples (JSON format)", placeholder='''[
    {"instruction": "Simplify this text:", "input": "The quantum mechanical model is based on mathematics of waves and introduces the concept of the electron cloud.", "output": "The quantum model of atoms uses math about waves. It says electrons move around the atom like a cloud."},
    {"instruction": "Translate to French:", "input": "Hello, how are you?", "output": "Bonjour, comment allez-vous ?"}
]''')
            num_examples = gr.Slider(minimum=10, maximum=1000, step=10, label="Number of examples to generate", value=100)
            dataset_generation_model = gr.Radio(["self", "claude", "gemini", "openai", "llama3.1", "mistral"], label="Dataset Generation Model", value="self")
            max_tokens = gr.Slider(minimum=128, maximum=4096, step=128, label="Maximum tokens per example", value=2048)
            # Preselect a model so the pipeline never receives None as the model name.
            model_name = gr.Dropdown(model_choices, label="Pretrained model name", value=model_choices[0])
            num_train_epochs = gr.Slider(minimum=1, maximum=10, step=1, label="Number of training epochs", value=3)
            per_device_train_batch_size = gr.Slider(minimum=1, maximum=32, step=1, label="Batch size per device", value=2)
            output_dir = gr.Textbox(label="Output directory for fine-tuned model", value="./fine_tuned_model")
            push_to_hub = gr.Checkbox(label="Push to Hugging Face Hub")
            hub_model_id = gr.Textbox(label="Hugging Face Hub Model ID (e.g., 'username/model-name')")
            hub_token = gr.Textbox(label="Hugging Face Hub API Token", type="password")
            quant_versions = gr.CheckboxGroup(["Q4", "Q8"], label="Quantization Versions", value=["Q4"])

            start_button = gr.Button("Start Pipeline")
            output = gr.Textbox(label="Pipeline Status")
            # This button has no handler; it is kept so the layout is unchanged.
            gr.Button("Open Chat Interface")

        with gr.Tab("Chat"):
            model_select = gr.Dropdown(label="Select Ollama Model")
            with gr.Row():
                instruction = gr.Textbox(label="Instruction", placeholder="Enter your instruction here...")
                input_text = gr.Textbox(label="Input (optional)", placeholder="Enter input text if needed...")
            chat_output = gr.Textbox(label="Model Output")
            generate_button = gr.Button("Generate Response")

        def update_model_list():
            result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
            rows = result.stdout.strip().split('\n')[1:]  # Skip header
            # Blank rows would make split()[0] fail, so they are ignored.
            models = [row.split()[0] for row in rows if row.strip()]
            # gr.update replaces the per-component .update() helpers.
            return gr.update(choices=models)

        def start_pipeline(*args):
            status, _ollama_model_names = process_pipeline(*args)
            return status, gr.update(interactive=True), update_model_list()

        start_button.click(start_pipeline,
                           inputs=[task_description, example1_input, example1_output, example2_input, example2_output,
                                   task_examples, num_examples, max_tokens, model_name, num_train_epochs,
                                   per_device_train_batch_size, output_dir, push_to_hub, hub_model_id, hub_token,
                                   quant_versions, dataset_generation_model],
                           outputs=[output, generate_button, model_select])

        def chat(instruction, input_text, model_name):
            if not model_name:
                return "Please select a model first."
            return generate_response(instruction, input_text, model_name)

        generate_button.click(chat, inputs=[instruction, input_text, model_select], outputs=[chat_output])

    demo.launch()

# Build and launch the UI only when this code runs as the main program;
# importing it as a module does not start the interface.
if __name__ == "__main__":
    main_interface()