<a href="https://colab.research.google.com/github/sanjayy0612/zero_./blob/main/train/zero_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q --upgrade pip
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install -q transformers accelerate datasets peft bitsandbytes


In [None]:
from datasets import load_dataset

# Fixed seed for the train/test split. Later cells slice the train split by
# absolute row index (0-5k, 5k-10k, 10k-end), so the split has to come out the
# same on every run; otherwise the slices and the held-out test rows change
# each time this cell is executed.
SPLIT_SEED = 42

dataset = load_dataset("AnishJoshi/nl2bash-custom", split="train")
dataset = dataset.train_test_split(test_size=0.2, seed=SPLIT_SEED)  # 80/20 train/test split
print(dataset)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Pull down the pretrained tokenizer and causal-LM weights
tokenizer = AutoTokenizer.from_pretrained("google/codegemma-2b")
model = AutoModelForCausalLM.from_pretrained("google/codegemma-2b")

# Smoke test: push one prompt through the untuned model and show the decoded text
smoke_prompt = "Write a Python function to reverse a string."
encoded_prompt = tokenizer(smoke_prompt, return_tensors="pt")
generated_ids = model.generate(**encoded_prompt)
decoded_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(decoded_text)




In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
model_name = "google/codegemma-2b"

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the tokenizer and the base model with the quantization config above, so
# that `model` is the 4-bit model when the training cell calls
# `prepare_model_for_kbit_training(model)`. Without this load, `bnb_config`
# is never used and `model` is whatever an earlier cell left behind.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

tokenizer.pad_token = tokenizer.eos_token

In [None]:

# Pad with the EOS token, on the left-hand side of each sequence.
# NOTE(review): since pad == EOS here, a collator that masks pad ids in the
# labels would presumably also mask the real trailing EOS — confirm.
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token


def preprocess(example):
    """Turn one nl2bash record into fixed-length causal-LM training features.

    Args:
        example: mapping with an ``nl_command`` entry (instruction text) and a
            ``bash_code`` entry (target command).

    Returns:
        The tokenizer output for the formatted prompt, truncated/padded to 192
        tokens, plus a ``labels`` entry holding a copy of ``input_ids``.
    """
    instruction = example['nl_command']
    command = example['bash_code']
    text = f"Instruction: {instruction}\nOutput: {command}{tokenizer.eos_token}"

    features = tokenizer(text, padding="max_length", truncation=True, max_length=192)

    # Labels mirror the inputs; copied so the two lists are separate objects.
    features["labels"] = list(features["input_ids"])
    return features


# Tokenize both splits and drop the raw text columns
tokenized_dataset = dataset.map(
    preprocess,
    remove_columns=['nl_command', 'bash_code'],
)

print("Preprocessing complete. Example of a tokenized sample:")
first_train_sample = tokenized_dataset['train'][0]
print(first_train_sample.keys())

In [None]:
# Carve the first 10,000 tokenized training rows into two consecutive
# 5,000-row chunks (rows 0-4,999 and rows 5,000-9,999).
tokenized_train_split = tokenized_dataset["train"]
train_dataset_part1, train_dataset_part2 = (
    tokenized_train_split.select(range(first_row, first_row + 5000))
    for first_row in (0, 5000)
)

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

from google.colab import drive
drive.mount('/content/drive')

# Ready the model for k-bit training before any adapters are attached
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 16 / alpha 32 on the q/k/v/o projection modules,
# 5% dropout, bias set to "none"
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
model = get_peft_model(model, lora_config)

# Hyper-parameters for run 1: batches of 4 per device, accumulated over 4 steps,
# with a checkpoint written at the end of every epoch
run1_hyperparams = dict(
    output_dir="/content/drive/MyDrive/codegemma_nl2bash_finetuned/run1_5k_checkpoint",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=50,
    save_strategy="epoch",
    report_to="none",
)
training_args_part1 = TrainingArguments(**run1_hyperparams)

# Language-modeling collator with masked-LM switched off (mlm=False)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer_part1 = Trainer(
    model=model,
    args=training_args_part1,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=train_dataset_part1,
    eval_dataset=tokenized_dataset["test"],
)

# Train on the first slice, then store the resulting adapters on Drive
print("--- Starting Training: Part 1 (0 to 5,000 examples) ---")
trainer_part1.train()
print("--- Part 1 Complete. Saving final model for this run. ---")
trainer_part1.save_model("/content/drive/MyDrive/codegemma_nl2bash_finetuned/run1_5k_final")

In [None]:

from google.colab import drive
drive.mount('/content/drive')

# Libraries needed to rebuild the quantized base model and attach the adapters
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 4-bit NF4 quantization settings (keep identical to the ones used for training)
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Where the fine-tuned adapters live on Drive.
# NOTE(review): this is the run2 (10k) output, which is only written by a
# training cell further down the notebook — confirm it exists before running.
model_path = "/content/drive/MyDrive/codegemma_nl2bash_finetuned/run2_10k_final"

print("\nLoading model... (This may take a moment)")
# Quantized base model first, then its tokenizer (EOS doubles as pad token)
base_model = AutoModelForCausalLM.from_pretrained(
    "google/codegemma-2b",
    device_map="auto",
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained("google/codegemma-2b")
tokenizer.pad_token = tokenizer.eos_token

# Attach the saved LoRA adapters and switch to eval mode
model = PeftModel.from_pretrained(base_model, model_path)
model.eval()
print("Model loaded successfully!")

# CORRECTED Inference Function
def generate_bash(nl_query):
    """Generate a bash command for a natural-language instruction.

    Formats the query into the "Instruction: ... / Output:" prompt, samples up
    to 100 new tokens from the global ``model`` (temperature 0.1) and returns
    only the text produced after the prompt.

    Args:
        nl_query: Natural-language description of the desired command.

    Returns:
        The generated text with surrounding whitespace stripped.
    """
    prompt = f"Instruction: {nl_query}\nOutput:"
    # Follow the model's own device instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.1,
            do_sample=True
        )
    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "Output:" returns the wrong segment whenever the query or the
    # generated command itself contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# ---- Single-prompt check ----
test_query = "Create a new directory called 'my_project'"
result = generate_bash(test_query)

# Report the query next to the command the model produced
print("\n--- INFERENCE RESULT ---")
print(f"Natural Language: {test_query}")
print("-" * 33)
print(f"Generated Bash Command: {result}")

# NEXT 5K data


In [None]:


import torch
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)

# 4-bit NF4 quantization settings used to reload the base model
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# 1. Second data slice: rows 5,000-9,999 of the tokenized train split
print("Preparing the next data slice (5,000 to 10,000)...")
train_dataset_part2 = tokenized_dataset["train"].select(range(5000, 10000))
print(f"Data slice ready. Number of examples: {len(train_dataset_part2)}")

# 2. Adapters saved at the end of Part 1
model_path_run1 = "/content/drive/MyDrive/codegemma_nl2bash_finetuned/run1_5k_final"

# 3. Fresh quantized base model plus tokenizer (EOS as pad token, left padding)
print("Loading base model 'google/codegemma-2b'...")
base_model = AutoModelForCausalLM.from_pretrained(
    "google/codegemma-2b",
    device_map="auto",
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained("google/codegemma-2b")
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

# 4. Attach the Part 1 adapters; is_trainable=True so training can continue on them
print(f"Loading fine-tuned adapters from: {model_path_run1}")
model = PeftModel.from_pretrained(base_model, model_path_run1, is_trainable=True)
print("Adapters from Part 1 loaded successfully.")

# Sanity check: report how many parameters are trainable
model.print_trainable_parameters()

# 5. Hyper-parameters for run 2 (same values as run 1, new output directory)
run2_hyperparams = dict(
    output_dir="/content/drive/MyDrive/codegemma_nl2bash_finetuned/run2_10k_checkpoint",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=50,
    save_strategy="epoch",
    report_to="none",
)
training_args_part2 = TrainingArguments(**run2_hyperparams)

# 6. Collator + Trainer for the second slice
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
trainer_part2 = Trainer(
    model=model,
    args=training_args_part2,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=train_dataset_part2,
    eval_dataset=tokenized_dataset["test"],
)

# 7. Train on the second slice
print("\n--- Starting Training: Part 2 (5,000 to 10,000 examples) ---")
trainer_part2.train()

# 8. Store the resulting adapters on Drive
print("--- Part 2 Complete. Saving final model. ---")
trainer_part2.save_model("/content/drive/MyDrive/codegemma_nl2bash_finetuned/run2_10k_final")

In [None]:

# Three differently-worded requests for creating exactly one directory
test_prompts = [
    "Create a single directory named 'my_project'",
    "Make just one folder called 'test_folder'",
    "Create only the directory 'docs'"
]

# Header describing the run
for header_line in (
    "--- Testing Model with Engineered Prompts ---",
    "Model: Trained on 10k examples",
    "Temperature: 0.1",
    "-" * 40,
):
    print(header_line)

# Feed every prompt through generate_bash and show what came back
for test_number, prompt in enumerate(test_prompts, start=1):
    print(f"Test #{test_number}")
    print(f"Prompt: '{prompt}'")

    generated_command = generate_bash(prompt)

    print(f"Result: {generated_command}\n")

In [None]:


import torch
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,  # <-- This was the missing import
    DataCollatorForLanguageModeling
)

print("Preparing for the final training session...")

# Single source of truth for the base checkpoint, so the progress message and
# the actual loads below cannot drift apart.
base_model_id = "google/codegemma-2b"

# Define the quantization configuration (4-bit NF4, double quantization,
# bfloat16 compute dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# 1. Prepare the final data slice: every train row from index 10,000 onwards
final_slice_start = 10000
final_slice_end = len(tokenized_dataset["train"])
if final_slice_end <= final_slice_start:
    # An empty slice would only fail later, inside the Trainer, with a far
    # less helpful message.
    raise ValueError(
        f"Train split has {final_slice_end} rows; nothing left to train on "
        f"after row {final_slice_start}."
    )
train_dataset_part3 = tokenized_dataset["train"].select(range(final_slice_start, final_slice_end))
print(f"Data slice ready. Training on examples from {final_slice_start} to {final_slice_end}.")

# 2. Define path to the model from Session 2
model_path_run2 = "/content/drive/MyDrive/codegemma_nl2bash_finetuned/run2_10k_final"

# 3. Reload the base model (quantized) and its tokenizer
print(f"Loading base model '{base_model_id}'...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# 4. Load LoRA adapters from Session 2, making them trainable
print(f"Loading fine-tuned adapters from: {model_path_run2}")
model = PeftModel.from_pretrained(base_model, model_path_run2, is_trainable=True)
print("Adapters from Part 2 loaded successfully.")
model.print_trainable_parameters()

# 5. Set up final training arguments
training_args_part3 = TrainingArguments(
    output_dir="/content/drive/MyDrive/codegemma_nl2bash_finetuned/run3_final_checkpoint",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=50,
    save_strategy="epoch",
    report_to="none"
)

# 6. Create the final Trainer
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
trainer_part3 = Trainer(
    model=model,
    args=training_args_part3,
    train_dataset=train_dataset_part3,
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 7. Start the final training run!
print("\n--- Starting Final Training: Part 3 ---")
trainer_part3.train()

# 8. Save the final, fully-trained model
print("--- Final Training Complete. Saving the final model. ---")
trainer_part3.save_model("/content/drive/MyDrive/codegemma_nl2bash_finetuned/final_model_15k")

In [None]:


print("Testing the final model trained on all 15,726 examples...")

# Adapters written by the last training run
model_path = "/content/drive/MyDrive/codegemma_nl2bash_finetuned/final_model_15k"

# Rebuild base model + adapters from disk so the saved weights are the ones in use
print("Loading final model...")
base_model = AutoModelForCausalLM.from_pretrained(
    "google/codegemma-2b",
    device_map="auto",
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained("google/codegemma-2b")
tokenizer.pad_token = tokenizer.eos_token
model = PeftModel.from_pretrained(base_model, model_path)
model.eval()
print("Model loaded successfully!")

# Id of the first token the tokenizer emits for a bare newline
newline_ids = tokenizer.encode('\n', add_special_tokens=False)
newline_token_id = newline_ids[0]

# Using the perfected generate_bash function
def generate_bash(nl_query):
    """Generate a bash command for a natural-language instruction (greedy).

    Formats the query into the "Instruction: ... / Output:" prompt, greedily
    decodes up to 50 new tokens from the global ``model`` and returns only the
    text produced after the prompt.

    Args:
        nl_query: Natural-language description of the desired command.

    Returns:
        The generated text with surrounding whitespace stripped.
    """
    prompt = f"Instruction: {nl_query}\nOutput:"
    # One device move, to wherever the model lives (no hard-coded "cuda").
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,                # input_ids together with the attention mask
            max_new_tokens=50,       # prevent runaway sequences
            do_sample=False          # greedy decoding; temperature/top_p do not apply
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "Output:" returns the wrong segment whenever the query or the
    # generated command itself contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# --- Prompts for the final evaluation ---
prompts_to_test = [
    # Original wording, which was ambiguous and caused issues
    "Create a new directory called 'my_project'",
    # Engineered, more specific wording
    "Create a single directory named 'my_project'"
]

print("\n--- Running Final Evaluation ---")
print("-" * 40)

# Show each prompt followed by the command generated for it
for test_number, prompt in enumerate(prompts_to_test, start=1):
    print(f"Test #{test_number}")
    print(f"Prompt: '{prompt}'")

    generated_command = generate_bash(prompt)

    print(f"Result: {generated_command}\n")

# Reducing hallucination and overfitting by retraining the 5k model


In [None]:
import json
import random

print("Generating a high-quality correctional dataset of 500 examples...")

# Number of examples to write and the seed that makes the selection repeatable.
NUM_CORRECTIONAL_EXAMPLES = 500
CORRECTIONAL_SEED = 42

# A large and diverse pool of realistic directory names
dir_names = [
    "src", "lib", "dist", "build", "assets", "components", "routes", "models",
    "controllers", "services", "utils", "helpers", "middleware", "config", "public",
    "static", "templates", "views", "tests", "docs", "data", "logs", "scripts",
    "notebooks", "api", "_includes", ".vscode", "migrations", "seeders", "database",
    "core", "app", "main", "handlers", "modules", "plugins", "themes", "widgets",
    "styles", "css", "js", "img", "fonts", "sass", "less", "dist-css", "build-js",
    "server", "client", "shared", "common", "packages", "examples", "demo", "vendor",
    "third_party", "bin", "etc", "var", "tmp", "temp", "backups", "uploads", "media",
    "screenshots", "videos", "audio", "content", "pages", "posts", "layouts",
    "production", "development", "staging", "dockerfiles", "kubernetes",
    "_data", "functions", "lambda", "hooks", "providers", "contexts", "reducers",
    "actions", "store", "features", "epics", "sagas", "docker", ".github", "ci",
    "workflows", "dev-docs", "user_guides", "marketing-assets", "legal", "archive",
    "dist-v1", "release-build", "final-assets", "go-project", "python_app",
    "node_server", "react-client", "infra", "terraform", "ansible", "images_final"
]

# A variety of templates for natural language commands
prompt_templates = [
    lambda d: f"create a single directory named '{d}'",
    lambda d: f"make the folder '{d}'",
    lambda d: f"make a directory called '{d}'",
    lambda d: f"create a folder for {d}",
    lambda d: f"new folder: '{d}'",
    lambda d: f"make just one directory called '{d}'",
    lambda d: f"create the '{d}' directory",
    lambda d: f"form a directory, name it '{d}'",
    lambda d: f"generate a directory called '{d}'",
    lambda d: f"please make a folder called '{d}'",
    lambda d: f"I need a directory, call it '{d}'",
    lambda d: f"folder '{d}'",
    lambda d: f"directory '{d}'",
    lambda d: f"make a dir '{d}'",
    lambda d: f"create dir '{d}'",
    lambda d: f"I want a new folder, name it '{d}'",
    lambda d: f"can you create the '{d}' directory?",
    lambda d: f"just the directory '{d}', please",
    lambda d: f"only create the folder '{d}'"
]

# Enumerate every distinct (instruction, command) pair exactly once.
# dict.fromkeys removes duplicates while keeping a stable order, which the
# seeded sampling below relies on.
candidate_pairs = list(dict.fromkeys(
    (template(dir_name), f"mkdir {dir_name}")
    for dir_name in dir_names
    for template in prompt_templates
))

# Random draw-and-retry can never finish when fewer unique pairs exist than
# requested, so fail loudly instead.
if len(candidate_pairs) < NUM_CORRECTIONAL_EXAMPLES:
    raise ValueError(
        f"Only {len(candidate_pairs)} unique examples can be built; "
        f"{NUM_CORRECTIONAL_EXAMPLES} were requested."
    )

# Draw the examples without replacement from a privately seeded generator, so
# re-running the cell reproduces the same dataset.
rng = random.Random(CORRECTIONAL_SEED)
selected_pairs = rng.sample(candidate_pairs, NUM_CORRECTIONAL_EXAMPLES)

used_combinations = set(selected_pairs)
correctional_data = [
    {"nl_command": nl_command, "bash_code": bash_command}
    for nl_command, bash_command in selected_pairs
]

# The path to save the new file in your Google Drive
file_path = "/content/drive/MyDrive/correctional_dataset_500.jsonl"

# Write the data to a .jsonl file (one JSON object per line)
with open(file_path, 'w', encoding='utf-8') as f:
    for entry in correctional_data:
        f.write(json.dumps(entry) + '\n')

print("-" * 50)
print(f"Successfully created correctional dataset at: {file_path}")
print(f"It contains {len(correctional_data)} high-quality, unique examples.")
print("-" * 50)
print("Example #1:", correctional_data[0])
print("Example #2:", correctional_data[1])

In [None]:
# Sanity check: report how many correctional examples are held in memory
num_correctional_examples = len(correctional_data)
print(num_correctional_examples)

In [None]:


import torch
from datasets import load_dataset
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)

print("Starting the correctional fine-tuning process...")

# Seed for the train/test split so the held-out 10% is the same on every run.
CORRECTIONAL_SPLIT_SEED = 42

# --- 1. Load the new, high-quality dataset ---
correctional_dataset_path = "/content/drive/MyDrive/correctional_dataset_500.jsonl"
print(f"Loading correctional dataset from: {correctional_dataset_path}")
dataset = load_dataset("json", data_files=correctional_dataset_path, split="train")

# Split the small dataset into training and testing sets (90% train, 10% test)
dataset = dataset.train_test_split(test_size=0.1, seed=CORRECTIONAL_SPLIT_SEED)
print("Dataset loaded and split:", dataset)


# --- 2. Preprocess the dataset ---
def preprocess(example):
    """Tokenize one correctional record as an "Instruction/Output" prompt.

    Args:
        example: mapping with ``nl_command`` and ``bash_code`` entries.

    Returns:
        The tokenizer output for the prompt, truncated to at most 128 tokens
        (no padding and no labels are added here).
    """
    # NOTE(review): the earlier preprocessing cell appends tokenizer.eos_token
    # to the prompt; this one does not — confirm the difference is intended.
    prompt = f"Instruction: {example['nl_command']}\nOutput: {example['bash_code']}"
    return tokenizer(prompt, truncation=True, max_length=128)


# --- 3. Load our starting model (the 5k version) ---
# Define quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Path to our best model from Session 1
model_path_run1 = "/content/drive/MyDrive/codegemma_nl2bash_finetuned/run1_5k_final"

print(f"Loading base model and adapters from: {model_path_run1}")
base_model = AutoModelForCausalLM.from_pretrained(
    "google/codegemma-2b",
    quantization_config=bnb_config,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("google/codegemma-2b")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Load the adapters and make them trainable
model = PeftModel.from_pretrained(base_model, model_path_run1, is_trainable=True)
print("Base model and 5k adapters loaded successfully.")
model.print_trainable_parameters()

# Apply the preprocessing to the new dataset (runs after the tokenizer above
# has been loaded, since preprocess reads the global `tokenizer`)
tokenized_dataset = dataset.map(preprocess, remove_columns=['nl_command', 'bash_code'])


# --- 4. Define specialized Training Arguments ---
# We use a lower learning rate and fewer epochs for this delicate operation
correctional_training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/codegemma_surgically_corrected_checkpoint",
    num_train_epochs=2,         # Only 2 epochs is enough for this small dataset
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,         # Lower learning rate for fine adjustments
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",
    report_to="none"
)


# --- 5. Run the Correctional Training ---
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
trainer = Trainer(
    model=model,
    args=correctional_training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator
)

print("\n--- Starting the Surgical Strike Fine-Tuning ---")
trainer.train()

# --- 6. Save the final, corrected model ---
print("--- Correctional Training Complete. Saving the final corrected model. ---")
trainer.save_model("/content/drive/MyDrive/codegemma_surgically_corrected_model")

print("\nProcess complete! The new model should have its 'mkdir' behavior corrected.")

In [None]:


print("Testing the final, surgically corrected model...")

# Adapters saved by the correctional fine-tuning run
model_path = "/content/drive/MyDrive/codegemma_surgically_corrected_model"

# Rebuild the quantized base model, its tokenizer, and attach the adapters
print("Loading final model...")
base_model = AutoModelForCausalLM.from_pretrained(
    "google/codegemma-2b",
    device_map="auto",
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained("google/codegemma-2b")
tokenizer.pad_token = tokenizer.eos_token
model = PeftModel.from_pretrained(base_model, model_path)
model.eval()
print("Model loaded successfully!")

# Id of the first token the tokenizer emits for a bare newline; generate_bash
# passes it to generate() as the end-of-sequence id
newline_ids = tokenizer.encode('\n', add_special_tokens=False)
newline_token_id = newline_ids[0]
def generate_bash(nl_query):
    """Generate a single-line bash command for a natural-language instruction.

    Formats the query into the "Instruction: ... / Output:" prompt and samples
    up to 100 new tokens from the global ``model`` (temperature 0.1), passing
    ``newline_token_id`` as the end-of-sequence id. Only the text produced
    after the prompt is returned.

    Args:
        nl_query: Natural-language description of the desired command.

    Returns:
        The generated text with surrounding whitespace stripped.
    """
    prompt = f"Instruction: {nl_query}\nOutput:"
    # Follow the model's own device instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.1,
            do_sample=True,
            eos_token_id=newline_token_id
        )
    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "Output:" returns the wrong segment whenever the query or the
    # generated command itself contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# --- Prompts for evaluating the corrected model ---
prompts_to_test = [
    # 1: the original ambiguous wording — is the result literal now?
    "Create a new directory called 'my_project'",
    # 2: the specific wording, which has to work
    "Create a single directory named 'my_project_2'",
    # 3: an unrelated command, to check nothing else was damaged
    "list all files and folders in the current directory in a long format"
]

print("\n--- Running Final Evaluation ---")
print("-" * 40)

# Show each prompt followed by the (quoted) command generated for it
for test_number, prompt in enumerate(prompts_to_test, start=1):
    print(f"Test #{test_number}")
    print(f"Prompt: '{prompt}'")
    generated_command = generate_bash(prompt)
    print(f"Result: '{generated_command}'\n")

# Merging the LoRA adapter into the base model files


In [None]:
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from google.colab import userdata
from huggingface_hub import login

from google.colab import drive
drive.mount('/content/drive')
print("Starting model merge and upload process...")

# --- 1. Load the Base Model and the Fine-Tuned Adapters ---

# Path to your final, surgically corrected model
adapter_path = "/content/drive/MyDrive/codegemma_surgically_corrected_model"
base_model_name = "google/codegemma-2b"

# Load the base model un-quantized, in bfloat16, for merging
print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token = tokenizer.eos_token

print("Loading your fine-tuned adapters...")
# Load the LoRA model
model = PeftModel.from_pretrained(base_model, adapter_path)


# --- 2. Merge the Adapters into the Base Model ---

print("Merging adapters into the base model...")
model = model.merge_and_unload()
print("Merge complete.")


# --- 3. Log in to Hugging Face Hub ---

# Make sure your HF_TOKEN with 'write' permission is in your Colab secrets
hf_token = userdata.get('HF_TOKEN')
if not hf_token:
    # Stop here: without a token the push below cannot succeed, and carrying on
    # would only hide the real cause behind a later upload error.
    raise RuntimeError("❌ HF_TOKEN not found in Colab secrets. Please add it before running.")
login(token=hf_token)
print("✅ Successfully logged into Hugging Face Hub.")


# --- 4. Push the Merged Model and Tokenizer to the Hub ---

# Target repository on the Hugging Face Hub
new_repo_name = "Sanjayyy06/zero-nl2cmds-v1"

print(f"\nPushing merged model to Hub repository: {new_repo_name}")
print("This may take a few minutes...")

try:
    # Push the model
    model.push_to_hub(
        new_repo_name,
        commit_message="Initial commit of surgically corrected model v1",
        private=False
    )

    # Push the tokenizer
    tokenizer.push_to_hub(
        new_repo_name,
        commit_message="Add tokenizer"
    )

    print("\n✅✅✅ Successfully pushed model and tokenizer to the Hugging Face Hub!")
    print(f"You can find your model at: https://huggingface.co/{new_repo_name}")

except Exception as e:
    print(f"\n❌ An error occurred during the push: {e}")
    # Re-raise so the notebook stops: the following cells download this
    # repository and would otherwise run against a missing or stale upload.
    raise

# Testing the merged version of the model


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

print("Testing the final merged model from the Hugging Face Hub...")

# --- 1. Model id and quantization settings ---

# Hub repository that holds the merged model
model_id = "Sanjayyy06/zero-nl2cmds-v1"

# 4-bit NF4 quantization for inference
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# --- 2. Fetch tokenizer and model straight from the Hub ---

print(f"Loading model: {model_id}")
# No adapter step is needed here: both are loaded directly from model_id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)
model.eval()
print("Model loaded successfully!")


# --- 3. Inference helper setup ---
# Id of the first token the tokenizer emits for a bare newline; generate_bash
# passes it to generate() as the end-of-sequence id
newline_ids = tokenizer.encode('\n', add_special_tokens=False)
newline_token_id = newline_ids[0]

def generate_bash(nl_query):
    """Generate a single-line bash command for a natural-language instruction.

    Formats the query into the "Instruction: ... / Output:" prompt and samples
    up to 100 new tokens from the global ``model`` (temperature 0.1), passing
    ``newline_token_id`` as the end-of-sequence id. Only the text produced
    after the prompt is returned.

    Args:
        nl_query: Natural-language description of the desired command.

    Returns:
        The generated text with surrounding whitespace stripped.
    """
    prompt = f"Instruction: {nl_query}\nOutput:"
    # Follow the model's own device instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.1,
            do_sample=True,
            eos_token_id=newline_token_id
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "Output:" returns the wrong segment whenever the query or the
    # generated command itself contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


# --- 4. Evaluate the merged model on three prompts ---
prompts_to_test = [
    # Ambiguous wording that failed previously
    "Create a new directory called 'my_project'",
    # Specific wording
    "Create a single directory named 'my_project_2'",
    # General-knowledge check on an unrelated command
    "list all files and folders in the current directory in a long format"
]

print("\n--- Running Final Evaluation ---")
print("-" * 40)

# Show each prompt followed by the (quoted) command generated for it
for test_number, prompt in enumerate(prompts_to_test, start=1):
    print(f"Test #{test_number}")
    print(f"Prompt: '{prompt}'")
    generated_command = generate_bash(prompt)
    print(f"Result: '{generated_command}'\n")

In [None]:
# Block 1: Setup and Build

# --- Mount Google Drive ---
from google.colab import drive
print("Mounting Google Drive...")
drive.mount('/content/drive')

# --- Clean Slate & Install Packages ---
print("\nCleaning up previous runs and installing packages...")
!rm -rf /content/llama.cpp
!rm -rf /content/hub_model
!pip install llama-cpp-python huggingface_hub sentencepiece protobuf mistral_common -q

# --- Clone llama.cpp Repo ---
print("\nCloning the llama.cpp repository...")
!git clone https://github.com/ggerganov/llama.cpp.git /content/llama.cpp

# --- Build the Tools with CMake ---
print("\nBuilding the conversion tools (this will take a few minutes)...")
!cd /content/llama.cpp && mkdir -p build && cd build && cmake .. && cmake --build . --config Release

print("\n✅ Block 1 Complete: Environment is ready.")

In [None]:
# Block 2: Download & Convert (Robust Version)

import os
from huggingface_hub import snapshot_download

# --- Download Your Model ---
model_id = "Sanjayyy06/zero-nl2cmds-v1"
model_dir = "/content/hub_model"
print(f"Downloading model '{model_id}' from the Hub...")
snapshot_download(repo_id=model_id, local_dir=model_dir, local_dir_use_symlinks=False)
print("Download complete.")

# --- Dynamically Find and Run the Conversion Script ---
# **THIS IS THE CRITICAL FIX**
fp16_gguf_path = "/content/model-f16.gguf"
print("\nFinding and running the correct conversion script...")

# This shell command block finds the script, checks if it exists, and then runs it.
convert_command = f"""
set -e
CONVERT_SCRIPT_PATH=$(find /content/llama.cpp -name "*convert*.py" | head -n 1)

if [ -z "$CONVERT_SCRIPT_PATH" ]; then
    echo "❌ CRITICAL ERROR: Could not find the model conversion python script in the llama.cpp repository."
    exit 1
fi

echo "✅ Found conversion script at: $CONVERT_SCRIPT_PATH"
python3 "$CONVERT_SCRIPT_PATH" {model_dir} --outfile {fp16_gguf_path} --outtype f16
"""
get_ipython().system(convert_command)


# --- Check if conversion was successful before proceeding ---
if not os.path.exists(fp16_gguf_path):
    print("\n❌ ERROR: Conversion to FP16 GGUF failed. The intermediate file was not created. Please check the logs above.")
else:
    print("Conversion to FP16 GGUF successful.")
    # --- Quantize to 4-bit GGUF ---
    final_gguf_name = "zero-nl2cmds-v1.Q4_K_M.gguf"
    final_gguf_path_colab = f"/content/{final_gguf_name}"
    print(f"\nQuantizing to final GGUF file: {final_gguf_name}...")
    !/content/llama.cpp/build/bin/llama-quantize {fp16_gguf_path} {final_gguf_path_colab} Q4_K_M

    print("\n✅ Block 2 Complete: The final GGUF file has been created in the Colab environment.")
    print("You can see it listed below:")
    !ls -lh {final_gguf_path_colab}

In [None]:
# Block 3: Move to Google Drive and Clean Up

import os

# --- Define Paths ---
final_gguf_name = "zero-nl2cmds-v1.Q4_K_M.gguf"
final_gguf_path_colab = f"/content/{final_gguf_name}"
gdrive_output_dir = "/content/drive/MyDrive/GGUF_Models/"
os.makedirs(gdrive_output_dir, exist_ok=True)
final_gguf_path_gdrive = os.path.join(gdrive_output_dir, final_gguf_name)

# --- Check for the file before copying ---
if not os.path.exists(final_gguf_path_colab):
    print(f"❌ ERROR: Cannot find the file '{final_gguf_name}' to copy. Did Block 2 run successfully?")
else:
    # --- Copy to Google Drive ---
    print(f"Moving '{final_gguf_name}' to your Google Drive...")
    !cp {final_gguf_path_colab} "{final_gguf_path_gdrive}"
    print("Move complete.")

    # --- Clean Up Colab Environment ---
    print("\nCleaning up temporary files...")
    # Using -f to force removal and ignore "not found" errors from previous runs
    !rm -f /content/model-f16.gguf
    !rm -f {final_gguf_path_colab}
    !rm -rf /content/hub_model
    !rm -rf /content/llama.cpp
    print("Cleanup complete.")

    # --- Final Confirmation ---
    print("\n" + "="*60)
    print("✅✅✅ PROCESS COMPLETE! ✅✅✅")
    print("Your final model file is now permanently saved in your Google Drive.")
    print("You can find it in the 'GGUF_Models' folder.")
    print("Here are the details:")
    !ls -lh "{final_gguf_path_gdrive}"
    print("\nWe are now ready to build the local CLI tool.")
    print("="*60)