<a href="https://colab.research.google.com/github/shahdhesham/Colab-Thesis/blob/main/LLAMA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# instruct models
#lets check if we can add deepseek

LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

WIZARDCODER = "WizardLMTeam/WizardCoder-15B-V1.0" #They are not instructtuned for message structure, double check how to send a message
MAGICODER_CL = "ise-uiuc/Magicoder-CL-7B"
 #A 7B-parameter model fine-tuned from CodeLlama-7B-Python-hf using the Magicoder-OSS-Instruct-75K dataset, designed for general code-related tasks such as code generation and translation
MAGICODER_S_CL = "ise-uiuc/Magicoder-S-CL-7B"#An enhanced version of Magicoder-CL-7B, further fine-tuned with the Magicoder-Evol-Instruct-110K dataset to improve instruction-following capabilities, making it more adept at handling complex coding instructions.
CODELLAMA = "codellama/CodeLlama-7b-Instruct-hf"

In [None]:
#Static LLAMA + Assistant showing in output
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Your C++ code
prompt_code = """
#include <iostream>
using namespace std;

int main() {
    int first_number, second_number, sum;
    cout << "Enter two integers: ";
    cin >> first_number >> second_number;
    sum = first_number + second_number;
    cout << first_number << " + " << second_number << " = " << sum;
    return 0;
}
"""

# Corrected message format
messages_code = [
    {
        "role": "system",
        "content": """You are an expert code translator. Your ONLY task is to convert C++ code to C code.
Rules you MUST follow:
1. Output ONLY executable C code
2. Never include markdown (```c) or explanations
3. Preserve all functionality exactly
4. Use standard C libraries
5. Match the original code's input/output behavior"""
    },
    {
        "role": "user",
        "content": f"""Translate this C++ code to C code:

C++ Code:
{prompt_code}

C Code:"""
    }
]

# Model loading
model_name = LLAMA  # Replace with your actual LLaMA model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

# Tokenizer setup
tokenizer.pad_token = tokenizer.eos_token  # Critical fix for the warning

# Generate with proper attention mask
inputs = tokenizer.apply_chat_template(
    messages_code,
    return_tensors="pt",
    add_generation_prompt=True  # Ensures proper continuation
).to("cuda")

# Generation with attention mask
outputs = model.generate(
    inputs,
    attention_mask=inputs.ne(tokenizer.pad_token_id),  # Fixes attention mask warning
    max_new_tokens=200,
    pad_token_id=tokenizer.eos_token_id  # Explicit pad token setting
)

# Clean output
full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
c_code = full_response.split("C Code:")[-1].strip()

print(c_code)

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

assistant

#include <stdio.h>

int main() {
    int first_number, second_number, sum;
    printf("Enter two integers: ");
    scanf("%d %d", &first_number, &second_number);
    sum = first_number + second_number;
    printf("%d + %d = %d\n", first_number, second_number, sum);
    return 0;
}


In [4]:
#Iterative LLAMA

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load model and tokenizer
model_name = LLAMA  # Replace this
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto",torch_dtype=torch.float16)#FP16 is commonly used with LLAMA models to reduce memory usage with minimal impact on performance
tokenizer.pad_token = tokenizer.eos_token  # Fix pad token warning

# Initial C++ code
cpp_code = """
#include <iostream>
using namespace std;

int main() {
    int a, b;
    cout << "Enter two numbers: ";
    cin >> a >> b;
    cout << "Sum: " << a + b << endl;
    return 0;
}
"""

# Static system message (kept constant)
system_prompt = """You are an expert code translator. Your ONLY task is to convert C++ code to C code.
Rules you MUST follow:
1. Output ONLY executable C code
2. Never include markdown (```c) or explanations
3. Preserve all functionality exactly
4. Use standard C libraries
5. Match the original code's input/output behavior"""

def translate(cpp_code, refinement_prompt=""):
    # Build chat messages
    # user_prompt = f"Translate this C++ code to C code:\n{refinement_prompt}\n\nC++ Code:\n{cpp_code}\n\nC Code:"
    user_prompt = f"""
    Translate this C++ code to C code:
    {refinement_prompt}

    C++ Code:
    {cpp_code}

    C Code:
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    # Tokenize with chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True
    ).to("cuda")

    # Generate output
    outputs = model.generate(
        inputs,
        attention_mask=inputs.ne(tokenizer.pad_token_id),
        max_new_tokens=500,
        pad_token_id=tokenizer.eos_token_id
    )

    # Extract and return only the C code
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Clean up unwanted preamble like "assistant" or extra labels
    cleaned = decoded.split("C Code:")[-1]
    cleaned = cleaned.replace("assistant", "").strip() # The only way to remove assistant , cant be removed from prompt
    return cleaned


# Initial run
translated = translate(cpp_code)
print("\nInitial C translation:\n")
print(translated)

# Interactive loop
while True:
    refinement = input("\nRefinement prompt (or type 'quit'): ")
    if refinement.lower() == 'quit':
        break
    translated = translate(cpp_code, refinement)
    print("\nRefined C code:\n")
    print(translated)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]




Initial C translation:

#include <stdio.h>

int main() {
    int a, b;
    printf("Enter two numbers: ");
    scanf("%d %d", &a, &b);
    printf("Sum: %d\n", a + b);
    return 0;
}

Refined C code:

#include <stdio.h>

int main() {
    int a, b;
    printf("Enter two numbers: ");
    scanf("%d %d", &a, &b);
    printf("Sum: %d\n", a + b);
    return 0;
}

Refinement prompt (or type 'quit'): quit
