In [None]:
##### Code Markdown with RAG + Gradio (Apple Mac Compatible Only)

"""
This notebook generates code markdown files using the CodeLlama GGUF model and allows querying explained content.
It runs locally on **Apple Silicon (M1/M2/M3 Macs)** using llama-cpp-python. GPU/CUDA is not supported yet.

Coming Soon: CUDA support for Linux/Windows users with NVIDIA GPUs.

---

### Prerequisites
1. [Install Anaconda](https://www.anaconda.com/products/distribution#download-section)
2. Create and activate a Conda environment:
    ```bash
    conda create -n code_llama_env python=3.10 -y
    conda activate code_llama_env
    ```
3. [Create a Hugging Face token](https://huggingface.co/settings/tokens)
    - Add it to a `.env` file in the same directory:
      ```env
      HF_TOKEN=your_token_here
      ```

---

### 📦 Install Dependencies (only once)
```bash
pip install llama-cpp-python huggingface_hub chromadb sentence-transformers python-dotenv gradio
```

---
"""


In [None]:
# Run only first time.
!pip install huggingface_hub llama-cpp-python

In [None]:
# Useful imports

from huggingface_hub import login
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from dotenv import load_dotenv
import os
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.utils import embedding_functions
import os

In [None]:
# Load environment variables (.env must contain HF_TOKEN)

# override=True lets values from .env replace variables already set in the process.
load_dotenv(override=True)

HF_TOKEN = os.getenv('HF_TOKEN')

# Report only whether the token is present. Echoing any part of it would
# store secret material in the saved notebook output.
if HF_TOKEN:
    print("HF_TOKEN Key exists")
else:
    print("HF_TOKEN Key not set")




In [None]:
# Fetch the CodeLlama GGUF weights once; later runs reuse the local copy.
repo_id = "TheBloke/CodeLlama-7B-Instruct-GGUF"
filename = "codellama-7b-instruct.Q4_K_M.gguf"
model_dir = "models"

# Create the target folder up front (no error if it is already there).
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, filename)

if os.path.exists(model_path):
    print(f"✅ Model already exists at: {model_path}")
else:
    # The value returned by hf_hub_download replaces the expected path computed above.
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        token=HF_TOKEN,
        local_dir=model_dir,
    )
    print(f"✅ Model downloaded to: {model_path}")

In [None]:
# Instantiate the llama-cpp-python model from the GGUF file located above.

llama_options = {
    "model_path": model_path,
    "n_ctx": 2048,
    "n_threads": 4,  # Adjust based on your CPU cores
}
llm = Llama(**llama_options)

In [None]:
# Read Input Code File
def read_code_file(filepath, encoding="utf-8"):
    """Return the full text of the file at ``filepath``.

    Args:
        filepath: Path of the source file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file's bytes are not valid in ``encoding``.
    """
    with open(filepath, "r", encoding=encoding) as source_file:
        return source_file.read()

# sample.cs must exist in the same folder as this notebook.
sample_source_path = "sample.cs"
code = read_code_file(sample_source_path)

In [None]:
# Build Prompt
def build_prompt(code: str) -> str:
    """Assemble the instruction prompt asking the model to explain ``code``.

    The prompt has three parts: an "### Instruction:" section with the
    explanation guidelines, a "### Code:" section holding ``code`` verbatim,
    and a trailing "### Explanation:" header for the model to continue from.

    Args:
        code: Source text to be explained.

    Returns:
        The complete prompt string.
    """
    prompt_lines = [
        "### Instruction:",
        "You are a helpful coding assistant. Explain what the following code does with detailed step-by-step explanation.",
        "Ignore if it is a binary file, image and compressed archive.",
        "Explain each class, functions and each line.",
        "At last put the conclusion at the end of file giving the brief summary.",
        "Do not include code in explaination.",
        "### Code:",
        f"{code}",
        "",
        "### Explanation:",
    ]
    return "\n".join(prompt_lines).strip()

In [None]:
# Turn the source text into an instruction prompt and run a single completion.
prompt = build_prompt(code)
output = llm(prompt, max_tokens=512)

# The generated text is read from the first entry of the response's "choices" list.
first_choice = output["choices"][0]
explanation = first_choice["text"].strip()

print("🔍 Explanation:\n", explanation)

In [None]:
# Save generated markdown to File
# An explicit UTF-8 encoding keeps the write independent of the platform's
# default encoding, which may not be able to represent every character in
# the generated text.
with open("code_markdown.txt", "w", encoding="utf-8") as f:
    f.write(explanation)

print("Markdown saved to 'code_markdown.txt'")

In [None]:
# Load Embedder and Vector DB

embedder = SentenceTransformer("all-MiniLM-L6-v2")
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection("code_explanations")
code_path = "sample.cs"
embedding = embedder.encode(explanation)

# upsert (instead of add) makes this cell safe to re-run: the record id is
# derived from the file name, so a second run replaces the stored explanation
# rather than colliding with the existing id.
collection.upsert(
    documents=[explanation],
    # "source" identifies where the explanation came from, so store the file
    # path rather than the full source text.
    metadatas=[{"source": code_path}],
    ids=[f"code-{os.path.basename(code_path)}"],
    # Pass a plain list of floats, which ChromaDB accepts without relying on numpy support.
    embeddings=[embedding.tolist()],
)
print("Stored in ChromaDB")

In [None]:
# Ask a question
question = "How many nested loops are there.?"  # Replace with your question
question_embedding = embedder.encode(question)

# Pass a plain list of floats, which ChromaDB accepts without relying on numpy support.
results = collection.query(
    query_embeddings=[question_embedding.tolist()],
    n_results=1,
)

# results["documents"] holds one list of matching documents per query embedding.
matches = results["documents"][0]
if not matches:
    # Fail with a clear message instead of an opaque IndexError when nothing was stored.
    raise ValueError(
        "No stored explanation matched the question; run the ChromaDB storage cell first."
    )
context = matches[0]

In [None]:
# Show the retrieved context as this cell's output so it can be inspected
# before it is placed into the question-answering prompt.
context

In [None]:
# Compose the question-answering prompt: instruction, retrieved context,
# the user's question, and a trailing header for the model to continue from.
qa_prompt_lines = [
    "### Instruction:",
    "Use the context below to answer the user question.",
    "",
    "### Context:",
    f"{context}",
    "",
    "### Question:",
    f"{question}",
    "",
    "### Answer:",
]
qa_prompt = "\n".join(qa_prompt_lines)

# Run one completion and take the text of the first returned choice.
answer_response = llm(qa_prompt, max_tokens=256)
first_answer_choice = answer_response["choices"][0]
answer = first_answer_choice["text"].strip()

print("Answer:\n", answer)