In [1]:
import sys
if sys.version_info < (3, 10):
    print("⚠️  Warning: Python 3.10+ recommended. Current:", sys.version)


In [2]:
!pip install -q gitpython==3.1.46
!pip install -q tree-sitter==0.25.2 tree-sitter-languages==1.10.2
!pip install -q sentence-transformers==5.2.2
!pip install -q faiss-cpu==1.13.2
!pip install -q transformers==5.0.0 accelerate==1.12.0 bitsandbytes==0.49.1
!pip install -q torch>=2.3.0


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/635.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m634.9/635.4 kB[0m [31m22.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m635.4/635.4 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m111.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m89.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import os
import re
from pathlib import Path
from typing import List, Dict, Any
import subprocess


In [4]:
import git
from tree_sitter_languages import get_parser, get_language
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


In [5]:
print(f"PyTorch version: {torch.__version__}")
if torch.__version__ < "2.1.0":
    print("⚠️  Warning: PyTorch 2.3+ recommended for transformers v5.0")


PyTorch version: 2.9.0+cu126


In [6]:

class GitRepoIngestion:
    """Clone and extract code files from Git repository"""

    def __init__(self, repo_url: str, local_path: str = "./repo"):
        self.repo_url = repo_url
        self.local_path = local_path
        self.code_extensions = {
            '.js', '.ts', '.tsx', '.jsx', '.py', '.java', '.go', '.rb',
            '.cpp', '.c', '.h', '.hpp', '.cs', '.php', '.sh', '.bash',
            '.yml', '.yaml', '.json', '.md', '.rs', '.swift', '.kt'
        }

    def clone_repo(self):
        """Clone the repository"""
        if os.path.exists(self.local_path):
            print(f"Repository already exists at {self.local_path}")
            return git.Repo(self.local_path)

        print(f"Cloning {self.repo_url}...")
        repo = git.Repo.clone_from(self.repo_url, self.local_path)
        print(f"✓ Cloned successfully")
        return repo

    def get_code_files(self) -> List[Dict[str, str]]:
        """Extract all code files with content"""
        files = []
        exclude_dirs = {
            '.git', 'node_modules', '__pycache__', 'dist', 'build',
            '.venv', 'venv', '.pytest_cache', '.mypy_cache', 'target',
            'bin', 'obj', '.gradle'
        }

        for root, dirs, filenames in os.walk(self.local_path):
            # Skip excluded directories
            dirs[:] = [d for d in dirs if d not in exclude_dirs]

            for filename in filenames:
                file_path = Path(root) / filename
                if file_path.suffix in self.code_extensions:
                    try:
                        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                            content = f.read()

                        # Skip very large files (>500KB)
                        if len(content) > 500_000:
                            continue

                        relative_path = file_path.relative_to(self.local_path)
                        files.append({
                            'path': str(relative_path),
                            'content': content,
                            'language': file_path.suffix[1:]  # Remove the dot
                        })
                    except Exception as e:
                        print(f"Error reading {file_path}: {e}")

        print(f"✓ Found {len(files)} code files")
        return files


In [7]:

class CodeChunker:
    """Parse and chunk code into meaningful segments"""

    def __init__(self):
        self.parser_cache = {}

    def get_parser(self, language: str):
        """Get tree-sitter parser for language"""
        lang_map = {
            'js': 'javascript',
            'jsx': 'javascript',
            'ts': 'typescript',
            'tsx': 'typescript',
            'py': 'python',
            'java': 'java',
            'go': 'go',
            'rb': 'ruby',
            'cpp': 'cpp',
            'c': 'c',
            'cs': 'c_sharp',
            'sh': 'bash',
            'bash': 'bash',
            'rs': 'rust',
            'php': 'php'
        }

        lang = lang_map.get(language, language)
        if lang not in self.parser_cache:
            try:
                self.parser_cache[lang] = get_parser(lang)
            except Exception as e:
                print(f"Parser not available for {lang}: {e}")
                return None
        return self.parser_cache.get(lang)

    def chunk_code(self, files: List[Dict[str, str]]) -> List[Dict[str, Any]]:
        """Create chunks from code files"""
        chunks = []

        for file in files:
            # For simplicity, use function/class-based chunking with fallback
            parser = self.get_parser(file['language'])

            if parser and file['language'] in ['py', 'js', 'ts', 'jsx', 'tsx', 'java', 'go', 'rs']:
                file_chunks = self._parse_with_tree_sitter(file, parser)
                if file_chunks:
                    chunks.extend(file_chunks)
                else:
                    # Fallback if parser didn't extract anything
                    chunks.extend(self._simple_chunk(file))
            else:
                # Fallback: chunk by logical blocks
                chunks.extend(self._simple_chunk(file))

        print(f"✓ Created {len(chunks)} code chunks")
        return chunks

    def _parse_with_tree_sitter(self, file: Dict, parser) -> List[Dict]:
        """Parse file with tree-sitter (updated for v0.25.2)"""
        chunks = []
        try:
            tree = parser.parse(bytes(file['content'], 'utf8'))

            # Extract function and class definitions recursively
            self._extract_nodes(tree.root_node, file, chunks)

            # If no chunks extracted, return empty to trigger fallback
            if not chunks:
                return []

        except Exception as e:
            print(f"Parser error for {file['path']}: {e}")
            return []

        return chunks

    def _extract_nodes(self, node, file: Dict, chunks: List):
        """Recursively extract function and class nodes"""
        # Node types that represent meaningful code blocks
        interesting_types = {
            'function_definition', 'function_declaration', 'function_item',  # Python, JS, Rust
            'class_definition', 'class_declaration', 'class_item',  # Classes
            'method_definition', 'method_declaration',  # Methods
            'interface_declaration', 'struct_item',  # TypeScript, Rust
            'impl_item',  # Rust implementations
        }

        if node.type in interesting_types:
            code = file['content'][node.start_byte:node.end_byte]

            # Extract name if possible
            name = self._extract_name(node, file['content'])

            chunks.append({
                'content': code,
                'path': file['path'],
                'language': file['language'],
                'type': node.type,
                'name': name,
                'start_line': node.start_point[0] + 1,
                'end_line': node.end_point[0] + 1
            })

        # Recurse through children
        for child in node.children:
            self._extract_nodes(child, file, chunks)

    def _extract_name(self, node, content: str) -> str:
        """Extract name from node"""
        try:
            # Look for identifier in children
            for child in node.children:
                if 'identifier' in child.type or child.type == 'name':
                    return content[child.start_byte:child.end_byte]
        except:
            pass
        return "unknown"

    def _simple_chunk(self, file: Dict) -> List[Dict]:
        """Simple chunking by size with overlap"""
        content = file['content']
        lines = content.split('\n')

        # Special handling for README files - keep them mostly whole
        if 'readme' in file['path'].lower():
            chunk_size = 200  # Larger chunks for README
            overlap = 20
        else:
            chunk_size = 80
            overlap = 15

        chunks = []

        for i in range(0, len(lines), chunk_size - overlap):
            chunk_lines = lines[i:i + chunk_size]
            if chunk_lines and any(line.strip() for line in chunk_lines):  # Skip empty chunks
                chunks.append({
                    'content': '\n'.join(chunk_lines),
                    'path': file['path'],
                    'language': file['language'],
                    'type': 'chunk',
                    'name': f'chunk_{i}',
                    'start_line': i + 1,
                    'end_line': min(i + len(chunk_lines), len(lines))
                })

        return chunks


In [8]:

class CodeVectorStore:
    """Embed code chunks and enable similarity search"""

    def __init__(self, model_name: str = "BAAI/bge-small-en-v1.5"):
        """
        Initialize with better embedding model (Jan 2026)

        Options:
        - BAAI/bge-small-en-v1.5 (recommended, general purpose, better than MiniLM)
        - jinaai/jina-embeddings-v2-base-code (code-specific, 8192 context)
        - sentence-transformers/all-mpnet-base-v2 (good alternative)
        """
        print(f"Loading embedding model: {model_name}...")
        self.embedding_model = SentenceTransformer(
            model_name,
            trust_remote_code=True  # Required for some newer models
        )
        self.chunks = []
        self.index = None
        print(f"✓ Embedding model loaded (dim: {self.embedding_model.get_sentence_embedding_dimension()})")

    def embed_chunks(self, chunks: List[Dict[str, Any]]):
        """Generate embeddings for all chunks"""
        self.chunks = chunks

        print(f"Generating embeddings for {len(chunks)} chunks...")

        # Create rich text representations for embedding
        texts = []
        for c in chunks:
            # Include metadata in embedding for better retrieval
            metadata = f"File: {c['path']} | Language: {c['language']}"
            if c.get('name') and c['name'] != 'unknown':
                metadata += f" | {c['type']}: {c['name']}"

            text = f"{metadata}\n\n{c['content'][:2000]}"  # Limit to 2000 chars
            texts.append(text)

        # Generate embeddings with sentence-transformers v5.2.2
        embeddings = self.embedding_model.encode(
            texts,
            show_progress_bar=True,
            batch_size=32,
            normalize_embeddings=True  # Normalize for cosine similarity
        )

        # Create FAISS index (using cosine similarity via normalization)
        dimension = embeddings.shape[1]
        self.index = faiss.IndexFlatIP(dimension)  # Inner Product = cosine similarity with normalized vectors
        self.index.add(embeddings.astype('float32'))

        print(f"✓ Vector store created with {self.index.ntotal} embeddings")

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """Search for relevant code chunks"""
        # Embed query
        query_embedding = self.embedding_model.encode(
            [query],
            normalize_embeddings=True
        )

        # Search
        scores, indices = self.index.search(query_embedding.astype('float32'), top_k)

        results = []
        for idx, score in zip(indices[0], scores[0]):
            if idx < len(self.chunks):  # Safety check
                chunk = self.chunks[idx].copy()
                chunk['score'] = float(score)
                results.append(chunk)

        return results


In [22]:
class CodeQABot:
    """LLM-based Q&A bot for code"""

    # def __init__(self, model_name: str = "Qwen/Qwen2.5-Coder-14B-Instruct"):
    def __init__(self, model_name: str = "codellama/CodeLlama-7b-hf"):
        """Initialize with code LLM"""
        print(f"Loading LLM: {model_name}...")

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
        )

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16
        )

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        print(f"✓ LLM loaded (4-bit quantized, {model_name})")
        print(f"  Memory footprint: ~{torch.cuda.memory_allocated() / 1024**3:.1f}GB")

    def generate_answer(self, query: str, context_chunks: List[Dict]) -> str:
        """Generate answer using retrieved context"""

        # Build context
        context = "# CODE CONTEXT:\n\n"
        for i, chunk in enumerate(context_chunks[:3], 1):
            header = f"## File: {chunk['path']} (Lines {chunk['start_line']}-{chunk['end_line']})"
            if chunk.get('name') and chunk['name'] != 'unknown':
                header += f" - {chunk['name']}"

            context += f"{header}\n```{chunk['language']}\n"
            content = chunk['content'][:1200] if len(chunk['content']) > 1200 else chunk['content']
            context += f"{content}\n```\n\n"

        # Clear, strict prompt
        prompt = f"""Answer the question based on the code provided. Be concise and accurate.

{context}

Question: {query}

Answer:"""

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=2048
        ).to(self.model.device)

        # Optimized generation parameters
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=250,        # Shorter responses
                temperature=0.3,           # More focused
                do_sample=True,
                top_p=0.85,
                top_k=30,
                repetition_penalty=1.3,    # Prevent repetition
                no_repeat_ngram_size=3,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id
            )

        full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract answer
        if "Answer:" in full_response:
            answer = full_response.split("Answer:")[-1].strip()
        else:
            answer = full_response[len(prompt):].strip()

        # Clean up repetitions
        sentences = answer.split('. ')
        unique_sentences = []
        seen = set()
        for sent in sentences:
            sent_clean = sent.strip().lower()
            if sent_clean and sent_clean not in seen:
                seen.add(sent_clean)
                unique_sentences.append(sent.strip())

        answer = '. '.join(unique_sentences)
        if not answer.endswith('.'):
            answer += '.'

        return answer

In [23]:

def main():
    """Main execution pipeline"""

    # Configuration
    REPO_URL = "https://github.com/raghavendramallela/pulumi-aws-psql-sample.git"

    print("="*70)
    print("🤖 LLM CODE Q&A BOT - RAG DEMO (Updated Jan 2026)")
    print("="*70)
    print(f"Models: BGE-small-en-v1.5 + Qwen2.5-Coder-3B")
    print(f"Python: {sys.version_info.major}.{sys.version_info.minor}")
    print(f"PyTorch: {torch.__version__}")
    print(f"CUDA Available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    print("="*70)

    # Step 1: Clone and ingest repository
    print("\n[1/5] 📥 Cloning repository...")
    ingestion = GitRepoIngestion(REPO_URL)
    ingestion.clone_repo()
    files = ingestion.get_code_files()

    if not files:
        print("❌ No code files found!")
        return None, None

    # Step 2: Parse and chunk code
    print("\n[2/5] 🔍 Parsing and chunking code...")
    chunker = CodeChunker()
    chunks = chunker.chunk_code(files)

    if not chunks:
        print("❌ No chunks created!")
        return None, None

    # Step 3: Create embeddings and vector store
    print("\n[3/5] 🧮 Creating vector store...")
    vector_store = CodeVectorStore()  # Uses BGE-small by default
    vector_store.embed_chunks(chunks)

    # Step 4: Load LLM
    print("\n[4/5] 🧠 Loading LLM...")
    qa_bot = CodeQABot()  # Uses Qwen2.5-Coder-3B by default

    # Step 5: Interactive Q&A
    print("\n[5/5] ✅ System ready!")
    print("="*70)

    # Example queries
    queries = [
        # "What does this repository do? Give me a brief overview.",
        # "How do i provision a database using an existing backup database file?",
        # "What database parameters can I configure?"
    ]

    for query in queries:
        print(f"\n{'='*70}")
        print(f"❓ QUERY: {query}")
        print(f"{'='*70}")

        # Retrieve relevant chunks
        results = vector_store.search(query, top_k=5)

        print(f"\n📚 Retrieved {len(results)} relevant code chunks:")
        for i, chunk in enumerate(results[:3], 1):
            name_info = f" ({chunk['name']})" if chunk.get('name') != 'unknown' else ""
            print(f"  {i}. {chunk['path']}{name_info} (lines {chunk['start_line']}-{chunk['end_line']}) [score: {chunk['score']:.3f}]")

        # Generate answer
        print("\n🤖 Generating answer...\n")
        answer = qa_bot.generate_answer(query, results)
        print(answer)
        print()

    return vector_store, qa_bot



In [24]:
def ask_question(question: str, vector_store, qa_bot):
    """Ask a custom question with improved retrieval"""
    print(f"\n{'='*70}")
    print(f"❓ QUERY: {question}")
    print(f"{'='*70}")

    # Get more results for filtering
    results = vector_store.search(question, top_k=10)

    # Filter and re-rank
    query_lower = question.lower()
    if any(term in query_lower for term in ['explain', 'overview', 'what does', 'codebase']):
        # Prioritize README and main source files
        filtered = []
        for r in results:
            path_lower = r['path'].lower()
            # Boost README and main files
            if 'readme' in path_lower or 'index' in path_lower or 'main' in path_lower:
                r['score'] += 0.5
            # Penalize config files
            if any(ext in path_lower for ext in ['.yml', '.yaml', '.json', 'package.json']):
                r['score'] -= 0.3
            filtered.append(r)

        # Re-sort and take top 5
        filtered.sort(key=lambda x: x['score'], reverse=True)
        results = filtered[:5]
    else:
        results = results[:5]

    print(f"\n📚 Found {len(results)} relevant chunks:")
    for i, chunk in enumerate(results[:3], 1):
        name_info = f" ({chunk['name']})" if chunk.get('name') != 'unknown' else ""
        print(f"  {i}. {chunk['path']}{name_info} (lines {chunk['start_line']}-{chunk['end_line']}) [score: {chunk['score']:.3f}]")

    print("\n🤖 ANSWER:\n")
    answer = qa_bot.generate_answer(question, results)
    print(answer)
    print()

    return answer

In [25]:

if __name__ == "__main__":
    # Run main pipeline
    vector_store, qa_bot = main()

    if vector_store and qa_bot:
        print("\n" + "="*70)
        print("✨ System Ready! You can now ask custom questions using:")
        print("   ask_question('your question here', vector_store, qa_bot)")
        print("="*70)
        # print("\nExample questions:")
        # print("  • What are all the inputs this action accepts?")
        # print("  • Show me how authentication is handled")
        # print("  • What happens when the repository doesn't exist?")
        # print("  • How does this handle submodules?")


🤖 LLM CODE Q&A BOT - RAG DEMO (Updated Jan 2026)
Models: BGE-small-en-v1.5 + Qwen2.5-Coder-3B
Python: 3.12
PyTorch: 2.9.0+cu126
CUDA Available: True
GPU: Tesla T4

[1/5] 📥 Cloning repository...
Repository already exists at ./repo
✓ Found 7 code files

[2/5] 🔍 Parsing and chunking code...
Parser not available for yaml: __init__() takes exactly 1 argument (2 given)
Parser not available for md: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for typescript: __init__() takes exactly 1 argument (2 given)
Parser not available for yaml: __init__() takes exactly 1 argument (2 given)
✓ Created 81 code chunks

[3/5] 🧮 Creating vector store...
Loading embedding model: BAAI/bge-small-en-v1.5...


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertModel LOAD REPORT from: BAAI/bge-small-en-v1.5
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


✓ Embedding model loaded (dim: 384)
Generating embeddings for 81 chunks...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

✓ Vector store created with 81 embeddings

[4/5] 🧠 Loading LLM...
Loading LLM: codellama/CodeLlama-7b-hf...


config.json:   0%|          | 0.00/637 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

✓ LLM loaded (4-bit quantized, codellama/CodeLlama-7b-hf)
  Memory footprint: ~13.5GB

[5/5] ✅ System ready!

✨ System Ready! You can now ask custom questions using:
   ask_question('your question here', vector_store, qa_bot)


In [29]:
ask_question("How do i Create RDS PostgreSQL instance?",vector_store,qa_bot)


❓ QUERY: How do i Create RDS PostgreSQL instance?

📚 Found 5 relevant chunks:
  1. index.ts (chunk_195) (lines 196-275) [score: 0.853]
  2. index.ts (chunk_130) (lines 131-210) [score: 0.786]
  3. index.ts (chunk_65) (lines 66-145) [score: 0.740]

🤖 ANSWER:

The `aws.rd.



'The `aws.rd.'

In [21]:

if __name__ == "__main__":
    # Run main pipeline
    vector_store, qa_bot = main()

    if vector_store and qa_bot:
        print("\n" + "="*70)
        print("✨ System Ready! You can now ask custom questions using:")
        print("   ask_question('How do i provision a database using an existing backup database file?', vector_store, qa_bot)")
        print("="*70)
        # print("\nExample questions:")
        # print("  • What are all the inputs this action accepts?")
        # print("  • Show me how authentication is handled")
        # print("  • What happens when the repository doesn't exist?")
        # print("  • How does this handle submodules?")


🤖 LLM CODE Q&A BOT - RAG DEMO (Updated Jan 2026)
Models: BGE-small-en-v1.5 + Qwen2.5-Coder-3B
Python: 3.12
PyTorch: 2.9.0+cu126
CUDA Available: True
GPU: Tesla T4

[1/5] 📥 Cloning repository...
Repository already exists at ./repo
✓ Found 7 code files

[2/5] 🔍 Parsing and chunking code...
Parser not available for yaml: __init__() takes exactly 1 argument (2 given)
Parser not available for md: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for typescript: __init__() takes exactly 1 argument (2 given)
Parser not available for yaml: __init__() takes exactly 1 argument (2 given)
✓ Created 81 code chunks

[3/5] 🧮 Creating vector store...
Loading embedding model: BAAI/bge-small-en-v1.5...


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertModel LOAD REPORT from: BAAI/bge-small-en-v1.5
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


✓ Embedding model loaded (dim: 384)
Generating embeddings for 81 chunks...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

✓ Vector store created with 81 embeddings

[4/5] 🧠 Loading LLM...
Loading LLM: Qwen/Qwen2.5-Coder-14B-Instruct...


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
ask_question("How do i provision a database using an existing backup database file?",vector_store,qa_bot)


❓ QUERY: Give me what an example of using of all functions which can be used in my github excluding checkout 

📚 Found 5 relevant chunks:
  1. .github/workflows/test.yml (chunk_260) (lines 261-340) [score: 0.759]
  2. adrs/0153-checkout-v2.md (chunk_260) (lines 261-291) [score: 0.745]
  3. action.yml (chunk_0) (lines 1-80) [score: 0.743]

🤖 ANSWER:

Based on the context provided, here's an example usage of other common GitHub Actions functionalities beyond just the checkout function:

```yaml
jobs:
  build_and_test:
   runs-on:ubuntu-latest
    
    steps: 
      
      # Install dependencies  
      - uses: nvm/setup-nvm@v1
        ...
        
      - run: npm install
      
      # Run tests    
      - script: npm test
  
      # Deploy artifacts     
      - upload-artifact: Upload Test Results
        ...

      # Send notifications   
      - send-email-notification: Notify Team
        ... # Cache results       
      - cache-dependencies: Save Dependencies
        ......
```


"Based on the context provided, here's an example usage of other common GitHub Actions functionalities beyond just the checkout function:\n\n```yaml\njobs:\n  build_and_test:\n   runs-on:ubuntu-latest\n    \n    steps: \n      \n      # Install dependencies  \n      - uses: nvm/setup-nvm@v1\n        ...\n        \n      - run: npm install\n      \n      # Run tests    \n      - script: npm test\n  \n      # Deploy artifacts     \n      - upload-artifact: Upload Test Results\n        ...\n\n      # Send notifications   \n      - send-email-notification: Notify Team\n        ... # Cache results       \n      - cache-dependencies: Save Dependencies\n        ......\n```\n\nThis demonstrates how you might use additional GitHub Action features like dependency management (`setup-node`, etc.), testing execution, artifact uploading, notification sending, caching, etc., alongside but separate from the initial checkout operation shown earlier in the `.github/workows/test.yml`."

In [None]:
ask_question("explain a workflow except checkout ",vector_store,qa_bot)


❓ QUERY: explain a workflow except checkout 

📚 Found 5 relevant chunks:
  1. README.md (chunk_360) (lines 361-372) [score: 1.174]
  2. adrs/0153-checkout-v2.md (chunk_195) (lines 196-275) [score: 0.728]
  3. adrs/0153-checkout-v2.md (chunk_0) (lines 1-80) [score: 0.698]

🤖 ANSWER:

Based on the context provided, there isn't enough information to provide a detailed explanation of what happens after the "Checkout" stage in a typical GitHub Action Workflow beyond noting that subsequent steps would likely involve operating on the checked-out files. Additional configuration within the YAML file detailing further stages such as running tests, building artifacts, deploying etc., would dictate specific workflows but these aren’t outlined here.



'Based on the context provided, there isn\'t enough information to provide a detailed explanation of what happens after the "Checkout" stage in a typical GitHub Action Workflow beyond noting that subsequent steps would likely involve operating on the checked-out files. Additional configuration within the YAML file detailing further stages such as running tests, building artifacts, deploying etc., would dictate specific workflows but these aren’t outlined here.'

In [None]:
ask_question("show the context that u have ",vector_store,qa_bot)


❓ QUERY: show the context that u have 

📚 Found 5 relevant chunks:
  1. src/git-version.ts (chunk_65) (lines 66-78) [score: 0.642]
  2. __test__/git-version.test.ts (chunk_65) (lines 66-87) [score: 0.642]
  3. src/misc/generate-docs.ts (chunk_65) (lines 66-127) [score: 0.638]

🤖 ANSWER:

The provided code snippets are from three different files in a TypeScript project:
1. **src/git-version** lines 65-77 contain methods related to converting version numbers into strings.
2. **__tests__/git_version_test** lines around 67 include test cases checking whether certain git versions meet minimum requirements compared with another sparse version (`min_sparse_ver`).
3. Lastly, **misc_generate_docs**, starting at line 68 through 126 appears to be part of functionality generating documentation where it processes descriptions ensuring they fit within specified widths or breaking them appropriately when exceeding those limits.



'The provided code snippets are from three different files in a TypeScript project:\n1. **src/git-version** lines 65-77 contain methods related to converting version numbers into strings.\n2. **__tests__/git_version_test** lines around 67 include test cases checking whether certain git versions meet minimum requirements compared with another sparse version (`min_sparse_ver`).\n3. Lastly, **misc_generate_docs**, starting at line 68 through 126 appears to be part of functionality generating documentation where it processes descriptions ensuring they fit within specified widths or breaking them appropriately when exceeding those limits.'