<a href="https://colab.research.google.com/github/ndarshan223/ai-hands-on/blob/main/coderag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import sys
if sys.version_info < (3, 10):
    print("⚠️  Warning: Python 3.10+ recommended. Current:", sys.version)


In [25]:
!pip install -q gitpython==3.1.46
!pip install -q tree-sitter==0.25.2 tree-sitter-languages==1.10.2
!pip install -q sentence-transformers==5.2.2
!pip install -q faiss-cpu==1.13.2
!pip install -q transformers==5.0.0 accelerate==1.12.0 bitsandbytes==0.49.1
!pip install -q torch>=2.3.0


In [26]:
import os
import re
from pathlib import Path
from typing import List, Dict, Any
import subprocess


In [27]:
import git
from tree_sitter_languages import get_parser, get_language
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


In [28]:
print(f"PyTorch version: {torch.__version__}")
if torch.__version__ < "2.1.0":
    print("⚠️  Warning: PyTorch 2.3+ recommended for transformers v5.0")


PyTorch version: 2.9.0+cu126


In [29]:

class GitRepoIngestion:
    """Clone and extract code files from Git repository"""

    def __init__(self, repo_url: str, local_path: str = "./repo"):
        self.repo_url = repo_url
        self.local_path = local_path
        self.code_extensions = {
            '.js', '.ts', '.tsx', '.jsx', '.py', '.java', '.go', '.rb',
            '.cpp', '.c', '.h', '.hpp', '.cs', '.php', '.sh', '.bash',
            '.yml', '.yaml', '.json', '.md', '.rs', '.swift', '.kt'
        }

    def clone_repo(self):
        """Clone the repository"""
        if os.path.exists(self.local_path):
            print(f"Repository already exists at {self.local_path}")
            return git.Repo(self.local_path)

        print(f"Cloning {self.repo_url}...")
        repo = git.Repo.clone_from(self.repo_url, self.local_path)
        print(f"✓ Cloned successfully")
        return repo

    def get_code_files(self) -> List[Dict[str, str]]:
        """Extract all code files with content"""
        files = []
        exclude_dirs = {
            '.git', 'node_modules', '__pycache__', 'dist', 'build',
            '.venv', 'venv', '.pytest_cache', '.mypy_cache', 'target',
            'bin', 'obj', '.gradle'
        }

        for root, dirs, filenames in os.walk(self.local_path):
            # Skip excluded directories
            dirs[:] = [d for d in dirs if d not in exclude_dirs]

            for filename in filenames:
                file_path = Path(root) / filename
                if file_path.suffix in self.code_extensions:
                    try:
                        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                            content = f.read()

                        # Skip very large files (>500KB)
                        if len(content) > 500_000:
                            continue

                        relative_path = file_path.relative_to(self.local_path)
                        files.append({
                            'path': str(relative_path),
                            'content': content,
                            'language': file_path.suffix[1:]  # Remove the dot
                        })
                    except Exception as e:
                        print(f"Error reading {file_path}: {e}")

        print(f"✓ Found {len(files)} code files")
        return files


In [30]:

class CodeChunker:
    """Parse and chunk code into meaningful segments"""

    def __init__(self):
        self.parser_cache = {}

    def get_parser(self, language: str):
        """Get tree-sitter parser for language"""
        lang_map = {
            'js': 'javascript',
            'jsx': 'javascript',
            'ts': 'typescript',
            'tsx': 'typescript',
            'py': 'python',
            'java': 'java',
            'go': 'go',
            'rb': 'ruby',
            'cpp': 'cpp',
            'c': 'c',
            'cs': 'c_sharp',
            'sh': 'bash',
            'bash': 'bash',
            'rs': 'rust',
            'php': 'php'
        }

        lang = lang_map.get(language, language)
        if lang not in self.parser_cache:
            try:
                self.parser_cache[lang] = get_parser(lang)
            except Exception as e:
                print(f"Parser not available for {lang}: {e}")
                return None
        return self.parser_cache.get(lang)

    def chunk_code(self, files: List[Dict[str, str]]) -> List[Dict[str, Any]]:
        """Create chunks from code files"""
        chunks = []

        for file in files:
            # For simplicity, use function/class-based chunking with fallback
            parser = self.get_parser(file['language'])

            if parser and file['language'] in ['py', 'js', 'ts', 'jsx', 'tsx', 'java', 'go', 'rs']:
                file_chunks = self._parse_with_tree_sitter(file, parser)
                if file_chunks:
                    chunks.extend(file_chunks)
                else:
                    # Fallback if parser didn't extract anything
                    chunks.extend(self._simple_chunk(file))
            else:
                # Fallback: chunk by logical blocks
                chunks.extend(self._simple_chunk(file))

        print(f"✓ Created {len(chunks)} code chunks")
        return chunks

    def _parse_with_tree_sitter(self, file: Dict, parser) -> List[Dict]:
        """Parse file with tree-sitter (updated for v0.25.2)"""
        chunks = []
        try:
            tree = parser.parse(bytes(file['content'], 'utf8'))

            # Extract function and class definitions recursively
            self._extract_nodes(tree.root_node, file, chunks)

            # If no chunks extracted, return empty to trigger fallback
            if not chunks:
                return []

        except Exception as e:
            print(f"Parser error for {file['path']}: {e}")
            return []

        return chunks

    def _extract_nodes(self, node, file: Dict, chunks: List):
        """Recursively extract function and class nodes"""
        # Node types that represent meaningful code blocks
        interesting_types = {
            'function_definition', 'function_declaration', 'function_item',  # Python, JS, Rust
            'class_definition', 'class_declaration', 'class_item',  # Classes
            'method_definition', 'method_declaration',  # Methods
            'interface_declaration', 'struct_item',  # TypeScript, Rust
            'impl_item',  # Rust implementations
        }

        if node.type in interesting_types:
            code = file['content'][node.start_byte:node.end_byte]

            # Extract name if possible
            name = self._extract_name(node, file['content'])

            chunks.append({
                'content': code,
                'path': file['path'],
                'language': file['language'],
                'type': node.type,
                'name': name,
                'start_line': node.start_point[0] + 1,
                'end_line': node.end_point[0] + 1
            })

        # Recurse through children
        for child in node.children:
            self._extract_nodes(child, file, chunks)

    def _extract_name(self, node, content: str) -> str:
        """Extract name from node"""
        try:
            # Look for identifier in children
            for child in node.children:
                if 'identifier' in child.type or child.type == 'name':
                    return content[child.start_byte:child.end_byte]
        except:
            pass
        return "unknown"

    def _simple_chunk(self, file: Dict) -> List[Dict]:
        """Simple chunking by size with overlap"""
        content = file['content']
        lines = content.split('\n')

        # Chunk by ~80 lines with 15 line overlap (better for context)
        chunk_size = 80
        overlap = 15
        chunks = []

        for i in range(0, len(lines), chunk_size - overlap):
            chunk_lines = lines[i:i + chunk_size]
            if chunk_lines and any(line.strip() for line in chunk_lines):  # Skip empty chunks
                chunks.append({
                    'content': '\n'.join(chunk_lines),
                    'path': file['path'],
                    'language': file['language'],
                    'type': 'chunk',
                    'name': f'chunk_{i}',
                    'start_line': i + 1,
                    'end_line': min(i + len(chunk_lines), len(lines))
                })

        return chunks


In [31]:

class CodeVectorStore:
    """Embed code chunks and enable similarity search"""

    def __init__(self, model_name: str = "BAAI/bge-small-en-v1.5"):
        """
        Initialize with better embedding model (Jan 2026)

        Options:
        - BAAI/bge-small-en-v1.5 (recommended, general purpose, better than MiniLM)
        - jinaai/jina-embeddings-v2-base-code (code-specific, 8192 context)
        - sentence-transformers/all-mpnet-base-v2 (good alternative)
        """
        print(f"Loading embedding model: {model_name}...")
        self.embedding_model = SentenceTransformer(
            model_name,
            trust_remote_code=True  # Required for some newer models
        )
        self.chunks = []
        self.index = None
        print(f"✓ Embedding model loaded (dim: {self.embedding_model.get_sentence_embedding_dimension()})")

    def embed_chunks(self, chunks: List[Dict[str, Any]]):
        """Generate embeddings for all chunks"""
        self.chunks = chunks

        print(f"Generating embeddings for {len(chunks)} chunks...")

        # Create rich text representations for embedding
        texts = []
        for c in chunks:
            # Include metadata in embedding for better retrieval
            metadata = f"File: {c['path']} | Language: {c['language']}"
            if c.get('name') and c['name'] != 'unknown':
                metadata += f" | {c['type']}: {c['name']}"

            text = f"{metadata}\n\n{c['content'][:2000]}"  # Limit to 2000 chars
            texts.append(text)

        # Generate embeddings with sentence-transformers v5.2.2
        embeddings = self.embedding_model.encode(
            texts,
            show_progress_bar=True,
            batch_size=32,
            normalize_embeddings=True  # Normalize for cosine similarity
        )

        # Create FAISS index (using cosine similarity via normalization)
        dimension = embeddings.shape[1]
        self.index = faiss.IndexFlatIP(dimension)  # Inner Product = cosine similarity with normalized vectors
        self.index.add(embeddings.astype('float32'))

        print(f"✓ Vector store created with {self.index.ntotal} embeddings")

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """Search for relevant code chunks"""
        # Embed query
        query_embedding = self.embedding_model.encode(
            [query],
            normalize_embeddings=True
        )

        # Search
        scores, indices = self.index.search(query_embedding.astype('float32'), top_k)

        results = []
        for idx, score in zip(indices[0], scores[0]):
            if idx < len(self.chunks):  # Safety check
                chunk = self.chunks[idx].copy()
                chunk['score'] = float(score)
                results.append(chunk)

        return results


In [32]:

class CodeQABot:
    """LLM-based Q&A bot for code"""

    def __init__(self, model_name: str = "Qwen/Qwen2.5-Coder-14B"):
        """
        Initialize with better code LLM (Jan 2026)

        Options for T4 GPU:
        - Qwen/Qwen2.5-Coder-3B-Instruct (recommended, 3B params, better than 1.5B)
        - Qwen/Qwen2.5-Coder-1.5B-Instruct (smaller, faster)
        - deepseek-ai/deepseek-coder-1.3b-instruct (alternative)
        """
        print(f"Loading LLM: {model_name}...")

        # 4-bit quantization for Colab free tier (works with transformers v5.0)
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
        )

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16
        )

        # Set pad token if not exists
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        print(f"✓ LLM loaded (4-bit quantized, {model_name})")
        print(f"  Memory footprint: ~{torch.cuda.memory_allocated() / 1024**3:.1f}GB")

    def generate_answer(self, query: str, context_chunks: List[Dict]) -> str:
        """Generate answer using retrieved context"""

        # Build context from retrieved chunks
        context = "# RELEVANT CODE CONTEXT:\n\n"
        for i, chunk in enumerate(context_chunks[:3], 1):  # Use top 3 chunks
            header = f"## [{chunk['path']}] (Lines {chunk['start_line']}-{chunk['end_line']})"
            if chunk.get('name') and chunk['name'] != 'unknown':
                header += f" - {chunk['name']}"

            context += f"{header}\n```{chunk['language']}\n"
            # Limit chunk size for token efficiency
            content = chunk['content'][:1000] if len(chunk['content']) > 1000 else chunk['content']
            context += f"{content}\n```\n\n"

        # Create prompt optimized for Qwen2.5-Coder
        prompt = f"""You are an expert code assistant. Answer the user's question based on the provided code context.

Be concise, accurate, and include relevant code examples when appropriate.
Reference file paths and line numbers when discussing specific code.

{context}

User Question: {query}

Answer:"""

        # Tokenize
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3072  # Leave room for generation
        ).to(self.model.device)

        # Generate response
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=600,
                temperature=0.9,
                do_sample=True,
                top_p=0.95,
                top_k=50,
                repetition_penalty=1.1,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id
            )

        # Decode response
        full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract only the answer part (after "Answer:")
        if "Answer:" in full_response:
            answer = full_response.split("Answer:")[-1].strip()
        else:
            answer = full_response[len(prompt):].strip()

        return answer


In [33]:

def main():
    """Main execution pipeline"""

    # Configuration
    REPO_URL = "https://github.com/actions/checkout"

    print("="*70)
    print("🤖 LLM CODE Q&A BOT - RAG DEMO (Updated Jan 2026)")
    print("="*70)
    print(f"Models: BGE-small-en-v1.5 + Qwen2.5-Coder-3B")
    print(f"Python: {sys.version_info.major}.{sys.version_info.minor}")
    print(f"PyTorch: {torch.__version__}")
    print(f"CUDA Available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    print("="*70)

    # Step 1: Clone and ingest repository
    print("\n[1/5] 📥 Cloning repository...")
    ingestion = GitRepoIngestion(REPO_URL)
    ingestion.clone_repo()
    files = ingestion.get_code_files()

    if not files:
        print("❌ No code files found!")
        return None, None

    # Step 2: Parse and chunk code
    print("\n[2/5] 🔍 Parsing and chunking code...")
    chunker = CodeChunker()
    chunks = chunker.chunk_code(files)

    if not chunks:
        print("❌ No chunks created!")
        return None, None

    # Step 3: Create embeddings and vector store
    print("\n[3/5] 🧮 Creating vector store...")
    vector_store = CodeVectorStore()  # Uses BGE-small by default
    vector_store.embed_chunks(chunks)

    # Step 4: Load LLM
    print("\n[4/5] 🧠 Loading LLM...")
    qa_bot = CodeQABot()  # Uses Qwen2.5-Coder-3B by default

    # Step 5: Interactive Q&A
    print("\n[5/5] ✅ System ready!")
    print("="*70)

    # Example queries
    queries = [
        # "What does this repository do? Give me a brief overview.",
        # "How do I use the checkout action in a GitHub workflow?",
        # "Show me the main entry point and explain what it does.",
        # "What parameters can I configure for this action?"
    ]

    for query in queries:
        print(f"\n{'='*70}")
        print(f"❓ QUERY: {query}")
        print(f"{'='*70}")

        # Retrieve relevant chunks
        results = vector_store.search(query, top_k=5)

        print(f"\n📚 Retrieved {len(results)} relevant code chunks:")
        for i, chunk in enumerate(results[:3], 1):
            name_info = f" ({chunk['name']})" if chunk.get('name') != 'unknown' else ""
            print(f"  {i}. {chunk['path']}{name_info} (lines {chunk['start_line']}-{chunk['end_line']}) [score: {chunk['score']:.3f}]")

        # Generate answer
        print("\n🤖 Generating answer...\n")
        answer = qa_bot.generate_answer(query, results)
        print(answer)
        print()

    return vector_store, qa_bot



In [34]:
def ask_question(question: str, vector_store, qa_bot):
    """Ask a custom question"""
    print(f"\n{'='*70}")
    print(f"❓ QUERY: {question}")
    print(f"{'='*70}")

    results = vector_store.search(question, top_k=5)

    print(f"\n📚 Found {len(results)} relevant chunks:")
    for i, chunk in enumerate(results[:3], 1):
        name_info = f" ({chunk['name']})" if chunk.get('name') != 'unknown' else ""
        print(f"  {i}. {chunk['path']}{name_info} (lines {chunk['start_line']}-{chunk['end_line']}) [score: {chunk['score']:.3f}]")

    print("\n🤖 ANSWER:\n")
    answer = qa_bot.generate_answer(question, results)
    print(answer)
    print()

    return answer


In [35]:

if __name__ == "__main__":
    # Run main pipeline
    vector_store, qa_bot = main()

    if vector_store and qa_bot:
        print("\n" + "="*70)
        print("✨ System Ready! You can now ask custom questions using:")
        print("   ask_question('your question here', vector_store, qa_bot)")
        print("="*70)
        # print("\nExample questions:")
        # print("  • What are all the inputs this action accepts?")
        # print("  • Show me how authentication is handled")
        # print("  • What happens when the repository doesn't exist?")
        # print("  • How does this handle submodules?")


🤖 LLM CODE Q&A BOT - RAG DEMO (Updated Jan 2026)
Models: BGE-small-en-v1.5 + Qwen2.5-Coder-3B
Python: 3.12
PyTorch: 2.9.0+cu126
CUDA Available: True
GPU: Tesla T4

[1/5] 📥 Cloning repository...
Repository already exists at ./repo
✓ Found 94 code files

[2/5] 🔍 Parsing and chunking code...
Parser not available for javascript: __init__() takes exactly 1 argument (2 given)
Parser not available for yml: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for json: __init__() takes exactly 1 argument (2 given)
Parser not available for md: __init__() takes exactly 1 argument (2 given)
Parser not available for md: __init__() takes exactly 1 argument (2 given)
Parser not available for md: __init__() takes exactly 1 argument (2 given)
Parser not availab

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertModel LOAD REPORT from: BAAI/bge-small-en-v1.5
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


✓ Embedding model loaded (dim: 384)
Generating embeddings for 302 chunks...


Batches:   0%|          | 0/10 [00:00<?, ?it/s]

✓ Vector store created with 302 embeddings

[4/5] 🧠 Loading LLM...
Loading LLM: Qwen/Qwen2.5-Coder-14B...


config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/579 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/139 [00:00<?, ?B/s]

✓ LLM loaded (4-bit quantized, Qwen/Qwen2.5-Coder-14B)
  Memory footprint: ~11.6GB

[5/5] ✅ System ready!

✨ System Ready! You can now ask custom questions using:
   ask_question('your question here', vector_store, qa_bot)


In [36]:
ask_question("Give me what an example of using the code in pseudocode",vector_store,qa_bot)


❓ QUERY: Give me what an example of using the code in pseudocode

📚 Found 5 relevant chunks:
  1. .github/workflows/codeql-analysis.yml (chunk_0) (lines 1-59) [score: 0.667]
  2. __test__/input-helper.test.ts (chunk_0) (lines 1-80) [score: 0.655]
  3. src/misc/generate-docs.ts (chunk_0) (lines 1-80) [score: 0.650]

🤖 ANSWER:

Here is some sample pseudo-code illustrating how one could use the `updateUsage()` function from `[src/misc/generate-docs.ts]`:

```typescript
try {
  updateUsage("my-action", "/path/to/action.yml", "./docs/README.md");
  console.log("Successfully updated docs!");
} catch(e) {
  console.error("Failed updating docs:", e);
}
```



'Here is some sample pseudo-code illustrating how one could use the `updateUsage()` function from `[src/misc/generate-docs.ts]`:\n\n```typescript\ntry {\n  updateUsage("my-action", "/path/to/action.yml", "./docs/README.md");\n  console.log("Successfully updated docs!");\n} catch(e) {\n  console.error("Failed updating docs:", e);\n}\n```'

In [37]:
ask_question("Give me what an example of using it in github ",vector_store,qa_bot)


❓ QUERY: Give me what an example of using it in github 

📚 Found 5 relevant chunks:
  1. action.yml (chunk_0) (lines 1-80) [score: 0.745]
  2. README.md (chunk_65) (lines 66-145) [score: 0.732]
  3. src/git-auth-helper.ts (chunk_0) (lines 1-80) [score: 0.727]

🤖 ANSWER:

To use the `GitAuthHelper` class in GitHub Actions, you would typically interact with it through its methods within a custom action or script running inside a job in a YAML configuration file (.yml). Here’s how you might do this based on the given code snippets:

### Example Usage Scenario

Let's assume we have created an instance of our `GitAuthHelper`, named `authHelper`. This helper will be used to manage authentication configurations for Git operations during the execution of a workflow in GitHub Actions.

#### Step-by-Step Guide

1. **Create Instance:** Initialize the `GitAuthHelper` by passing required dependencies such as `git` command manager and possibly some additional settings if needed.

   ```typescript
 

'To use the `GitAuthHelper` class in GitHub Actions, you would typically interact with it through its methods within a custom action or script running inside a job in a YAML configuration file (.yml). Here’s how you might do this based on the given code snippets:\n\n### Example Usage Scenario\n\nLet\'s assume we have created an instance of our `GitAuthHelper`, named `authHelper`. This helper will be used to manage authentication configurations for Git operations during the execution of a workflow in GitHub Actions.\n\n#### Step-by-Step Guide\n\n1. **Create Instance:** Initialize the `GitAuthHelper` by passing required dependencies such as `git` command manager and possibly some additional settings if needed.\n\n   ```typescript\n   const authHelper = createAuthHelper(git);\n   ```\n\n2. **Configure Authentication**: Set up authentication credentials before performing any Git operation requiring authorization.\n\n   ```typescript\n   await authHelper.configureAuth();\n   // Perform othe

In [40]:
ask_question("Give me listask_question("Give me what an example of using it in github ",vector_store,qa_bot) of all functions which can be used in my github ",vector_store,qa_bot)


❓ QUERY: Give me list of all functions which can be used in my github workflows 

📚 Found 5 relevant chunks:
  1. src/input-helper.ts (chunk_0) (lines 1-80) [score: 0.734]
  2. __test__/input-helper.test.ts (chunk_0) (lines 1-80) [score: 0.731]
  3. src/workflow-context-helper.ts (chunk_0) (lines 1-32) [score: 0.715]

🤖 ANSWER:

Based on your request to provide a list of usable functions that can be employed within Github Workflows, it appears you're referring to functions available through actions' JavaScript API (@actions/* modules).

The libraries involved seem to cover a broad range but here’s a summary based on your files:

### src/input-helper.ts:
- **`getInputs()`**: A method designed specifically to handle parsing and validating various settings required by Git Source Actions. It relies heavily on other helper methods.

### __test__/input-helper.test.ts:
This does not contain any directly usable public functions; instead it seems like this might contain unit test cases or mock

"Based on your request to provide a list of usable functions that can be employed within Github Workflows, it appears you're referring to functions available through actions' JavaScript API (@actions/* modules).\n\nThe libraries involved seem to cover a broad range but here’s a summary based on your files:\n\n### src/input-helper.ts:\n- **`getInputs()`**: A method designed specifically to handle parsing and validating various settings required by Git Source Actions. It relies heavily on other helper methods.\n\n### __test__/input-helper.test.ts:\nThis does not contain any directly usable public functions; instead it seems like this might contain unit test cases or mocking setups to test how `getInputs` handles different scenarios.\n\n### src/workflow-context-helper.ts:\n- **`getOrganizationId()`**: Retrieves the organization identifier associated with the current workflow execution environment by reading data stored at `$env:GITHUB_EVENT_PATH`.\n\nThese files primarily appear geared to

In [41]:
ask_question("Give me 5 sentence answer explaining the codebase ",vector_store,qa_bot)


❓ QUERY: Give me 5 sentence answer explaining the codebase 

📚 Found 5 relevant chunks:
  1. .github/workflows/codeql-analysis.yml (chunk_0) (lines 1-59) [score: 0.721]
  2. .github/workflows/test.yml (chunk_0) (lines 1-80) [score: 0.669]
  3. CHANGELOG.md (chunk_130) (lines 131-190) [score: 0.659]

🤖 ANSWER:

Based on the provided GitHub Actions configuration files (.github/workflows/codeql-analysis.yml and .github/workflows/test.yml), here is an overview of the key components of the codebase:

**GitHub Workflow Configuration**

The workflows define automated processes triggered by certain events such as pushes or pull requests on specified branches. They also specify what jobs should run on which operating systems using matrices.

* `.github/workflows/codeql-analysis.yml`: Runs static analysis with CodeQL on various programming languages including JavaScript. It defines schedules for running analyses weekly on Sunday at UTC+9 hours.
* `.github/workflows/test.yml`: Builds, tests, for

'Based on the provided GitHub Actions configuration files (.github/workflows/codeql-analysis.yml and .github/workflows/test.yml), here is an overview of the key components of the codebase:\n\n**GitHub Workflow Configuration**\n\nThe workflows define automated processes triggered by certain events such as pushes or pull requests on specified branches. They also specify what jobs should run on which operating systems using matrices.\n\n* `.github/workflows/codeql-analysis.yml`: Runs static analysis with CodeQL on various programming languages including JavaScript. It defines schedules for running analyses weekly on Sunday at UTC+9 hours.\n* `.github/workflows/test.yml`: Builds, tests, formats checks, lints source codes then verifies if there are any unstaged changes after each step execution across multiple OS platforms (Ubuntu, macOS, Windows). This process occurs whenever changes happen either through direct commits onto master/main/release-* streams OR via incoming Pull Requests targe