# Final Version of the GravRAG Populator

In [None]:
!pip install requests
!pip install markdown
!pip install beautifulsoup4

In [1]:
import json
import logging
import os
import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

import markdown
import requests
from bs4 import BeautifulSoup

# Initialize logging: configure the root logger at INFO level and create a
# module-level logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API endpoint information: base URL of the API server; the endpoint paths
# used below (e.g. "/gravrag/create_memory") are appended to it
baseUrl = "http://localhost:8000"

# Function to send request with metadata
def send_request_with_metadata(title, metadata, timeout=None):
    """POST one memory (title plus metadata) to the create_memory endpoint.

    Args:
        title: Text sent as the ``"content"`` field of the request body.
        metadata: JSON-serializable value sent as the ``"metadata"`` field.
        timeout: Optional timeout in seconds handed to ``requests.post``.
            The default ``None`` waits indefinitely, matching the previous
            behavior.

    Returns:
        The decoded JSON response when the server answers with status 200.
        Otherwise a dict with an ``"error"`` message and a ``"details"``
        string (the response text or the exception text).
    """
    url = f"{baseUrl}/gravrag/create_memory"  # Use your existing API endpoint
    headers = {"Content-Type": "application/json"}
    body = {"content": title, "metadata": metadata}

    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body),  # Convert the body to JSON format
            timeout=timeout,
        )

        if response.status_code == 200:
            return response.json()  # Return the response as JSON

        logger.error(
            "Request failed with status code %s: %s",
            response.status_code,
            response.text,
        )
        return {
            "error": f"Request failed with status code {response.status_code}",
            "details": response.text,
        }
    except requests.RequestException as e:
        logger.error("Request failed: %s", e)
        return {"error": "Request failed", "details": str(e)}

# Data model to represent sections of the README
@dataclass
class ReadmeSection:
    """One heading-delimited section of a parsed README.

    Only ``content``, ``heading`` and ``level`` are required. The remaining
    fields default to "no parent" and fresh, per-instance empty containers,
    so instances never share a ``children`` list or ``metadata`` dict.
    """

    # Text collected from the elements that follow the heading.
    content: str
    # Heading text of the section.
    heading: str
    # Heading depth: 1 for <h1> through 6 for <h6>.
    level: int
    # Heading text of the enclosing section, or None for a top-level section.
    parent: Optional[str] = None
    # Heading texts of the directly nested sections.
    children: List[str] = field(default_factory=list)
    # Free-form extra data attached to the section.
    metadata: Dict[str, Any] = field(default_factory=dict)

# Function to parse README and break it into sections
def parse_readme(content: str) -> List[ReadmeSection]:
    """Split Markdown text into a flat list of heading-delimited sections.

    The Markdown is rendered to HTML and walked in document order. Every
    heading (h1-h6) opens a new section; the text of the paragraphs, code
    blocks and lists that follow is appended to the most recent section.
    Sections are linked to each other by heading text through ``parent``
    and ``children``.

    Text that appears before the first heading is discarded because there
    is no section to attach it to.

    Args:
        content: Raw Markdown source.

    Returns:
        The sections in document order, each with whitespace-trimmed content.
    """
    heading_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    body_tags = ['p', 'pre', 'ul', 'ol']

    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []  # Chain of currently open ancestor sections
    current_section = None

    for elem in soup.find_all(heading_tags + body_tags):
        if elem.name in heading_tags:
            level = int(elem.name[1])
            # Close every open section at the same or a deeper level, so the
            # top of the stack is this heading's parent (if any).
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            current_section = ReadmeSection(
                content='',
                heading=elem.text.strip(),
                level=level,
                parent=section_stack[-1].heading if section_stack else None,
                children=[],
                metadata={}
            )

            # Append to parent only if there's a parent
            if section_stack:
                section_stack[-1].children.append(current_section.heading)

            sections.append(current_section)
            section_stack.append(current_section)
        elif current_section is not None:
            # find_all also yields nested matches (a list inside a list, a
            # paragraph inside a list item). Their text is already part of
            # the outermost element's get_text(), so adding them again would
            # duplicate it.
            if elem.find_parent(body_tags) is not None:
                continue
            current_section.content += " " + elem.get_text(separator=" ", strip=True)

    # Strip any leading/trailing spaces in content for clean output
    for section in sections:
        section.content = section.content.strip()

    return sections


# Function to process README and send sections to API
def process_readme_and_send(readme_path: str):
    """Parse a README file and send each of its sections to the API.

    Each section is sent with its heading as the title and a metadata dict
    holding the section text, its position in the heading hierarchy, the
    current timestamp and initial access/relevance counters. The outcome of
    every request is printed: a success line, or a failure line when the
    sender returns an error dict.

    Args:
        readme_path: Path of the Markdown file to read (decoded as UTF-8).
    """
    with open(readme_path, 'r', encoding='utf-8') as f:
        content = f.read()

    sections = parse_readme(content)

    for section in sections:
        # Prepare title and metadata without vectorization
        title = section.heading
        metadata = {
            "content": section.content,
            "level": section.level,
            "parent": section.parent,
            "children": section.children,
            "timestamp": time.time(),
            "access_count": 0,
            "relevance_score": 1.0
        }

        # Send to the API endpoint
        response = send_request_with_metadata(title, metadata)

        # The sender reports failures as a dict carrying an "error" key;
        # do not claim success for those.
        # NOTE(review): assumes a successful API response never contains a
        # top-level "error" key -- confirm against the server.
        if isinstance(response, dict) and "error" in response:
            print(
                f"Failed to send section '{title}' to API: "
                f"{response['error']} ({response.get('details')})"
            )
            continue

        print(f"Sent section '{title}' to API. Metadata: {metadata}")
    

## TO RUN:

In [None]:
process_readme_and_send("README.MD")  # Path of the README file to parse and send

## To recall memories from the GravRAG using only TITLE:

- SCROLL DOWN FOR THE TEST RUN EXAMPLE

In [None]:
import requests
import json
baseUrl = "http://localhost:8000"
def recall_memories_using_title(title: str, top_k):
    """Query the recall_memory endpoint with a title.

    Args:
        title: Text sent as the ``"query"`` field of the request body.
        top_k: Value sent as the ``"top_k"`` field of the request body.

    Returns:
        The decoded JSON response when the server answers with status 200.
        Otherwise a dict with an ``"error"`` message and a ``"details"``
        string (the response text or the exception text).
    """
    endpoint = f"{baseUrl}/gravrag/recall_memory"
    payload = {"query": title, "top_k": top_k}

    try:
        reply = requests.post(
            endpoint,
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload),
        )

        # Guard clause: anything other than 200 is reported as an error dict.
        if reply.status_code != 200:
            return {
                "error": f"Request failed with status code {reply.status_code}",
                "details": reply.text,
            }

        return reply.json()
    except requests.RequestException as exc:
        return {"error": "Request failed", "details": str(exc)}


## TEST RUN:

### TITLE - TITLE THAT YOU WANT TO SEARCH FOR: 

### TOP_K - NUMBER OF ITEMS YOU WANT TO RETRIEVE

In [None]:
# NOTE(review): this example title looks like it was copied from the
# populator's "Sent section '...' to API. Metadata" print output; the stored
# title is presumably just "User Schema" -- confirm.
title = "'User Schema' to API. Metadata"  # Example title to search for
top_k = 5  # Number of items to retrieve from the database

print(recall_memories_using_title(title,top_k))

## To recall memories from the GravRAG using Title AND Metadata:

- SCROLL DOWN FOR THE TEST RUN EXAMPLE

### Please note: the metadata must be EXACTLY the same as the metadata stored in the database

In [None]:
import requests
import json
baseUrl = "http://localhost:8000"
def recall_memories_using_title_and_metadata(title: str, top_k, metadata=None):
    """Query the recall_with_metadata endpoint with a title and metadata.

    Args:
        title: Text sent as the ``"query"`` field of the request body.
        top_k: Value sent as the ``"top_k"`` field of the request body.
        metadata: Optional JSON-serializable metadata to send with the
            query. When ``None`` (the default) the field is omitted, so
            two-argument calls send the same body as before.

    Returns:
        The decoded JSON response when the server answers with status 200.
        Otherwise a dict with an ``"error"`` message and a ``"details"``
        string (the response text or the exception text).
    """
    url = f"{baseUrl}/gravrag/recall_with_metadata"
    headers = {
        "Content-Type": "application/json"
    }
    body = {
        "query": title,
        "top_k": top_k
    }
    if metadata is not None:
        # NOTE(review): assumes the endpoint reads the filter from a
        # "metadata" field -- confirm against the server's request schema.
        body["metadata"] = metadata

    try:
        response = requests.post(url, headers=headers, data=json.dumps(body))

        if response.status_code == 200:
            return response.json()  # Return the JSON response if successful

        return {
            "error": f"Request failed with status code {response.status_code}",
            "details": response.text
        }
    except requests.RequestException as e:
        return {
            "error": "Request failed",
            "details": str(e)
        }


## TEST RUN:

### TITLE - TITLE THAT YOU WANT TO SEARCH FOR: 

### TOP_K - NUMBER OF ITEMS YOU WANT TO RETRIEVE

### METADATA - METADATA THAT YOU WANT TO SEARCH FOR

In [None]:
title = "'User Schema' to API. Metadata"  # Example title to search for
top_k = 5  # Number of items to retrieve from the database
metadata = {}  # Example metadata to search for (empty here)

# As the last expression of the cell, the result is displayed by the notebook
recall_memories_using_title_and_metadata(title,top_k,metadata)