In [None]:
# Fetch the KG_RAG sources into ./KG_RAG; the following cell changes into that folder with `%cd KG_RAG`.
!git clone https://github.com/yzjiao/KG_RAG

In [None]:
# Enter the cloned repository; requirements.txt below is resolved relative to it.
%cd KG_RAG

# Comment out every requirement whose entry in requirements.txt is known to break the install.
# The address `/^[^#]*NAME/` only selects lines where NAME appears before any `#`, so a line
# that is already commented out is left alone: re-running this cell, or a second pattern
# hitting the same line, never stacks additional `#` characters.
!sed -i '/^[^#]*neo4j/s/^/#/' requirements.txt
!sed -i '/^[^#]*onnxruntime/s/^/#/' requirements.txt
!sed -i '/^[^#]*pulsar-client/s/^/#/' requirements.txt
# `torch` is matched as a substring, so this single command also disables the torchvision line.
!sed -i '/^[^#]*torch/s/^/#/' requirements.txt
!sed -i '/^[^#]*aiohttp/s/^/#/' requirements.txt
!sed -i '/^[^#]*greenlet/s/^/#/' requirements.txt
!sed -i '/^[^#]*pyarrow/s/^/#/' requirements.txt
!sed -i '/^[^#]*safetensors/s/^/#/' requirements.txt
!sed -i '/^[^#]*tokenizers/s/^/#/' requirements.txt
!sed -i '/^[^#]*uvloop/s/^/#/' requirements.txt

# Install the requirements that are still active in the file.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
%pip install -r requirements.txt

# Install current releases of the packages disabled above, plus the extra packages
# (langchain_community, chromadb, langchain-huggingface) that the requirements file does not list.
%pip install neo4j onnxruntime pulsar-client torch torchvision aiohttp greenlet pyarrow safetensors tokenizers uvloop langchain_community chromadb langchain-huggingface

# NOTE: this message does not check the exit status of the installs; read the pip output above.
print("‚úÖ All dependencies installed.")

In [None]:
# Install chromadb, langchain-huggingface and langchain_community.
# NOTE(review): the same three packages are already named in the last install command of the
# dependency-setup cell, so this cell looks redundant once that cell has run — confirm and drop.
!pip install chromadb langchain-huggingface
!pip install langchain_community

In [None]:
# --- Patch 1: Fix the hardcoded model name ---
# The first three substitutions rewrite the known spellings of the model name in utility.py
# ("gemini-1.5-flash-latest", "gemini-pro", "models/gemini-1.5-flash-latest") to
# "gemini-1.5-flash". The order matters: the fourth substitution further down then turns that
# intermediate value into "models/gemini-flash-latest", which is the name left in the file.
# NOTE(review): the paths are absolute (/content/KG_RAG) — presumably a Colab runtime; adjust
# them if the repository was cloned somewhere else.
!sed -i 's/model_name="gemini-1.5-flash-latest"/model_name="gemini-1.5-flash"/g' /content/KG_RAG/kg_rag/utility.py
!sed -i 's/model_name="gemini-pro"/model_name="gemini-1.5-flash"/g' /content/KG_RAG/kg_rag/utility.py
!sed -i 's/model_name="models\/gemini-1.5-flash-latest"/model_name="gemini-1.5-flash"/g' /content/KG_RAG/kg_rag/utility.py

# Final rewrite: every occurrence of "gemini-1.5-flash" (including those produced above)
# becomes "models/gemini-flash-latest".
!sed -i 's/model_name="gemini-1.5-flash"/model_name="models\/gemini-flash-latest"/g' /content/KG_RAG/kg_rag/utility.py

# Guard the `node_hits.append(...)` line with `if node_search_result:` so the first hit is only
# read when the search result is non-empty. The pattern includes the line's 12-space indent.
!sed -i 's/            node_hits.append(node_search_result\[0\]\[0\].page_content)/            if node_search_result: node_hits.append(node_search_result[0][0].page_content)/' /content/KG_RAG/kg_rag/utility.py

# `sed -i` edits utility.py on disk; the message below is printed without checking that the
# substitutions actually matched anything.
print("‚úÖ utility.py has been permanently patched with all necessary fixes.")

In [None]:
# Mark run_gemini.sh as executable, then execute it from the current working directory.
# NOTE(review): both commands use a relative path, so they presumably rely on the kernel still
# being inside the KG_RAG checkout (set by the earlier `%cd KG_RAG`) — confirm before running
# this cell on its own.
!chmod +x run_gemini.sh
!./run_gemini.sh

In [None]:
import pandas as pd
import os
import re

# Captures VALUE from a `"answer": "VALUE"` pair anywhere in an LLM reply (key is case-insensitive).
_ANSWER_PATTERN = re.compile(r'"answer"\s*:\s*"([^"]+)"', re.IGNORECASE)

# Columns every result CSV must provide before it can be scored.
_REQUIRED_COLUMNS = ("correct_answer", "llm_answer")


def _is_answer_correct(correct_answer, llm_answer):
    """Tell whether a single LLM reply matches the expected answer.

    If the reply contains a `"answer": "VALUE"` pair, VALUE is compared; otherwise the
    whole (stripped) reply is compared. An empty reply never counts as correct, so rows
    with missing data cannot inflate the score.
    """
    expected = correct_answer.strip()
    reply = llm_answer.strip()
    found = _ANSWER_PATTERN.search(reply)
    given = found.group(1).strip() if found else reply
    return given != "" and given == expected


def _score_file(path):
    """Return the fraction of correctly answered rows in one result CSV.

    Raises:
        ValueError: if a required column is missing or the file has no data rows.
    """
    # Read everything as text and keep empty cells as "" so numeric-looking answers are not
    # reformatted (e.g. 5 -> 5.0) and missing cells do not turn into the string "nan".
    frame = pd.read_csv(path, dtype=str, keep_default_na=False)

    missing = [column for column in _REQUIRED_COLUMNS if column not in frame.columns]
    if missing:
        # Without this check a CSV lacking the columns would compare "" with "" on every
        # row and be reported as 100% accurate.
        raise ValueError(f"missing required column(s): {', '.join(missing)}")
    if frame.empty:
        raise ValueError("file contains no rows")

    answers = frame[list(_REQUIRED_COLUMNS)].fillna("")
    verdicts = [
        _is_answer_correct(expected, reply)
        for expected, reply in zip(answers["correct_answer"], answers["llm_answer"])
    ]
    return sum(verdicts) / len(verdicts)


def calculate_accuracies(base_path="Data"):
    """
    Score every result CSV in a folder and print a Markdown accuracy table.

    Each CSV needs the columns `correct_answer` and `llm_answer`. The LLM's answer is
    extracted from a `"answer": "VALUE"` pair when present, otherwise the raw cell is
    used. Files that cannot be scored are reported and skipped.

    Args:
        base_path: Folder containing the result CSV files. Defaults to "Data".

    Returns:
        dict mapping each scored file's name (without extension) to its accuracy,
        a float between 0 and 1. Empty when nothing could be scored.
    """
    print("--- üìä Calculating Accuracies ---")

    # A missing or unreadable folder is reported instead of aborting with a traceback.
    try:
        filenames = sorted(os.listdir(base_path))
    except OSError as err:
        print(f"‚ö†Ô∏è Cannot list result folder {base_path}: {err}")
        filenames = []

    results = {}
    for filename in filenames:
        path = os.path.join(base_path, filename)
        # Only regular files with a .csv extension (any letter case) are scored.
        if not (filename.lower().endswith(".csv") and os.path.isfile(path)):
            continue
        try:
            results[os.path.splitext(filename)[0]] = _score_file(path)
        except Exception as e:
            # Best effort by design: one bad file must not prevent the others from being scored.
            print(f"‚ùå Error processing file {path}: {e}")

    print("\n--- üìà Final Results ---")

    if not results:
        print(f"No CSV files were found or processed successfully in `{base_path}`.")
    else:
        print("| File | Accuracy Score |")
        print("| :--- | :--- |")
        for name, acc in sorted(results.items()):
            print(f"| {name} | {acc*100:.2f}% |")
        # Only announce the table when one was actually printed.
        print("\n‚úÖ This table is ready to be copied into your final report.")

    return results

# --- Run the function ---
# The guard runs the report only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    calculate_accuracies()
