# Basic GraphRAG Implementation

In [1]:
!pip install -q graphrag==2.5.0 ruamel.yaml==0.18.15 \
                2>/dev/null # Suppress unfatal errors

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.8/80.8 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.0/88.0 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.5/46.5 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m370.4/370.4 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.7/119.7 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
from google.colab import userdata

# Outside Colab, set the key directly instead of using `userdata`:
#   os.environ["GRAPHRAG_API_KEY"] = "<your API key>"

# In Colab, read the key stored under the secret name OPENAI_API_KEY and export it
# as GRAPHRAG_API_KEY so the `graphrag` commands launched from this notebook inherit it.
os.environ["GRAPHRAG_API_KEY"] = userdata.get("OPENAI_API_KEY")

In [3]:
from huggingface_hub import hf_hub_download

# Create the project's input folder (no error if it already exists)
os.makedirs("/content/ragtest/input", exist_ok=True)

# Fetch LoRA.txt from the Hugging Face dataset repo into that folder.
# The call stays the last expression so the local file path is shown as the cell output.
hf_hub_download(
    repo_id="jaiganesan/research_papers",
    repo_type="dataset",
    filename="LoRA.txt",
    local_dir="/content/ragtest/input",
)

LoRA.txt: 0.00B [00:00, ?B/s]

'/content/ragtest/input/LoRA.txt'

In [4]:
# Initialize the GraphRAG project rooted at /content/ragtest.
# The settings.yaml edited in the next cell lives under this root.
# Warnings printed by this command can be ignored.

!python -m graphrag init --root /content/ragtest

2025-09-19 08:52:10.0235 - INFO - graphrag.cli.initialize - Initializing project at /content/ragtest


In [5]:
import re
import yaml

def update_yaml_ruamel(file_path, path, new_value):
    """
    Set one value in a YAML file, preserving comments and formatting.

    The file is parsed with ruamel.yaml, the value addressed by ``path`` is
    replaced (or created), and the document is written back to the same file.
    A confirmation line is printed on success.

    Installation: pip install ruamel.yaml

    Args:
        file_path: Path of the YAML file to modify in place.
        path: Dot-separated key path, e.g. ``"models.default_chat_model.model"``.
            Missing intermediate keys are created as empty mappings.
        new_value: Value to store under the last key of ``path``.

    Raises:
        TypeError: If the document root or an intermediate node on ``path``
            exists but is not a mapping.
    """
    from collections.abc import MutableMapping

    from ruamel.yaml import YAML

    # Named `round_trip` (not `yaml`) so the module-level `yaml` import is not shadowed.
    round_trip = YAML()
    round_trip.preserve_quotes = True
    round_trip.width = 4096  # very wide line limit so long values are not re-wrapped on dump

    # YAML files are read and written as UTF-8 regardless of the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = round_trip.load(file)

    # An empty file loads as None; start from an empty mapping instead of
    # failing later with an opaque "NoneType is not iterable" error.
    if data is None:
        data = {}

    # Walk down to the mapping that holds the final key, creating levels as needed.
    *parent_keys, leaf_key = path.split('.')
    current = data
    walked = []
    for key in parent_keys:
        if not isinstance(current, MutableMapping):
            raise TypeError(
                f"Cannot descend into '{'.'.join(walked) or '<root>'}' "
                f"while setting '{path}': not a mapping"
            )
        if key not in current:
            current[key] = {}
        current = current[key]
        walked.append(key)

    if not isinstance(current, MutableMapping):
        raise TypeError(
            f"Cannot set '{path}': '{'.'.join(walked) or '<root>'}' is not a mapping"
        )

    # Set the new value
    current[leaf_key] = new_value

    # Write back to file
    with open(file_path, 'w', encoding='utf-8') as file:
        round_trip.dump(data, file)

    print(f"Updated {path} = {new_value}")

file_path = "/content/ragtest/settings.yaml"

# Point the default chat model at gpt-4o (GPT-5 may produce errors with this setup).
update_yaml_ruamel(
    file_path,
    path="models.default_chat_model.model",
    new_value="gpt-4o",
)


Updated models.default_chat_model.model = gpt-4o


In [6]:
# Indexing
# Runs the GraphRAG indexing pipeline for the project rooted at /content/ragtest.
# The Parquet file queried later in this notebook is read from /content/ragtest/output.
# NOTE(review): the cuFFT/cuDNN/cuBLAS "Unable to register ... factory" lines in the
# output look like start-up noise from the Colab runtime, not GraphRAG failures — confirm.

!python -m graphrag index --root /content/ragtest

2025-09-19 09:17:49.836344: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758273470.172930    6960 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758273470.259012    6960 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758273470.951592    6960 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758273470.951662    6960 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758273470.951668    6960 computation_placer.cc:177] computation placer alr

In [7]:
# Global Search

!python -m graphrag query \
           --root ./ragtest \
           --method global \
           --query " what this text document about?"

2025-09-19 09:23:39.128703: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758273819.154012    8427 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758273819.161668    8427 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758273819.182323    8427 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758273819.182364    8427 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758273819.182369    8427 computation_placer.cc:177] computation placer alr

In [8]:
# Local search

!python -m graphrag query \
           --root ./ragtest \
           --method local \
           --query " what this Low Rank Adaptation"

2025-09-19 09:24:51.123792: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758273891.157581    8737 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758273891.164839    8737 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758273891.184152    8737 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758273891.184225    8737 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758273891.184232    8737 computation_placer.cc:177] computation placer alr

### Important Note: You must update the file path to match your own run, because the `<timestamp>` part of the file name changes with each run. You can also inspect the Parquet file using pandas.

In [9]:
import duckdb

# Select every row of the entities Parquet file in the project's output folder
# (DuckDB reads the file directly from its path).
query_result = duckdb.query(
    "SELECT * FROM '/content/ragtest/output/entities.parquet'"
)

print(query_result)

┌──────────────────────────────────────┬───────────────────┬───────────────────────┬──────────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬───────────

In [11]:
# Select only the rows of the entities Parquet file whose `type` column equals 'EVENT'.
# NOTE(review): the variable is named ORGANIZATION_result, but the filter selects
# 'EVENT' rows — confirm which one was intended and align the name or the filter.
ORGANIZATION_result = duckdb.query("""
    SELECT *
    FROM '/content/ragtest/output/entities.parquet'
    WHERE type = 'EVENT'
""")

# Print the result
print(ORGANIZATION_result)

┌──────────────────────────────────────┬───────────────────┬───────────────────────────────┬─────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────