# Graph generation

In [1]:
import os

from dotenv import load_dotenv

# Load environment variables from the .env file so that later cells can read
# them via os.getenv(); the value shown as this cell's output is the call's return value.
load_dotenv()

True

## Load (pre-created) text units

In [2]:
import pandas as pd

# Read the pre-created text units from the bundled sample parquet file.
df_text_units = pd.read_parquet("sample-data/base_text_units.parquet")

# Report how many text units were loaded, then preview the top of the frame.
print(f"Number of rows: {df_text_units.shape[0]}")
print("Displaying first 5 rows:")

df_text_units.head(5)

Number of rows: 39
Displaying first 5 rows:


Unnamed: 0,id,document_id,text_unit
0,f28e49bc-5b67-46b3-b971-6d6cb2832790,a0192baf-d76a-40d4-bcd3-437127eef568,"﻿ A CHRISTMAS CAROL\n\n [Illustration: _""How..."
1,6fae26d7-9b26-4f79-ac78-970e69fcab95,a0192baf-d76a-40d4-bcd3-437127eef568,"at the grindstone, Scrooge! a\nsqueezing, wre..."
2,c93ae0c0-c8c3-49a9-beb0-a1e3b74efa0a,a0192baf-d76a-40d4-bcd3-437127eef568,dismal? What reason have you to be morose? You...
3,eef3623c-46d7-4c17-99e6-a4801a779a39,a0192baf-d76a-40d4-bcd3-437127eef568,in Scrooge's office. They had books and paper...
4,6e9a467e-3a92-45d3-acd4-2d1c6996ee28,a0192baf-d76a-40d4-bcd3-437127eef568,"mighty Mansion House, gave orders to his\nfif..."


## Create dependencies

The `GraphGenerator` uses the following components:

- EntityRelationshipExtractor
- GraphsMerger
- EntityRelationshipDescriptionSummarizer

### EntityRelationshipExtractor

In [3]:
from langchain_openai import ChatOpenAI
from langchain_community.cache import SQLiteCache
from langchain_graphrag.indexing.graph_generation import EntityRelationshipExtractor

# Read the OpenAI API key from the environment (os.getenv returns None when the variable is unset).
openai_api_key = os.getenv("LANGCHAIN_GRAPHRAG_OPENAI_CHAT_API_KEY")

# Fail fast when the key is missing *or* empty: an empty value (e.g. a blank
# entry in .env) would otherwise pass an `is None` check and only surface
# later as an error when the model is actually called.
if not openai_api_key:
    raise ValueError("Please set the LANGCHAIN_GRAPHRAG_OPENAI_CHAT_API_KEY environment variable")

# Chat model used for entity/relationship extraction.
er_llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.0,
    api_key=openai_api_key,
    cache=SQLiteCache("openai_cache.db"),  # always a good idea to use a cache
)

# A static method is provided to build the default extractor
extractor = EntityRelationshipExtractor.build_default(llm=er_llm)

### GraphsMerger

In [4]:
from langchain_graphrag.indexing.graph_generation import GraphsMerger

# Instantiate the merger with its default settings (no arguments are passed).
# NOTE(review): presumably this combines the graphs extracted from individual text units — confirm against the library docs.
graphs_merger = GraphsMerger()

### EntityRelationshipDescriptionSummarizer

In [5]:
from langchain_graphrag.indexing.graph_generation import (
    EntityRelationshipDescriptionSummarizer,
)

# Chat model for the description-summarization step. It is configured with the
# same model, temperature, API key and cache file as the extraction LLM.
es_llm = ChatOpenAI(
    api_key=openai_api_key,
    model="gpt-4o-mini",
    temperature=0.0,
    # always a good idea to use Cache
    cache=SQLiteCache("openai_cache.db"),
)

# Build the summarizer through its default factory method.
summarizer = EntityRelationshipDescriptionSummarizer.build_default(llm=es_llm)

## Finally, generate the graph

In [6]:
from langchain_graphrag.indexing.graph_generation import (
    GraphGenerator,
)

# Wire the three dependencies created in the previous sections into the generator.
graph_generator = GraphGenerator(
    er_extractor=extractor,
    # Reuse the merger built in the "GraphsMerger" section rather than
    # constructing a second, separate instance here.
    graphs_merger=graphs_merger,
    er_description_summarizer=summarizer,
)

In [7]:
# Run graph generation over the loaded text units. Per the progress output of
# this cell, it extracts entities and relationships and then summarizes the
# entity and relationship descriptions. The result exposes `.nodes` and
# `.edges`, which the next cell reads.
graph = graph_generator.run(df_text_units)

Extracting entities and relationships ...: 100%|██████████| 39/39 [00:00<00:00, 157.56it/s]
Summarizing entities descriptions: 100%|██████████| 116/116 [00:00<00:00, 1075.01it/s]
Summarizing relationship descriptions: 100%|██████████| 160/160 [00:02<00:00, 63.72it/s]


In [8]:
# Report the size of the generated graph: node count first, then edge count.
print(
    f"Number of nodes - {len(graph.nodes)}",
    f"Number of edges - {len(graph.edges)}",
    sep="\n",
)

Number of nodes - 116
Number of edges - 160
