In [1]:
from typing import List, Optional, Union
from pydantic import BaseModel, Field

from langchain_community.chat_models.openai import ChatOpenAI

from langchain.graphs import Neo4jGraph
from langchain.graphs.graph_document import (
    Node as BaseNode,
    Relationship as BaseRelationship,
    GraphDocument
)


from typing import List, Dict, Any, Optional
from langchain.pydantic_v1 import Field, BaseModel

from langchain_openai import AzureChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains.openai_functions import create_structured_output_chain
from langchain.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain
from langchain.prompts.prompt import PromptTemplate
#from langchain.chat_models import AzureChatOpenAI
#from langchain_community.chat_models import AzureChatOpenAI
from dotenv import load_dotenv

from tqdm import tqdm
load_dotenv()



True

In [2]:
from langchain.graphs import Neo4jGraph
import os

#url = "neo4j+s://databases.neo4j.io"
from langchain_community.graphs import Neo4jGraph

url=os.getenv("NEO4J_URI","bolt://localhost:7687")
username=os.getenv("NEO4J_USER","neo4j")
password=os.getenv("NEO4J_PASSWORD","Pk@12345678")
#url=os.getenv("NEO4J_URI","neo4j+s://5292e0f3.databases.neo4j.io")
#username=os.getenv("NEO4J_USER","neo4j")
#password=os.getenv("NEO4J_PASSWORD","uT0_PcdAy6VlQSfXROAFOi34yVO5h_0aoO3Sm9w0TPM")

graph = Neo4jGraph(url=url, username=username, password=password)
print(graph)

<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x0000021E9754A250>


In [3]:


class Property(BaseModel):
  """A single property consisting of key and value"""
  key: str = Field(..., description="key")
  value: str = Field(..., description="value")

class Node(BaseNode):
    properties: Optional[List[Property]] = Field(
        None, description="List of node properties")

class Relationship(BaseRelationship):
    properties: Optional[List[Property]] = Field(
        None, description="List of relationship properties"
    )

In [4]:
# Define mapping functions
def map_to_base_node(node: Node) -> BaseNode:
    return BaseNode(
        id=node.id,
        type=node.type,
        properties={prop.key: prop.value for prop in node.properties} if node.properties else {}
    )

def map_to_base_relationship(rel: Relationship) -> BaseRelationship:
    return BaseRelationship(
        source=BaseNode(
            id=rel.source.id,
            type=rel.source.type,
            properties={prop.key: prop.value for prop in rel.source.properties} if rel.source.properties else {}
        ),
        target=BaseNode(
            id=rel.target.id,
            type=rel.target.type,
            properties={prop.key: prop.value for prop in rel.target.properties} if rel.target.properties else {}
        ),
        type=rel.type,
        properties={prop.key: prop.value for prop in rel.properties} if rel.properties else {}
    )

In [5]:
class KnowledgeGraph(BaseModel):
    """Generate a knowledge graph with entities and relationships."""
    nodes: List[Node] = Field(
        ..., description="List of nodes in the knowledge graph")
    rels: List[Relationship] = Field(
        ..., description="List of relationships in the knowledge graph"
    )

llm=AzureChatOpenAI(azure_deployment=os.environ['EmbeddingModelDeploymentName'],model="gpt-35-turbo-16k",azure_endpoint=os.environ['OpenAIEndpoint'],api_key=os.environ['OpenAIKey'],api_version=os.environ['OpenAIVersion'])

print(llm)

In [6]:
os.environ["OPENAI_API_KEY"] = 'fe77aff6afd640f78539d8fcad008a4c'
# OpenAI API configuration
llm=AzureChatOpenAI(azure_deployment=os.environ['EmbeddingModelDeploymentName'],model="gpt-35-turbo-16k",azure_endpoint=os.environ['OpenAIEndpoint'],api_key=os.environ['OpenAIKey'],api_version=os.environ['OpenAIVersion'])

def get_extraction_chain(
    allowed_nodes: Optional[List[str]] = None,
    allowed_rels: Optional[List[str]] = None
    ):
    prompt = ChatPromptTemplate.from_messages(
    [(
      "system",
      f"""# Knowledge Graph Instructions for GPT-4
## 1. Overview
You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
{'- **Allowed Node Labels:**' + ", ".join(allowed_nodes) if allowed_nodes else ""}
{'- **Allowed Relationship Types**:' + ", ".join(allowed_rels) if allowed_rels else ""}
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"), 
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.  
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial. 
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination."""),
        ("human", "Use the given format to extract information from the following input: {input}"),
        ("human", "Tip: Make sure to answer in the correct format"),
    ])
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)

In [7]:
from langchain_core.documents import Document
def extract_and_store_graph(
    document: Document,
    nodes:Optional[List[str]] = None,
    rels:Optional[List[str]]=None) -> None:
    # Extract graph data using OpenAI functions
    extract_chain = get_extraction_chain(nodes, rels)
    data = extract_chain.run(document.page_content)
    print(data.nodes)
    print(data.rels)
    # Construct a graph document
    graph_document = GraphDocument(
      nodes = [map_to_base_node(node) for node in data.nodes],
      relationships = [map_to_base_relationship(rel) for rel in data.rels],
      source = document
    )
    # Store information into a graph
    graph.add_graph_documents([graph_document])

In [8]:
print(graph)

<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x0000021E9754A250>


In [13]:
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter

# Read the wikipedia article
#raw_documents = WikipediaLoader(query="Walt Disney").load()
raw_documents = WikipediaLoader(query="Virat Kohli").load()
# Define chunking strategy
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=24)

# Only take the first the raw_documents
documents1 = text_splitter.split_documents(raw_documents[:])
print(len(documents))

1


In [15]:
print(len(documents1))
documents =documents1 #text_splitter.split_documents(raw_documents[:1])
print(len(documents))

25
25


In [16]:


for i, d in tqdm(enumerate(documents), total=len(documents)):
    extract_and_store_graph(d)

  0%|                                                                                           | 0/25 [00:00<?, ?it/s]

[Node(id='Virat Kohli', type='person', properties=[Property(key='name', value='Virat Kohli'), Property(key='birthDate', value='5 November 1988')]), Node(id='Indian national cricket team', type='team', properties=[Property(key='name', value='Indian national cricket team')]), Node(id='Royal Challengers Bengaluru', type='team', properties=[Property(key='name', value='Royal Challengers Bengaluru')]), Node(id='Delhi', type='team', properties=[Property(key='name', value='Delhi')]), Node(id='IPL', type='league', properties=[Property(key='name', value='IPL'), Property(key='team', value='Royal Challengers Bengaluru')]), Node(id='T20I', type='format', properties=[Property(key='name', value='T20I')]), Node(id='ODI', type='format', properties=[Property(key='name', value='ODI')]), Node(id='ICC Test mace', type='trophy', properties=[Property(key='name', value='ICC Test mace')]), Node(id='2011 Cricket World Cup', type='tournament', properties=[Property(key='name', value='2011 Cricket World Cup'), Pro

  8%|██████▋                                                                            | 2/25 [00:42<07:07, 18.60s/it]

[Node(id='Virat Kohli', type='person', properties=[Property(key='seniorCareer', value='List A cricket, playing against Services in the Ranji One-Day Trophy'), Property(key='internationalCareer', value='representing India since included in the ODl squad for the tour of Sri Lanka'), Property(key='achievements', value='won 2011 Cricket World Cup, won 2013 ICC Champions Trophy, won several Asia Cups'), Property(key='youthCareer', value='started in October 2002 at Luhnu cricket ground, scored 15 runs in debut match, first half-century against Haryana'), Property(key='domesticCareer', value='played for Delhi, highest run-scorer in 2002 season with 172 runs, captain of Under-15 team in 2003-04 season'), Property(key='firstClassDebut', value='made debut on 23 November 2006 against Tamil Nadu in Ranji Trophy'), Property(key='emotionalInnings', value="scored 90 runs despite father's passing away during the match"), Property(key='leagueCricket', value='played for Royal Challengers Bengaluru since

 12%|█████████▉                                                                         | 3/25 [01:47<14:38, 39.95s/it]

[Node(id='RCB', type='team', properties=[Property(key='name', value='Royal Challengers Bangalore'), Property(key='officialName', value='Royal Challengers Bengaluru'), Property(key='founded', value='2008'), Property(key='homeGround', value='M. Chinnaswamy Stadium'), Property(key='value', value='$69.8 million'), Property(key='captain', value='Faf du Plessis')]), Node(id='IPL', type='tournament', properties=[Property(key='name', value='Indian Premier League')]), Node(id='United Spirits', type='company', properties=[Property(key='name', value='United Spirits')]), Node(id='Royal Challenge', type='liquorBrand', properties=[Property(key='name', value='Royal Challenge')]), Node(id='M. Chinnaswamy Stadium', type='stadium', properties=[Property(key='name', value='M. Chinnaswamy Stadium'), Property(key='location', value='Bangalore, Karnataka')]), Node(id='Faf du Plessis', type='person', properties=[Property(key='name', value='Faf du Plessis'), Property(key='nationality', value='South African')]),

 16%|█████████████▎                                                                     | 4/25 [02:23<13:26, 38.41s/it]

[Node(id='Anushka Sharma', type='person', properties=[Property(key='name', value='Anushka Sharma'), Property(key='pronunciation', value='[əˈnʊʃka ˈʃərma]'), Property(key='birthDate', value='1 May 1988'), Property(key='nationality', value='Indian'), Property(key='occupation', value='actress'), Property(key='awards', value='Filmfare Award'), Property(key='highestPaidActress', value='2018'), Property(key='featuredInForbes', value='2012-2018')]), Node(id='Ayodhya', type='place', properties=[Property(key='name', value='Ayodhya')]), Node(id='Bangalore', type='place', properties=[Property(key='name', value='Bangalore')]), Node(id='Wendell Rodricks', type='person', properties=[Property(key='name', value='Wendell Rodricks'), Property(key='occupation', value='fashion designer')]), Node(id='Mumbai', type='place', properties=[Property(key='name', value='Mumbai')]), Node(id='Shah Rukh Khan', type='person', properties=[Property(key='name', value='Shah Rukh Khan'), Property(key='occupation', value='a

 20%|████████████████▌                                                                  | 5/25 [03:06<13:22, 40.12s/it]

[Node(id='IPL 2024', type='event', properties=[Property(key='name', value='Indian Premier League 2024'), Property(key='alsoKnownAs', value='IPL 17'), Property(key='brand', value='TATA IPL 2024'), Property(key='edition', value='17th')]), Node(id='BCCI', type='organization', properties=[Property(key='name', value='Board of Control for Cricket in India')]), Node(id='teams', type='concept', properties=[Property(key='name', value='teams'), Property(key='description', value='franchise Twenty20 cricket teams')]), Node(id='Chennai', type='city', properties=[Property(key='name', value='Chennai')]), Node(id='CSK', type='team', properties=[Property(key='name', value='Chennai Super Kings')]), Node(id='Gujarat Titans', type='team', properties=[Property(key='name', value='Gujarat Titans')]), Node(id='KKR', type='team', properties=[Property(key='name', value='Kolkata Knight Riders')]), Node(id='SRH', type='team', properties=[Property(key='name', value='Sunrisers Hyderabad')])]
[Relationship(source=No

 24%|███████████████████▉                                                               | 6/25 [03:18<09:40, 30.56s/it]

[Node(id='2024', type='season', properties=[Property(key='number', value='17th')]), Node(id='Indian Premier League', type='league', properties=[]), Node(id='Royal Challengers Bengaluru', type='team', properties=[]), Node(id='10', type='team', properties=[]), Node(id='Bengaluru', type='city', properties=[]), Node(id='League stage', type='stage', properties=[]), Node(id='6th', type='position', properties=[]), Node(id='18 May 2024', type='date', properties=[]), Node(id='Playoffs', type='stage', properties=[]), Node(id='4th', type='position', properties=[]), Node(id='7', type='winCount', properties=[]), Node(id='7', type='lossCount', properties=[]), Node(id='14', type='points', properties=[]), Node(id='Eliminator', type='stage', properties=[]), Node(id='Rajasthan Royals', type='team', properties=[]), Node(id='22nd May', type='date', properties=[]), Node(id='Ahmedabad', type='city', properties=[]), Node(id='4th', type='position', properties=[]), Node(id='Virat Kohli', type='player', propert

 28%|███████████████████████▏                                                           | 7/25 [03:49<09:12, 30.72s/it]

[Node(id='ICC Awards of the Decade', type='award', properties=[]), Node(id='ICC Awards annual awards programme', type='award', properties=[]), Node(id='Awards Nominations Committee', type='committee', properties=[]), Node(id='Sir Garfield Sobers and Rachael Heyhoe Flint Awards', type='award', properties=[]), Node(id="men's game", type='sport', properties=[]), Node(id="women's game", type='sport', properties=[]), Node(id='fan vote', type='voting', properties=[]), Node(id='expert panel', type='voting', properties=[]), Node(id='ICC World XI Teams', type='team', properties=[]), Node(id='individual ICC awards', type='award', properties=[]), Node(id='Virat Kohli', type='person', properties=[])]
[Relationship(source=Node(id='ICC Awards of the Decade', type='award'), target=Node(id='ICC Awards annual awards programme', type='award'), type='is_a'), Relationship(source=Node(id='ICC Awards of the Decade', type='award'), target=Node(id='Awards Nominations Committee', type='committee'), type='has_c

 32%|██████████████████████████▌                                                        | 8/25 [04:08<07:39, 27.02s/it]

[Node(id='Rohit Gurunath Sharma', type='person', properties=[Property(key='birthDate', value='30 April 1987'), Property(key='nationality', value='Indian'), Property(key='role', value='cricketer'), Property(key='captain', value='India national cricket team'), Property(key='batsmanHand', value='right-handed'), Property(key='teams', value='Mumbai Indians, Mumbai')]), Node(id='India national cricket team', type='team', properties=[]), Node(id='Mumbai Indians', type='team', properties=[]), Node(id='Mumbai', type='team', properties=[]), Node(id='MS Dhoni', type='person', properties=[Property(key='titleWins', value='5')]), Node(id='IPL', type='tournament', properties=[]), Node(id='2007 T20 World Cup', type='tournament', properties=[]), Node(id='2013 ICC Champions Trophy', type='tournament', properties=[]), Node(id='ICC T20 World Cup', type='tournament', properties=[]), Node(id='highest individual score', type='record', properties=[Property(key='score', value='264'), Property(key='format', val

 36%|█████████████████████████████▉                                                     | 9/25 [04:30<06:47, 25.45s/it]

[Node(id='India', type='country', properties=[]), Node(id="India_men's_national_cricket_team", type='sportsTeam', properties=[Property(key='name', value="India men's national cricket team"), Property(key='represents', value='India'), Property(key='governed_by', value='Board of Control for Cricket in India (BCCI)'), Property(key='member_of', value='International Cricket Council (ICC)'), Property(key='test_status', value='Test, ODI, T20I'), Property(key='first_match', value="25 June 1932 against England at Lord's Cricket Ground in London")]), Node(id='Board_of_Control_for_Cricket_in_India', type='organization', properties=[Property(key='name', value='Board of Control for Cricket in India (BCCI)')]), Node(id='International_Cricket_Council', type='organization', properties=[Property(key='name', value='International Cricket Council (ICC)')]), Node(id='Cricket_World_Cup', type='tournament', properties=[Property(key='name', value='Cricket World Cup'), Property(key='times_won', value='2'), Pro

 44%|████████████████████████████████████                                              | 11/25 [05:31<06:00, 25.77s/it]

[Node(id='Virat', type='person', properties=[Property(key='name', value='Virat'), Property(key='gender', value='male'), Property(key='nationality', value='Indian')]), Node(id='Viraat Badhwar', type='person', properties=[Property(key='name', value='Viraat Badhwar'), Property(key='gender', value='male'), Property(key='nationality', value='Australian'), Property(key='occupation', value='golfer'), Property(key='birthDate', value='2000')]), Node(id='Virat Kohli', type='person', properties=[Property(key='name', value='Virat Kohli'), Property(key='gender', value='male'), Property(key='nationality', value='Indian'), Property(key='occupation', value='cricketer'), Property(key='birthDate', value='1988')]), Node(id='Virat Singh', type='person', properties=[Property(key='name', value='Virat Singh'), Property(key='gender', value='male'), Property(key='nationality', value='Indian'), Property(key='occupation', value='cricketer'), Property(key='birthDate', value='1997')])]
[]
[Node(id='2024 season', t

 48%|███████████████████████████████████████▎                                          | 12/25 [05:44<04:43, 21.79s/it]

[Node(id='Shreyas Santosh Iyer', type='person', properties=[Property(key='birthDate', value='6 December 1994'), Property(key='nationality', value='Indian'), Property(key='role', value='cricketer')]), Node(id='Indian cricket team', type='team', properties=[Property(key='sport', value='cricket')]), Node(id='right-handed batter', type='position'), Node(id='New Zealand', type='country'), Node(id='Mumbai', type='team', properties=[Property(key='sport', value='cricket')]), Node(id='Kolkata Knight Riders', type='team', properties=[Property(key='sport', value='cricket'), Property(key='captain', value='Shreyas Santosh Iyer')]), Node(id='India Under-19 cricket team', type='team', properties=[Property(key='sport', value='cricket')]), Node(id='2014 ICC Under-19 Cricket World Cup', type='tournament', properties=[Property(key='sport', value='cricket')]), Node(id='2023 Cricket World Cup', type='tournament', properties=[Property(key='sport', value='cricket')]), Node(id='blitzering century', type='achi

 52%|██████████████████████████████████████████▋                                       | 13/25 [06:24<05:28, 27.33s/it]

[Node(id='BCCI', type='organization', properties=[Property(key='name', value='Board of Control for Cricket in India'), Property(key='headquarters', value='Cricket Centre, Churchgate, Mumbai'), Property(key='establishedDate', value='1 December 1928'), Property(key='registeredUnder', value='Tamil Nadu Societies Registration Act, 1975'), Property(key='president', value='Roger Binny'), Property(key='secretary', value='Jay Shah'), Property(key='revenue', value='₹16,875 crore (US$2.1 billion) (2023-2024)'), Property(key='taxesPaid', value='₹4,000 crore (US$500 million) (2022-2023)')]), Node(id='Cricket Centre', type='location', properties=[Property(key='name', value='Cricket Centre'), Property(key='city', value='Mumbai')]), Node(id='Madras', type='location', properties=[Property(key='name', value='Madras'), Property(key='currentName', value='Chennai')]), Node(id='International Cricket Council', type='organization', properties=[Property(key='name', value='International Cricket Council')]), No

 56%|█████████████████████████████████████████████▉                                    | 14/25 [06:57<05:19, 29.03s/it]

[Node(id='2024 season', type='season', properties=[Property(key='seasonNumber', value='15')]), Node(id='Chennai Super Kings', type='team', properties=[Property(key='franchise', value='Indian Premier League'), Property(key='defendingChampions', value='true'), Property(key='IPLTitles', value='5')]), Node(id='MS Dhoni', type='person', properties=[Property(key='captaincy', value='handovered')]), Node(id='Ruturaj Gaikwad', type='person', properties=[Property(key='captaincy', value='received')])]
[Relationship(source=Node(id='2024 season', type='season'), target=Node(id='Chennai Super Kings', type='team'), type='participatedIn'), Relationship(source=Node(id='Chennai Super Kings', type='team'), target=Node(id='2024 Indian Premier League', type='tournament'), type='participatedIn'), Relationship(source=Node(id='Chennai Super Kings', type='team'), target=Node(id='Gujarat Titans', type='team'), type='defeated'), Relationship(source=Node(id='2024 Indian Premier League', type='tournament'), target

 60%|█████████████████████████████████████████████████▏                                | 15/25 [07:07<03:53, 23.37s/it]

[Node(id='2023', type='CricketWorldCup', properties=[Property(key='host', value='India')]), Node(id='Wankhede Stadium', type='Stadium'), Node(id='Mumbai', type='City'), Node(id='Eden Gardens', type='Stadium'), Node(id='Kolkata', type='City'), Node(id='Narendra Modi Stadium', type='Stadium', properties=[Property(key='location', value='Ahmedabad')]), Node(id='Pakistan', type='Country'), Node(id='India', type='Country'), Node(id='Sri Lanka', type='Country'), Node(id='South Africa', type='Country'), Node(id='New Zealand', type='Country'), Node(id='Afghanistan', type='Country'), Node(id='Australia', type='Country'), Node(id='England', type='Country')]
[Relationship(source=Node(id='2023', type='CricketWorldCup'), target=Node(id='Wankhede Stadium', type='Stadium'), type='venue'), Relationship(source=Node(id='2023', type='CricketWorldCup'), target=Node(id='Eden Gardens', type='Stadium'), type='venue'), Relationship(source=Node(id='2023', type='CricketWorldCup'), target=Node(id='Narendra Modi S

 68%|███████████████████████████████████████████████████████▊                          | 17/25 [07:39<02:32, 19.11s/it]

[Node(id='Kolkata Knight Riders', type='team', properties=[Property(key='name', value='Kolkata Knight Riders'), Property(key='IPL titles', value='3')]), Node(id='Royal Challengers Bangalore', type='team', properties=[Property(key='name', value='Royal Challengers Bangalore'), Property(key='IPL titles', value='0')]), Node(id='Virat Kohli', type='person', properties=[Property(key='name', value='Virat Kohli'), Property(key='runs', value='962')]), Node(id='Sunil Narine', type='person', properties=[Property(key='name', value='Sunil Narine'), Property(key='wickets', value='26')])]
[Relationship(source=Node(id='Kolkata Knight Riders', type='team'), target=Node(id='Royal Challengers Bangalore', type='team'), type='rivalry', properties=[Property(key='description', value='The Kolkata Knight Riders-Royal Challengers Bangalore is a cricket rivalry between the IPL teams of Kolkata Knight Riders and Royal Challengers Bangalore in the Indian Premier League (IPL) and the defunct Champions League Twenty

 76%|██████████████████████████████████████████████████████████████▎                   | 19/25 [08:28<02:03, 20.55s/it]

[Node(id='Royal Challengers Bangalore', type='team', properties=[Property(key='name', value='Royal Challengers Bangalore'), Property(key='captain', value='Virat Kohli'), Property(key='coach', value='Mike Hesson')]), Node(id='IPL 2021', type='season', properties=[Property(key='year', value='2021')]), Node(id='Virat Kohli', type='person', properties=[]), Node(id='Mike Hesson', type='person', properties=[]), Node(id='Kolkata Knight Riders', type='team', properties=[Property(key='name', value='Kolkata Knight Riders')]), Node(id='AB de Villiers', type='person', properties=[Property(key='retiredDate', value='19 November 2021')])]
[Relationship(source=Node(id='IPL 2021', type='season'), target=Node(id='Royal Challengers Bangalore', type='team'), type='participated'), Relationship(source=Node(id='Royal Challengers Bangalore', type='team'), target=Node(id='Virat Kohli', type='person'), type='captainedBy'), Relationship(source=Node(id='Royal Challengers Bangalore', type='team'), target=Node(id='

 80%|█████████████████████████████████████████████████████████████████▌                | 20/25 [09:44<03:04, 36.97s/it]

[Node(id='Mohammad Amir', type='person', properties=[Property(key='name', value='Mohammad Amir'), Property(key='nationality', value='Pakistani'), Property(key='birthDate', value='13 April 1992'), Property(key='role', value='cricketer'), Property(key='battingHand', value='left'), Property(key='bowlingHand', value='left-arm fast')]), Node(id='Pakistan national cricket team', type='team', properties=[Property(key='name', value='Pakistan national cricket team')]), Node(id='2009 ICC World Twenty20', type='tournament', properties=[Property(key='name', value='2009 ICC World Twenty20')]), Node(id='2017 ICC Champions Trophy', type='tournament', properties=[Property(key='name', value='2017 ICC Champions Trophy')]), Node(id='spot-fixing', type='scandal', properties=[Property(key='name', value='spot-fixing')]), Node(id='Salman Butt', type='person', properties=[Property(key='name', value='Salman Butt')]), Node(id='Mohammad Asif', type='person', properties=[Property(key='name', value='Mohammad Asif'

 84%|████████████████████████████████████████████████████████████████████▉             | 21/25 [10:11<02:16, 34.11s/it]

[Node(id='Sanju Viswanath Samson', type='person', properties=[Property(key='birthDate', value='11 November 1994'), Property(key='nationality', value='Indian')]), Node(id='Kerala', type='place'), Node(id='Rajasthan Royals', type='cricket team'), Node(id='Indian Premier League', type='cricket league'), Node(id='Wicket-keeper', type='cricket position'), Node(id='batter', type='cricket position'), Node(id='KCA Royals', type='cricket team'), Node(id='Kerala Premier League', type='cricket league'), Node(id="President's Cup T20", type='cricket tournament'), Node(id='right-handed', type='batting style'), Node(id='vice-captain', type='cricket position'), Node(id='Indian U-19 team', type='cricket team'), Node(id='2014 Under-19 Cricket World Cup', type='cricket tournament'), Node(id='2015 T20 international', type='cricket tournament'), Node(id='Zimbabwe', type='country'), Node(id='One Day International', type='cricket format'), Node(id='2021', type='year'), Node(id='Sri Lanka', type='country'), N

 92%|███████████████████████████████████████████████████████████████████████████▍      | 23/25 [11:58<01:19, 39.63s/it]

[Node(id='Eden Gardens', type='stadium', properties=[Property(key='name', value='Eden Gardens'), Property(key='location', value='Kolkata, India'), Property(key='established', value='1864'), Property(key='capacity', value='68,000'), Property(key='owner', value='Cricket Association of Bengal (CAB)'), Property(key='homeGroundOf', value='Kolkata Knight Riders'), Property(key='headquarters', value='Cricket Association of Bengal')])]
[Relationship(source=Node(id='Eden Gardens', type='stadium'), target=Node(id='Kolkata Knight Riders', type='team'), type='homeGroundOf', properties=[]), Relationship(source=Node(id='Eden Gardens', type='stadium'), target=Node(id='Cricket Association of Bengal', type='organization'), type='headquarters', properties=[])]


 96%|██████████████████████████████████████████████████████████████████████████████▋   | 24/25 [12:04<00:29, 29.43s/it]

[Node(id='ICC World Test Championship', type='tournament', properties=[]), Node(id='2021–2023 ICC World Test Championship', type='tournament', properties=[]), Node(id='final', type='match', properties=[Property(key='winner', value='Australia'), Property(key='marginOfVictory', value='209 runs')]), Node(id='Australia', type='team', properties=[Property(key='tournamentWins', value='1')]), Node(id='India', type='team', properties=[Property(key='tournamentWins', value='0')])]
[Relationship(source=Node(id='2021–2023 ICC World Test Championship', type='tournament'), target=Node(id='final', type='match'), type='hasFinal'), Relationship(source=Node(id='final', type='match'), target=Node(id='Australia', type='team'), type='winner'), Relationship(source=Node(id='final', type='match'), target=Node(id='India', type='team'), type='loser'), Relationship(source=Node(id='Australia', type='team'), target=Node(id='ICC World Test Championship', type='tournament'), type='participant'), Relationship(source=

100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [12:18<00:00, 24.71s/it]

[Node(id='Indian Premier League', type='event', properties=[Property(key='name', value='Indian Premier League'), Property(key='season', value='14'), Property(key='year', value='2021'), Property(key='sponsor', value='VIVO')]), Node(id='Mumbai Indians', type='team', properties=[Property(key='name', value='Mumbai Indians'), Property(key='defendingChampions', value='true')]), Node(id='Punjab Kings', type='team', properties=[Property(key='name', value='Punjab Kings'), Property(key='previousName', value='Kings XI Punjab')]), Node(id='Board of Control for Cricket in India', type='organization', properties=[Property(key='name', value='Board of Control for Cricket in India'), Property(key='established', value='2007')]), Node(id='Chennai Super Kings', type='team', properties=[Property(key='name', value='Chennai Super Kings')]), Node(id='Kolkata Knight Riders', type='team', properties=[Property(key='name', value='Kolkata Knight Riders')]), Node(id='Ruturaj Gaikwad', type='person', properties=[Pro

100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [12:18<00:00, 29.53s/it]


# Specify which node labels should be extracted by the LLM
allowed_nodes = ["Person", "Company", "Location", "Event", "Movie", "Service", "Award"]

for i, d in tqdm(enumerate(documents), total=len(documents)):
    extract_and_store_graph(d, allowed_nodes)

In [48]:
#Query the knowledge graph in a RAG application
from langchain.chains import GraphCypherQAChain

graph.refresh_schema()

cypher_chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=llm,
    qa_llm=llm,
    validate_cypher=True, # Validate relationship directions
    verbose=True
)
#cypher_chain.run("When The Walt Disney Company ?")
#cypher_chain.run("Who is Virat Kohli ?")
#cypher_chain.run("from which country Virat Kohli Played Cricket ?")
#cypher_chain.run("What is the age of Virat Kohli?")
#cypher_chain.run("What are the awards Virat Kohli received and on which year?")
cypher_chain.run("which domestic league Virat Kohli plays for ?")
cypher_chain.run("Where ICC T20 world cup held ?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:person {name: "Virat Kohli"})-[:PLAYS_FOR]->(t:team)
RETURN t.name[0m
Full Context:
[32;1m[1;3m[{'t.name': 'Indian national cricket team'}, {'t.name': 'Royal Challengers Bengaluru'}, {'t.name': 'Delhi'}][0m

[1m> Finished chain.[0m


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have the information about where the ICC T20 World Cup was held."