<a href="https://colab.research.google.com/github/stevereiner/llama-relik/blob/main/llama-relik-fastcoref.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Connection settings for your Neo4j graph database.
# Recommended: using Neo4j Aura, which provides a free cloud instance that can
# easily be accessed from a Google Colab notebook.
#
# Each value is read from an environment variable when one is set, so real
# credentials never have to be saved inside the notebook. The literals are
# placeholder fallbacks that you can replace with your own values.
import os

username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "your password")
url = os.environ.get("NEO4J_URI", "neo4j+s://xxxxxxxx.databases.neo4j.io")

# Your OpenAI key. setdefault() keeps a key that is already exported in the
# environment instead of overwriting it with the placeholder.
os.environ.setdefault("OPENAI_API_KEY", "sk-your-key")

In [None]:
!pip install --quiet fastcoref spacy
!pip install --quiet llama-index-extractors-relik llama-index-graph-stores-neo4j llama-index-llms-openai llama-index

In [None]:
# Download spaCy's "en_core_web_lg" model, which the coreference cell loads
# with spacy.load('en_core_web_lg').
!python -m spacy download en_core_web_lg

In [None]:
import nest_asyncio

# Apply the nest_asyncio patch before the graph store and index are created.
# NOTE(review): presumably required so async calls made by llama-index can run
# inside the notebook's already-running event loop — confirm.
nest_asyncio.apply()

In [None]:
from llama_index.graph_stores.neo4j import Neo4jPGStore

# Connect to the Neo4j instance described by username / password / url.
# The options are gathered in one mapping and unpacked into the constructor;
# refresh_schema is passed explicitly as False.
_neo4j_options = {
    "username": username,
    "password": password,
    "url": url,
    "refresh_schema": False,
}
graph_store = Neo4jPGStore(**_neo4j_options)

In [None]:
import pandas as pd

# How many articles (taken from the top of the dataset) to process.
NUMBER_OF_ARTICLES = 10

# Read the news dataset and keep only its first NUMBER_OF_ARTICLES rows.
news = (
    pd.read_csv("https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/news_articles.csv")
    .head(NUMBER_OF_ARTICLES)
)

In [None]:
# NOTE: spacy_component is never referenced by name. Per the fastcoref README,
# this import is what makes the 'fastcoref' component available to add_pipe().
from fastcoref import spacy_component
import spacy

# spaCy pipeline with the fastcoref coreference component appended.
coref_nlp = spacy.load('en_core_web_lg')
coref_nlp.add_pipe('fastcoref')

# Smoke test: run the pipeline on a short example and show both the detected
# coreference clusters and the rewritten text.
text = 'Alice goes down the rabbit hole. Where she would discover a new reality beyond her expectations.'
doc = coref_nlp(text, component_cfg={"fastcoref": {'resolve_text': True}})
# A bare expression in the middle of a cell is evaluated and then discarded,
# so the clusters are printed explicitly to make them appear in the output.
print(doc._.coref_clusters)
print(doc._.resolved_text)


def coref_text(text: str) -> str:
    """Return ``text`` after running it through the ``coref_nlp`` pipeline.

    The fastcoref component is invoked with ``resolve_text`` enabled.

    Args:
        text: Raw text to process.

    Returns:
        The ``resolved_text`` extension value of the processed document.
    """
    fastcoref_cfg = {"fastcoref": {'resolve_text': True}}
    return coref_nlp(text, component_cfg=fastcoref_cfg)._.resolved_text

# Quick check of coref_text() on a two-sentence example.
sample_sentence = "Tomaz is so cool. He can solve various Python dependencies and not cry"
print(coref_text(sample_sentence))

In [None]:
from llama_index.core import Document
from datetime import datetime

# Run coref_text() over every article and store the result in a new column.
news["coref_text"] = news["text"].apply(coref_text)

# One llama-index Document per article: "<title>: <coref_text column value>".
documents = [
    Document(text=f"{row['title']}: {row['coref_text']}") for _, row in news.iterrows()
]

# Spot check: show one processed article. The value is read from the column
# computed above instead of running the model on the same article a second
# time, and the lookup is guarded so that a NUMBER_OF_ARTICLES of 5 or fewer
# no longer raises.
SAMPLE_ARTICLE_POSITION = 5
if len(news) > SAMPLE_ARTICLE_POSITION:
    print(news["coref_text"].iloc[SAMPLE_ARTICLE_POSITION])

In [None]:
from llama_index.extractors.relik.base import RelikPathExtractor

# Extractor used when building the graph index. The model name and its
# configuration are named separately so they are easy to spot and change.
RELIK_MODEL = "relik-ie/relik-relation-extraction-small"
relik_model_config = {"skip_metadata": True}

relik = RelikPathExtractor(model=RELIK_MODEL, model_config=relik_model_config)

# Alternative for Colab Pro with a GPU runtime (uses the relik-cie-small model on CUDA):
# relik = RelikPathExtractor(
#    model="relik-ie/relik-cie-small", model_config={"skip_metadata": True, "device":"cuda"}
# )

In [None]:
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# OpenAI models handed to the index: an LLM (temperature 0.0) and an
# embedding model.
LLM_MODEL = "gpt-4o"
EMBEDDING_MODEL = "text-embedding-3-small"

llm = OpenAI(model=LLM_MODEL, temperature=0.0)
embed_model = OpenAIEmbedding(model_name=EMBEDDING_MODEL)

In [None]:
from llama_index.core import PropertyGraphIndex

# Build the property graph index from the prepared documents. relik is the
# only extractor, and the result is backed by graph_store.
index_settings = {
    "kg_extractors": [relik],
    "llm": llm,
    "embed_model": embed_model,
    "property_graph_store": graph_store,
    "show_progress": True,
}
index = PropertyGraphIndex.from_documents(documents, **index_settings)

In [None]:
# Ask a question against the graph index. The query engine is created with
# include_text=True.
question = "What happened at Ryanair?"
query_engine = index.as_query_engine(include_text=True)
response = query_engine.query(question)

print(str(response))