In [1]:
# Show the interpreter this kernel is actually running. `!which python` only
# reports the first `python` on the shell PATH, which can differ from the
# kernel's interpreter and is unavailable on platforms without `which`.
import sys

python_path = sys.executable
python_path

/Users/tareksanger/Library/Caches/pypoetry/virtualenvs/lumis-tzzr_5k5-py3.12/bin/python


In [2]:
from lumis.config import config
import os
import pprint

# Shared pretty-printer; the Nebula helper functions below use it for status messages.
pp = pprint.PrettyPrinter(indent=4)

# Copy the API key from the project config into the process environment.
# NOTE(review): presumably the OpenAI clients created later read OPENAI_API_KEY
# from the environment — confirm.
os.environ["OPENAI_API_KEY"] = config.openai_api_key

In [3]:
import logging
import sys
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding


# Send log records to stdout so they appear in the cell output.
logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output


# define LLM
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings

# LLM and embedding model instances; both are assigned to Settings below.
llm = OpenAI(temperature=0, model="gpt-4-turbo")
embed_model = OpenAIEmbedding(model="text-embedding-3-large")


# Register the models and the chunk size on llama_index's Settings object.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 512

In [4]:
# Connection settings for the NebulaGraph instance.
# setdefault keeps any value already exported in the environment, so real
# credentials can be supplied from outside the notebook; the literals below
# are only fallbacks for a local default installation.
os.environ.setdefault("NEBULA_USER", "root")
os.environ.setdefault("NEBULA_PASSWORD", "nebula")  # default is "nebula"
os.environ.setdefault("NEBULA_ADDRESS", "127.0.0.1:9669")

# Graph space that the following cells drop, recreate and fill.
space_name = "stantec"

# Schema names handed to the graph store and the index.
# default, could be omitted if created from an empty kg
edge_types = ["relationship"]
rel_prop_names = ["relationship"]
tags = ["entity"]

In [5]:
def delete_space(address: str, space_name: str):
    """Drop the NebulaGraph space ``space_name`` if it exists.

    Credentials are read from the ``NEBULA_USER`` and ``NEBULA_PASSWORD``
    environment variables. A pool whose ``init`` returns a falsy value, or a
    DROP statement that does not succeed, is reported through ``pp``;
    exceptions raised by the driver propagate to the caller.

    Args:
        address: Graph service endpoint in ``host:port`` form.
        space_name: Name of the space to drop. It is interpolated into the
            nGQL statement unescaped, so only pass trusted identifiers.

    Raises:
        ValueError: If ``address`` is not ``host:port`` with a numeric port.
    """
    # Parse first so a malformed address fails fast, before the driver is
    # imported or a connection is attempted. rpartition splits on the last
    # colon; the port is handed to the driver as an int.
    host, sep, port = address.rpartition(":")
    if not sep or not host:
        raise ValueError(f"Expected address as 'host:port', got {address!r}")
    endpoint = (host, int(port))

    from nebula3.gclient.net import ConnectionPool
    from nebula3.Config import Config

    # Named pool_config so it does not shadow the notebook-level `config`.
    pool_config = Config()
    pool_config.max_connection_pool_size = 10

    connection_pool = ConnectionPool()
    try:
        if not connection_pool.init([endpoint], pool_config):
            pp.pprint("Failed to initialize connection pool.")
            return

        session = connection_pool.get_session(
            os.environ["NEBULA_USER"], os.environ["NEBULA_PASSWORD"]
        )
        try:
            drop_result = session.execute(f"DROP SPACE IF EXISTS {space_name}")
            # Surface a failed DROP instead of silently ignoring it.
            if not drop_result.is_succeeded():
                pp.pprint(["Failed to drop space.", drop_result.error_msg()])
        finally:
            session.release()
    finally:
        # Runs on every path, including a failed init or get_session, so the
        # pool is never left open.
        connection_pool.close()

In [6]:
# Drop the `space_name` space if it exists, so the following cells rebuild it from scratch.
delete_space(os.environ["NEBULA_ADDRESS"], space_name)

INFO:nebula3.logger:Get connection to ('127.0.0.1', '9669')


In [7]:
def create_namespace_if_not_exists(
    address: str, namespace_name: str, vid_type="FIXED_STRING", vid_size=256
):
    """Create the NebulaGraph space ``namespace_name`` unless it already exists.

    The space is created with ``partition_num=1`` and ``replica_factor=1``.
    Credentials are read from the ``NEBULA_USER`` and ``NEBULA_PASSWORD``
    environment variables. The outcome of the CREATE statement is printed
    through ``pp``; exceptions raised by the driver propagate to the caller.

    Args:
        address: Graph service endpoint in ``host:port`` form.
        namespace_name: Name of the space. It is interpolated into the nGQL
            statement unescaped, so only pass trusted identifiers.
        vid_type: ``"FIXED_STRING"`` selects ``fixed_string(vid_size)``; any
            other value selects ``int64``.
        vid_size: Length used for the fixed-string vertex id type.

    Raises:
        ValueError: If ``address`` is not ``host:port`` with a numeric port.
    """
    # Parse first so a malformed address fails fast, before the driver is
    # imported or a connection is attempted. rpartition splits on the last
    # colon; the port is handed to the driver as an int.
    host, sep, port = address.rpartition(":")
    if not sep or not host:
        raise ValueError(f"Expected address as 'host:port', got {address!r}")
    endpoint = (host, int(port))

    # Determine the vid_type specification
    if vid_type == "FIXED_STRING":
        vid_spec = f"fixed_string({vid_size})"
    else:
        vid_spec = "int64"
    create_space_command = (
        f"CREATE SPACE IF NOT EXISTS {namespace_name}"
        f"(vid_type={vid_spec}, partition_num=1, replica_factor=1)"
    )

    from nebula3.gclient.net import ConnectionPool
    from nebula3.Config import Config

    # Named pool_config so it does not shadow the notebook-level `config`.
    pool_config = Config()
    pool_config.max_connection_pool_size = 10

    connection_pool = ConnectionPool()
    try:
        if not connection_pool.init([endpoint], pool_config):
            pp.pprint("Failed to initialize connection pool.")
            return

        session = connection_pool.get_session(
            os.environ["NEBULA_USER"], os.environ["NEBULA_PASSWORD"]
        )
        try:
            # NOTE(review): NebulaGraph is documented to apply CREATE SPACE
            # asynchronously; callers may need to wait before using the new
            # space — confirm against the NebulaGraph docs.
            create_space_result = session.execute(create_space_command)
            if create_space_result.is_succeeded():
                pp.pprint(
                    [
                        f"Namespace '{namespace_name}' created successfully.",
                        create_space_result.comment(),
                    ]
                )
            else:
                pp.pprint(
                    ["Failed to create namespace.", create_space_result.error_msg()]
                )
        finally:
            session.release()
    finally:
        # Runs on every path, including a failed init or get_session, so the
        # pool is never left open.
        connection_pool.close()

In [8]:
# Create the `space_name` space with the helper's default vertex id settings (FIXED_STRING, size 256).
create_namespace_if_not_exists(os.environ["NEBULA_ADDRESS"], space_name)

INFO:nebula3.logger:Get connection to ('127.0.0.1', '9669')
["Namespace 'stantec' created successfully.", '']


In [9]:
from llama_index.core import StorageContext
from llama_index.graph_stores.nebula import NebulaGraphStore

# Graph store bound to the space and schema names defined earlier.
# NOTE(review): no connection details are passed here, so presumably the store
# reads NEBULA_USER / NEBULA_PASSWORD / NEBULA_ADDRESS from the environment — confirm.
graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)

# Only the graph store is set here.
# NOTE(review): the storage context may accept additional stores — confirm.
storage_context = StorageContext.from_defaults(graph_store=graph_store)


In [10]:
# Create the tag, edge type and tag index used by the graph store. Every
# statement uses IF NOT EXISTS so the cell can be re-run without failing on
# schema objects that already exist.
graph_store.execute("CREATE TAG IF NOT EXISTS entity(name string)")
graph_store.execute(
    "CREATE EDGE IF NOT EXISTS relationship(relationship string)"
)
graph_store.execute(
    "CREATE TAG INDEX IF NOT EXISTS entity_index ON entity(name(256))"
)

ResultSet(None)

In [34]:
from llama_index.readers.wikipedia import WikipediaReader

# Load the Wikipedia page that is indexed into the knowledge graph in the next cell.
loader = WikipediaReader()

# NOTE(review): auto_suggest=True may let the lookup substitute a suggested
# title — confirm "Stantec" resolves to the intended page.
documents = loader.load_data(
    pages=["Stantec"], auto_suggest=True,
)

print(f"Loaded {len(documents)} documents")

Loaded 1 documents


In [12]:
from llama_index.core import KnowledgeGraphIndex
from llama_index.core.node_parser import SentenceSplitter

# TODO: use pipelines for more control over data processing

# Build the knowledge-graph index from the loaded documents, using the
# Nebula-backed storage context.
# NOTE(review): space_name / edge_types / rel_prop_names / tags were already
# given to NebulaGraphStore; confirm from_documents actually uses them (and
# retriever_mode) rather than ignoring them.
index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=100,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
    retriever_mode='embedding'
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:openai._base_client:Retrying request to /chat/completions in 0.875570 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [13]:
# Company website URL; passed to CompanyInsights as root_domain in a later cell.

url = "https://www.stantec.com/en"

Grab info from the company website

In [33]:
from lumis.knowledge.CompanyInsights import CompanyInsights


# Collect documents for the company website rooted at `url`.
company_insights = CompanyInsights(root_domain=url)
website_documents = company_insights.get_documents()

# Show how many documents were collected.
pprint.pprint(len(website_documents))

# Disabled: indexing the website documents into the same graph store.
# Kept for reference; note that enabling it would rebind `index`.
# index = KnowledgeGraphIndex.from_documents(
#     website_documents,
#     storage_context=storage_context,
#     max_triplets_per_chunk=70,
#     space_name=space_name,
#     edge_types=edge_types,
#     rel_prop_names=rel_prop_names,
#     tags=tags,
#     include_embeddings=True,
#     retriever_mode="embedding",
# )

119


In [40]:
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import KnowledgeGraphRAGRetriever

# Retriever that works from the graph store held in the storage context.
graph_rag_retriever = KnowledgeGraphRAGRetriever(
    storage_context=storage_context,
    verbose=True,
)

# Query engine wrapping the retriever, using the "tree_summarize" response mode.
# NOTE(review): embedding_mode / include_text / similarity_top_k look like
# KnowledgeGraphIndex.as_query_engine options; confirm that
# RetrieverQueryEngine.from_args honors them rather than ignoring them.
query_engine = RetrieverQueryEngine.from_args(
    retriever=graph_rag_retriever,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    include_text=True,
    similarity_top_k=5,
)

In [41]:
# Ask the query engine for a structured market-entry analysis; the result is
# kept in `response` for the cells that inspect and render it.
response = query_engine.query(
"""
Identify opportunities that leverage Stantec's specific competitive advantages to compete in a market or 
industry that they don't currently compete in and explain why they are uniquely positioned to do so.

Provide your answer in the form of a structured summary. Include a summary of the competitive advantage, 
financial strengths and weaknesses, and the market or industry that they should enter.
"""
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [39]:
import pprint



# Debug check: print the embedding attribute of the first source node.
# Assumes the response has at least one source node; indexing [0] raises
# IndexError otherwise.
pprint.pprint(response.source_nodes[0].embedding)

None


In [31]:
from IPython.display import Markdown, display

# Render the answer text as Markdown instead of printing it as plain text.
display(Markdown(str(response.response)))

**Competitive Advantage Summary:**
Stantec has a robust competitive advantage in project management and infrastructure design, as evidenced by their experience in these areas under the leadership of Gord Johnston. Their involvement in diverse projects, including the FEMA Risk Map Production and Technical Services, showcases their capability in handling large-scale and nationwide projects. Additionally, Stantec's global presence, with locations in the United Kingdom and Turkey, and their operations across various regions, position them as a company with significant international reach and operational flexibility.

**Financial Strengths and Weaknesses:**
While specific financial details are not provided, Stantec’s involvement in large-scale projects and management of diverse projects suggest financial robustness necessary for undertaking substantial investments. Their ability to partner with potential clients and impact communities indicates a strong market position and community trust, which are crucial for financial stability. However, the expansion into new markets or industries could strain resources if not managed with careful financial planning and risk assessment.

**Suggested Market or Industry for Entry:**
Given Stantec’s expertise in infrastructure and project management, along with their experience in environmental services (as indicated by their project with FEMA), a promising new industry for them could be renewable energy infrastructure. This sector requires robust project management skills and benefits from companies that understand regulatory and environmental challenges. Stantec’s experience in managing diverse and large-scale projects, combined with their global operational capabilities, positions them uniquely to handle the complexities of renewable energy projects, which often span multiple international territories and require careful coordination of technical and environmental considerations.

**Rationale for Industry Entry:**
The renewable energy sector is growing globally, driven by increasing environmental concerns and the push for sustainable development. Stantec’s existing competencies in infrastructure design and project management, coupled with their commitment to community and environmental well-being, align well with the needs of this industry. Their global presence and ability to collaborate across various regions offer a strategic advantage in navigating the multinational landscape of renewable energy projects.

Query an already built GraphRAG

https://docs.llamaindex.ai/en/stable/examples/query_engine/knowledge_graph_rag_query_engine/