In [None]:
## Install necessary libraries

# !pip install -U langchain-community
# !pip install langchain-openai
# !pip install llama-index
# !pip install llama-index-embeddings-langchain
# !pip install pyvis

In [3]:
import nest_asyncio
nest_asyncio.apply()
from dotenv import load_dotenv
# Load environment variables from a .env file
load_dotenv()
from IPython.display import Markdown, display
from pyvis.network import Network

import os
import pandas as pd

import openai
from langchain_openai import OpenAIEmbeddings

from llama_index.core import SimpleDirectoryReader
from llama_index.llms.openai import OpenAI
from llama_index.core import PropertyGraphIndex, VectorStoreIndex
from llama_index.core import Document
from llama_index.core.indices.property_graph import DynamicLLMPathExtractor
from llama_index.core import ServiceContext, GPTVectorStoreIndex
from langchain.chat_models import ChatOpenAI
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import QueryBundle
from llama_index.core.schema import NodeWithScore
from llama_index.core.retrievers import (
    BaseRetriever,
    VectorIndexRetriever,
)
from llama_index.core import Settings


from typing import List


# Set the OpenAI API key environment variable here only if it is not already
# provided through the .env file loaded by load_dotenv() above.
# Never commit a real key to the notebook.
# os.environ["OPENAI_API_KEY"] = "" #Enter your Key
# NOTE(review): presumably a workaround for "duplicate OpenMP runtime" errors
# raised when several native libraries are loaded in one process -- confirm it
# is still required in this environment.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [None]:

class GraphRagRetriever(BaseRetriever):
    """Custom retriever that performs both Vector search and Knowledge Graph search.

    The same query is sent to a vector retriever and to a property-graph
    retriever, and the two result lists are merged by node id:

    * ``mode="OR"``  -- every node returned by either retriever (union).
    * ``mode="AND"`` -- only nodes returned by both retrievers (intersection).

    If a node id is returned by both retrievers, the ``NodeWithScore`` coming
    from the graph retriever is the one that is kept.

    The merged list has a stable order: nodes appear in the order in which
    their id was first seen, vector results first and graph-only results
    after them.
    """

    def __init__(
        self,
        vector_retriever,
        pg_retriever,
        mode: str = "OR",
    ) -> None:
        """Store the two underlying retrievers and the merge mode.

        Args:
            vector_retriever: Retriever exposing ``retrieve(query_bundle)``,
                used for the vector-search side.
            pg_retriever: Retriever exposing ``retrieve(query_bundle)``,
                used for the knowledge-graph side.
            mode: ``"OR"`` (union, default) or ``"AND"`` (intersection).

        Raises:
            ValueError: If ``mode`` is neither ``"AND"`` nor ``"OR"``.
        """

        self._vector_retriever = vector_retriever
        self._pg_retriever = pg_retriever
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")
        self._mode = mode
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes for ``query_bundle`` from both retrievers and merge them.

        Args:
            query_bundle: The query forwarded unchanged to both retrievers.

        Returns:
            The merged nodes, de-duplicated by node id, in first-seen order.
        """

        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        pg_nodes = self._pg_retriever.retrieve(query_bundle)

        vector_ids = {n.node.node_id for n in vector_nodes}
        pg_ids = {n.node.node_id for n in pg_nodes}

        # A dict keeps insertion order, and re-assigning an existing key keeps
        # its position while replacing the value.  Walking vector hits first
        # and graph hits second therefore gives a deterministic order while
        # still letting the graph hit win for ids present in both lists.
        combined_dict = {}
        for scored_node in (*vector_nodes, *pg_nodes):
            combined_dict[scored_node.node.node_id] = scored_node

        if self._mode == "AND":
            # Iterate the ordered dict rather than a set intersection: set
            # iteration order over string ids varies between interpreter runs.
            return [
                scored_node
                for node_id, scored_node in combined_dict.items()
                if node_id in vector_ids and node_id in pg_ids
            ]

        # "OR": the dict already holds exactly the union of both id sets.
        return list(combined_dict.values())

In [5]:
# Load the person-to-job assignments, drop rows without a 'VRF ID',
# renumber the index from zero and keep the first 50 rows.
job_assigns_df = (
    pd.read_csv('data/person_to_job.csv')
    .dropna(subset=['VRF ID'])
    .reset_index(drop=True)
    .head(50)
)

In [6]:
# Job name: the second '-'-separated field of each VRF ID.
job_assigns_df['job'] = job_assigns_df['VRF ID'].map(
    lambda vrf_id: vrf_id.split('-')[1]
)

# Flatten multi-line skill descriptions: every newline becomes a single space.
job_assigns_df['Skillset'] = job_assigns_df['Skillset'].map(
    lambda skills: ' '.join(skills.split('\n'))
)

# One sentence per participant, combining the skills and the assigned job.
job_assigns_df['summary'] = (
    " Participant with skills: "
    + job_assigns_df['Skillset']
    + " was assigned to job: "
    + job_assigns_df['job']
)

# The corpus is all sentences concatenated, separated by '.'.
corpus = '.'.join(job_assigns_df['summary'])
corpus


' Participant with skills: Media and Communication / BPO Soft Skills / Enthusiastic Soft Skills / Soft-spoken and cordial was assigned to job:  Customer Support Representatives . Participant with skills: Soft Skills / Articulate in communication was assigned to job:  On ground Support . Participant with skills: Soft Skills / EnthusiasticBasic Computer Skills / Basic Computer (MS Office and Email) Skills was assigned to job:  Marketing . Participant with skills: General / Hospitality/ Hotel Manager was assigned to job:  Class Teacher . Participant with skills: Soft Skills / Articulate in communication was assigned to job:  Kannada Translator . Participant with skills: Soft Skills / Articulate in communication was assigned to job:  LSP . Participant with skills: Emedia / Digital Marketing was assigned to job:  Social media Manager . Participant with skills: Engineering / Electrical Engineer was assigned to job:  Content Support Executive (CSE) . Participant with skills: Education / Teach

In [7]:
#documents = SimpleDirectoryReader('data/').load_data()

# The whole corpus string is wrapped in a single Document.
documents = [Document(text=corpus)]

embeddings = OpenAIEmbeddings()
# llama-index's OpenAI LLM takes the model id through `model` (as the
# commented-out call below already does).  `model_name` is the LangChain
# spelling and is not the argument this class reads to choose the model,
# so the requested gpt-4o was not actually being selected.
llm = OpenAI(temperature=0, model="gpt-4o", max_tokens=4000)
# llm = OpenAI(temperature=0, model="text-davinci-002")

# Register the LLM and the embedding model as the global llama-index settings.
Settings.llm = llm
Settings.embed_model = embeddings

In [8]:

# Path extractor driven by the LLM, configured with the JOB / SKILL entity
# labels and three relation labels, at most 20 triplets per chunk, 4 workers.
kg_extractor = DynamicLLMPathExtractor(
    allowed_relation_types=["WORKS_WITH", "RELATED_TO", "SIMILAR_TO"],  # "USED_BY",
    allowed_entity_types=["JOB", "SKILL"],
    num_workers=4,
    max_triplets_per_chunk=20,
    llm=llm,
)

# Build the property graph index from the documents using that extractor.
pg_index = PropertyGraphIndex.from_documents(
    documents,
    kg_extractors=[kg_extractor],
    embed_model=embeddings,
    llm=llm,
    show_progress=True,
)



Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting and inferring knowledge graph from text: 100%|██████████| 2/2 [00:07<00:00,  3.93s/it]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.27it/s]
Generating embeddings: 100%|██████████| 8/8 [00:01<00:00,  6.78it/s]


In [9]:
# Retriever over the property graph (default settings); this is the graph
# side of the hybrid retriever.
pg_retriever = pg_index.as_retriever()

# Write the extracted graph to kg.html so it can be inspected.
pg_index.property_graph_store.save_networkx_graph(name="kg.html")

In [10]:
# Vector index built from the same documents, and a retriever on top of it;
# this is the vector side of the hybrid retriever.
vector_index = VectorStoreIndex.from_documents(documents=documents)
vector_retriever = VectorIndexRetriever(vector_index)


In [None]:
# Response synthesizer running in "tree_summarize" mode.
response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

# Hybrid retriever combining the vector and graph retrievers; the merge mode
# is left at the class default.
custom_retriever = GraphRagRetriever(
    vector_retriever=vector_retriever,
    pg_retriever=pg_retriever,
)

In [13]:
# Baseline: query engine over the vector index with default settings.
vector_query_engine = vector_index.as_query_engine()

# Graph-only query engine.
# NOTE(review): "retriever_mode" looks like a KnowledgeGraphIndex-era option --
# confirm that PropertyGraphIndex actually honours it.
pg_keyword_query_engine = pg_index.as_query_engine(
    response_mode="tree_summarize",
    retriever_mode="keyword",
    # setting to false uses the raw triplets instead of adding the text from the corresponding nodes
    include_text=False,
)

# Hybrid: the custom vector + graph retriever feeding the tree_summarize synthesizer.
custom_query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=custom_retriever,
)

In [16]:
# Ask the hybrid (vector + graph) engine a question and render the answer in bold.
response = custom_query_engine.query("Give me some information on my data")
answer_markdown = Markdown(f"<b>{response}</b>")
display(answer_markdown)

<b>Basic Computer Skills are required for various roles such as Administrative Activities (Back Office), Ashram Support, Platform Manager, Video calls, Tamil Translator, LSP, and Dining Support. Emedia skills are necessary for roles like Growth Marketing Manager and Promotions. Soft Skills, particularly being articulate in communication, are valued in positions like Sales Manager/Executive, Maintenance Support, Guiding Visitors, On ground Support, Marketing, Kannada Translator, IHS, Front Office Activities, and Customer Support Representatives. Additionally, Medical skills are relevant for Ashram Support, while Engineering skills are needed for roles like Content Support Executive (CSE) and Researcher.</b>