In [None]:
!pip install pyvis
!pip install llama-index-vector-stores-chroma
!pip install llama-index-llms-huggingface
!pip install llama-index-llms-huggingface-api
!pip install "transformers[torch]" "huggingface_hub[inference]"
!pip install llama-index-embeddings-huggingface
!pip install llama-index-embeddings-instructor
!pip uninstall transformers -y
!pip install transformers
!pip install --upgrade protobuf

**Setting up the embedding model and the LLM**

In [None]:
# Import necessary libraries

import os
from typing import List, Optional
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
#from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# SECURITY: the Hugging Face token must never be written into the notebook.
# Export HUGGINGFACE_API_KEY in the environment that launches Jupyter instead.
# Any token that was ever committed in this cell should be revoked and regenerated.
# NOTE(review): no visible cell reads this variable; it appears to matter only
# if the commented-out Inference API import above is re-enabled.
if not os.environ.get("HUGGINGFACE_API_KEY"):
    print(
        "HUGGINGFACE_API_KEY is not set in the environment; "
        "set it before using the Hugging Face Inference API."
    )

# Setting the embedding model
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")


from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# LLM used for graph extraction and answering, accessed through Ollama.
# A generous request timeout is configured for this large model.
llm = Ollama(model="llama3.3:70b", request_timeout=300.0)

**Creating the property graph index (knowledge graph generation)**

In [None]:
from IPython.display import Markdown, display
from llama_index.core import Document

# Read the source text. The encoding is stated explicitly so the result does not
# depend on the platform's default locale encoding.
with open("SS-9&10.txt", "r", encoding="utf-8") as file:
    text_content = file.read()

# Wrap the raw text in a Document; the file handle is already closed at this point.
document = Document(text=text_content)

# Create a list of documents (even though it's just one document)
documents = [document]

from llama_index.core import PropertyGraphIndex
import nest_asyncio
# Allow nested event loops, since the notebook kernel already runs one.
nest_asyncio.apply()

# Creating Property Graph Index from the documents, using the configured
# LLM and embedding model; show_progress=True displays progress while building.
index = PropertyGraphIndex.from_documents(
    documents,
    llm=llm,
    embed_model=embed_model,
    show_progress=True,
)

**Saving the Property Graph Index**

In [None]:
# Export the property graph as an HTML file that can be opened for visual inspection
graph_html_path = "./SS-9&10.html"
index.property_graph_store.save_networkx_graph(name=graph_html_path)

In [None]:
from llama_index.core import Settings

# Register the embedding model and the LLM as the global defaults
Settings.embed_model = embed_model
Settings.llm = llm

In [None]:
from llama_index.core import StorageContext, load_index_from_storage

# Write the index's storage context to a directory on disk
persist_dir = "./SS-9&10"
index.storage_context.persist(persist_dir=persist_dir)

# Rebuild the index from that same directory
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
index = load_index_from_storage(storage_context)

# Query engine on top of the reloaded index, with source text included
query_engine = index.as_query_engine(include_text=True)

In [None]:
# Build a retriever on the index.
# include_text controls whether the source text is included (default True); it is turned off here.
retriever = index.as_retriever(include_text=False)

In [None]:
# Try the retriever on a single question and print the text of each hit
question = "your question here"  # Replace with your actual question

nodes = retriever.retrieve(question)

for retrieved in nodes:
    print(retrieved.text)

In [None]:
# Ask the query engine the same question and render its answer as Markdown
response = query_engine.query(question)
answer_markdown = Markdown(str(response.response))
display(answer_markdown)

**Answering all questions from the CSV file**

In [None]:
import pandas as pd

# Load the question list from CSV into a DataFrame
questions_csv = 'questions.csv'
df = pd.read_csv(questions_csv)
print(f"Loaded {len(df)} questions from CSV file")

# Preview the first rows
df.head()

In [None]:
# Process each question and generate answers using GraphRAG.
#
# For every row of `df`, the 'Question' value is sent to `query_engine` and the
# answer text is written to the 'GraphRAG Answer' column. A failing or unusable
# question is recorded with a sentinel value instead of aborting the whole run,
# and the DataFrame is checkpointed to CSV after every row.
ANSWER_COLUMN = 'GraphRAG Answer'
ERROR_ANSWER = "Error generating answer"

# Create the answer column explicitly (object dtype) rather than relying on
# implicit column creation by `.at`; answers from a previous run are kept.
if ANSWER_COLUMN not in df.columns:
    df[ANSWER_COLUMN] = None

total = len(df)

# `position` is a 1-based progress counter that does not depend on the index labels.
for position, (i, row) in enumerate(df.iterrows(), start=1):
    question = row['Question']

    if not isinstance(question, str) or not question.strip():
        # Empty cells are read as NaN (a float); slicing or querying them would
        # raise outside the try block and stop the whole loop.
        print(f"Skipping question {position}/{total}: empty or non-text value")
        answer = ERROR_ANSWER
    else:
        print(f"Processing question {position}/{total}: {question[:50]}...")
        try:
            # Get answer using GraphRAG query engine
            response = query_engine.query(question)
            answer = response.response
        except Exception as e:
            # Best effort: report the failure and continue with the next question.
            print(f"  Error processing question: {str(e)}")
            answer = ERROR_ANSWER

    # Store the answer in the DataFrame
    df.at[i, ANSWER_COLUMN] = answer

    # Save progress after each question
    df.to_csv('questions_answered.csv', index=False)

print("\nAll questions processed!")

In [None]:
# Preview the first five rows of the DataFrame
df.head(n=5)

In [None]:
# Write the DataFrame to the final CSV file (without the index column)
final_csv_path = 'questions_answered_final.csv'
df.to_csv(final_csv_path, index=False)
print("Final results saved to 'questions_answered_final.csv'")

In [None]:
# Spot-check a single question/answer pair
sample_index = 0  # Change this to view different Q&A pairs

print(f"Question: {df.iloc[sample_index]['Question']}")
print("\nGraphRAG Answer:")

# Render the stored answer as Markdown
sample_answer = df.iloc[sample_index]['GraphRAG Answer']
display(Markdown(sample_answer))