In [1]:
from dotenv import load_dotenv
import os

# Common data processing
import json
import textwrap

# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI


# Warning control
import warnings
warnings.filterwarnings("ignore")

In [10]:
# Load from environment
load_dotenv('.env', override=True)
NEO4J_URL = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')


In [3]:
kg = Neo4jGraph(
    url="neo4j+s://013d8f5e.databases.neo4j.io", username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database="neo4j"
)

In [4]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import time
llm=ChatOpenAI(model="gpt-3.5-turbo")
output_parser=StrOutputParser()



In [5]:
prompt=ChatPromptTemplate.from_messages(
    [
        ("system","You are a expert in Insurance you will be give some information about insurace then you have to summerize it in onwe line"),
        ("user","Input:{question}")
    ]
)


In [6]:
def AI(input_text,prompt):
    chain=prompt|llm|output_parser
    ans=chain.invoke({'question':input_text})
    time.sleep(8)
    return ans

In [7]:
insurance_company="HDFC ERGO"
insurance_type="two-wheeler-insurance"
source="https://www.hdfcergo.com/two-wheeler-insurance"

In [43]:


# Assuming these global variables are defined somewhere in your script:
# prompt, insurance_company, insurance_type, source, AI

def txt_to_data(folder,about):
    contents = os.listdir(folder)
    print(contents)  # For debugging: prints out what files are seen in the directory
    chunks_with_metadata = []
    
    chunk_seq_id = 0
    # Loop through each file in the directory
    for filename in contents:
        file_path = os.path.join(folder, filename)  # Full path to the file
        try:
            with open(file_path, 'r') as file:
                content = file.read()
                # Use the global AI function with the globally defined prompt
                HeadLine = AI(content, prompt)
                
                # Append the content along with other metadata to the list
                chunks_with_metadata.append({
                    'text': content,
                    'chunkSeqId': chunk_seq_id,
                    'insurance_company': insurance_company,
                    'insurance_type': insurance_type,
                    'source': source,
                    'HeadLine': HeadLine,
                    'about':about
                })
                chunk_seq_id += 1
        except Exception as e:
            print(f"Failed to process {filename}: {e}")

    return chunks_with_metadata

# Example usage:
# result = txt_to_data('path_to_your_folder')
# print(result)


In [None]:
folder_path = r'D:\DEV\Knowladege graph\Data\HDFCERGO\buying'
first_file_chunks = txt_to_data(folder_path,'buying')
print(first_file_chunks)

folder_path_two = r'D:\DEV\Knowladege graph\Data\HDFCERGO\claim'
second_file_chunks = txt_to_data(folder_path,'claim')
print(second_file_chunks)

folder_path_three = r'D:\DEV\Knowladege graph\Data\HDFCERGO\feature'
third_file_chunks = txt_to_data(folder_path_three,'feature')
print(third_file_chunks)

folder_path_four = r'D:\DEV\Knowladege graph\Data\HDFCERGO\type'
fourth_file_chunks = txt_to_data(folder_path_four,'type')
print(fourth_file_chunks)

Final=[]
for i in first_file_chunks:
    Final.append(i)
    
for i in second_file_chunks:
    Final.append(i)
    
for i in third_file_chunks:
    Final.append(i)

for i in fourth_file_chunks:
    Final.append(i)


In [45]:
print(Final)

[{'text': 'How to Choose the Right Two wheeler Insurance Online?\nHere are useful tips to help you choose the right bike insurance policy as per your requirements and budget: -\n\n1. Know your Coverage :Requirement Before looking for bike insurance plan, it is essential to make an assessment based on your requirement and budget. While buying bike insurance policy you can choose between third party cover and comprehensive cover. Depending upon the usage of your two wheeler, you should choose bike insurance plan that offers coverage as per your requirement.\n\n2. Understand Insurance Declared Value (IDV) :   IDV is the current market value of your bike. IDV is the maximum sum insured fixed when purchasing a bike insurance policy and is the amount that insurer will pay in case of total loss or theft of two wheeler. Therefore, IDV is one of the most critical factors determining the two wheeler insurance premium.\n\n3. Look for Add-on to Extend your Bike Insurance Cover : Look for riders th

[]

In [50]:
kg.query("SHOW INDEXES")

[{'id': 0,
  'name': 'index_343aff4e',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'NODE',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': None,
  'readCount': 0},
 {'id': 1,
  'name': 'index_f7700477',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'RELATIONSHIP',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': None,
  'readCount': 0},
 {'id': 2,
  'name': 'unique_chunk',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'RANGE',
  'entityType': 'NODE',
  'labelsOrTypes': ['Chunk'],
  'properties': ['chunkId'],
  'indexProvider': 'range-1.0',
  'owningConstraint': 'unique_chunk',
  'lastRead': None,
  'readCount': 0}]

In [55]:
import uuid

merge_chunk_node_query = """
MERGE (mergedChunk:Chunk {chunkId: $chunkParam.chunkId})
    ON CREATE SET 
        mergedChunk.HeadLine = $chunkParam.HeadLine,
        mergedChunk.chunkId = $chunkParam.chunkId,
        mergedChunk.insurance_company = $chunkParam.insurance_company,
        mergedChunk.insurance_type= $chunkParam.insurance_type,
        mergedChunk.source = $chunkParam.source,
        mergedChunk.about = $chunkParam.about,
        mergedChunk.details = $chunkParam.text
RETURN mergedChunk
"""

node_count = 0
for chunk in Final:
    if not chunk.get('chunkId'):
        chunk['chunkId'] = str(uuid.uuid4())  # Generate a unique UUID if chunkId is None
    kg.query(merge_chunk_node_query, 
            params={'chunkParam': chunk})
    node_count += 1

print(f"Created {node_count} nodes")

# Verifying total number of nodes
result = kg.query("""
         MATCH (n:Chunk)
         RETURN count(n) as nodeCount
         """)
print(result)  # Adjust to your environment's way to print query results


Created 21 nodes
[{'nodeCount': 21}]


In [8]:
kg.query("""
         CREATE VECTOR INDEX `HDFCERGO` IF NOT EXISTS
          FOR (c:Chunk) ON (c.textEmbedding) 
          OPTIONS { indexConfig: {
            `vector.dimensions`: 1536,
            `vector.similarity_function`: 'cosine'    
         }}
""")

[]

In [12]:
pip install langchain_google_genai

Collecting langchain_google_genai
  Downloading langchain_google_genai-1.0.2-py3-none-any.whl.metadata (3.8 kB)
Collecting google-generativeai<0.6.0,>=0.5.0 (from langchain_google_genai)
  Downloading google_generativeai-0.5.1-py3-none-any.whl.metadata (3.9 kB)
Collecting google-ai-generativelanguage==0.6.2 (from google-generativeai<0.6.0,>=0.5.0->langchain_google_genai)
  Downloading google_ai_generativelanguage-0.6.2-py3-none-any.whl.metadata (5.6 kB)
Collecting google-api-core (from google-generativeai<0.6.0,>=0.5.0->langchain_google_genai)
  Using cached google_api_core-2.18.0-py3-none-any.whl.metadata (2.7 kB)
Collecting google-api-python-client (from google-generativeai<0.6.0,>=0.5.0->langchain_google_genai)
  Downloading google_api_python_client-2.126.0-py2.py3-none-any.whl.metadata (6.7 kB)
Collecting google-auth>=2.15.0 (from google-generativeai<0.6.0,>=0.5.0->langchain_google_genai)
  Using cached google_auth-2.29.0-py2.py3-none-any.whl.metadata (4.7 kB)
Collecting proto-plus

In [14]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
kg.query("""
    MATCH (chunk:Chunk) WHERE chunk.textEmbedding IS NULL
    WITH chunk, genai.vector.encode(
      chunk.details, 
      "GoogleGenerativeAIEmbeddings", 
      {
        token: $GoogleGenerativeAIEmbeddings 
        
      }) AS vector
    CALL db.create.setNodeVectorProperty(chunk, "textEmbedding", vector)
    """, 
    params={"GoogleGenerativeAIEmbeddings":"AIzaSyBPvN2LK5zayUy3_5IAa02q_RzReiCrdxc" } )
mbeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001",google_api_key="AIzaSyBPvN2LK5zayUy3_5IAa02q_RzReiCrdxc")


ClientError: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke function `genai.vector.encode`: Caused by: java.lang.RuntimeException: Vector encoding provider not supported: GoogleGenerativeAIEmbeddings}

In [79]:
kg.refresh_schema()
print(kg.schema)

Node properties are the following:
Chunk {chunkId: STRING, HeadLine: STRING, insurance_company: STRING, insurance_type: STRING, source: STRING, about: STRING, details: STRING}
Relationship properties are the following:

The relationships are the following:



In [None]:
folder_path = r'D:\DEV\Knowladege graph\Data\HDFCERGO\buying'
first_file_chunks = txt_to_data(folder_path,'buying')
print(first_file_chunks)

folder_path_two = r'D:\DEV\Knowladege graph\Data\HDFCERGO\claim'
second_file_chunks = txt_to_data(folder_path,'claim')
print(second_file_chunks)

folder_path_three = r'D:\DEV\Knowladege graph\Data\HDFCERGO\feature'
third_file_chunks = txt_to_data(folder_path_three,'feature')
print(third_file_chunks)

folder_path_four = r'D:\DEV\Knowladege graph\Data\HDFCERGO\type'
fourth_file_chunks = txt_to_data(folder_path_four,'type')
print(fourth_file_chunks)

Final=[]
for i in first_file_chunks:
    Final.append(i)
    
for i in second_file_chunks:
    Final.append(i)
    
for i in third_file_chunks:
    Final.append(i)

for i in fourth_file_chunks:
    Final.append(i)
