In [1]:
!pip install llama-index --quiet
!pip install neo4j --quiet
!pip install llama-index-graph-stores-neo4j --quiet
!pip install llama-parse --quiet
!pip install qdrant_client --quiet
!pip install llama-index-vector-stores-qdrant --quiet
!pip install llama-index-embeddings-fastembed --quiet
!pip install llama-index-llms-groq --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m187.4/187.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.2/374.2 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.8/295.8 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
import logging
from typing import List, Dict
import json
import re
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.indices.keyword_table import KeywordTableIndex
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.llms.groq import Groq
from llama_index.core import Settings, PropertyGraphIndex
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from llama_index.core import PromptTemplate
from llama_index.core.tools import QueryEngineTool
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector
from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)
from llama_index.core.response_synthesizers import TreeSummarize

import numpy as np
from google.colab import userdata
import nest_asyncio

nest_asyncio.apply()
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class IntegratedKnowledgeBaseQuery:
    def __init__(self):
        self.embed_model, self.llm = self._initialize_components()
        self.graph_store = self._setup_graph_store()
        self.vector_store = self._setup_vector_store()
        self.graph_index, self.vector_index = self._setup_index()

    def _initialize_components(self):
        embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
        Settings.embed_model = embed_model
        llm = Groq(model="llama3-70b-8192", api_key=userdata.get('GROQ_API_KEY'), temperature=0)
        Settings.llm = llm

        return embed_model, llm

    def _setup_graph_store(self):
        return Neo4jPropertyGraphStore(
            username='neo4j',
            url = "neo4j+s://c298635f.databases.neo4j.io",
            password = "N0lntUszAfuqwuT7-HEjhAs_pjzOGW5GeazIuQYVYMc",
            database="neo4j",
            refresh_schema=False,
            sanitize_query_output=True
        )

    def get_neo4j_schema(self):
        cypher_query = """
        CALL db.schema.visualization()
        """
        try:
            result = self.graph_store.structured_query(cypher_query)
            return result
        except Exception as e:
            logging.error(f"Error retrieving Neo4j schema: {str(e)}")
            return None

    def _setup_vector_store(self):
        return QdrantVectorStore(
            url="https://a4159bde-9d97-4b74-ab28-e52efb9e3152.europe-west3-0.gcp.cloud.qdrant.io:6333",
            api_key="IOYBrOoz6uyxJf8A_RNUEwev5ZOodlC59YIzkvR_1UC7VcGayXMY8w",
            collection_name="law_docs",
        )

    def _setup_index(self):
        storage_context = StorageContext.from_defaults(
            vector_store=self.vector_store,
            graph_store=self.graph_store,
        )
        graph_index = PropertyGraphIndex.from_existing(
            property_graph_store=self.graph_store,
            storage_context=storage_context)

        vector_index = VectorStoreIndex.from_vector_store(
            vector_store=self.vector_store,
            storage_context=storage_context,
        )
        return graph_index, vector_index

    def diagnose_stores(self):
        logging.info("Diagnosing graph store...")
        self._diagnose_graph_store()
        logging.info("Diagnosing vector store...")
        self._diagnose_vector_store()

    def _diagnose_graph_store(self):
        query = "MATCH (n) RETURN count(n) as node_count"
        result = self.graph_store.structured_query(query)
        node_count = result[0]['node_count']
        logging.info(f"Total nodes in the graph: {node_count}")

        query = "MATCH (n) RETURN DISTINCT labels(n) as node_types"
        result = self.graph_store.structured_query(query)
        node_types = [r['node_types'][0] for r in result if r['node_types']]
        logging.info(f"Node types in the graph: {', '.join(node_types)}")

        query = "MATCH (n) RETURN n LIMIT 5"
        result = self.graph_store.structured_query(query)
        logging.info("Sample nodes:")
        for record in result:
            logging.info(record['n'])

    def _diagnose_vector_store(self):
        collection_info = self.vector_store.client.get_collection(collection_name="law_docs")
        logging.info(f"Vector store collection info: {collection_info}")

    def format_case_details(self, results: List[Dict]) -> str:
        if not results:
            return "No case found with the given ID."

        case = results[0]['c']
        formatted_result = f"Case: {case.get('case_name', 'Unknown')}\n"
        formatted_result += f"Date Filed: {case.get('date_filed', 'Unknown')}\n"
        formatted_result += f"Court: {results[0]['court'].get('short_name', 'Unknown')}\n"
        formatted_result += f"Judges: {', '.join([j['name'] for j in results[0]['judges']])}\n"
        formatted_result += f"Author: {results[0]['author'].get('name', 'Unknown') if results[0]['author'] else 'Unknown'}\n"
        formatted_result += f"Attorneys: {', '.join([a['name'] for a in results[0]['attorneys']])}\n"
        formatted_result += f"Plaintiff: {results[0]['plaintiff'].get('name', 'Unknown') if results[0]['plaintiff'] else 'Unknown'}\n"
        formatted_result += f"Defendant: {results[0]['defendant'].get('name', 'Unknown') if results[0]['defendant'] else 'Unknown'}\n"
        formatted_result += f"Citations: {', '.join([c['text'] for c in results[0]['citations']])}\n"
        formatted_result += f"Opinion Type: {results[0]['opinion'].get('type', 'Unknown') if results[0]['opinion'] else 'Unknown'}\n"
        formatted_result += f"Docket Number: {results[0]['docket'].get('id', 'Unknown') if results[0]['docket'] else 'Unknown'}\n"

        return formatted_result

    def get_case_details(self, case_id):
        cypher_query = """
        MATCH (c:Case {id: $case_id})
        OPTIONAL MATCH (c)-[:DECIDED_BY]->(j:Judge)
        OPTIONAL MATCH (c)-[:AUTHORED_BY]->(a:Judge)
        OPTIONAL MATCH (c)-[:HEARD_IN]->(ct:Court)
        OPTIONAL MATCH (c)-[:REPRESENTED_BY]->(att:Attorney)
        OPTIONAL MATCH (p:Party)-[:FILED_CASE]->(c)
        OPTIONAL MATCH (c)-[:AGAINST]->(d:Party)
        OPTIONAL MATCH (c)-[:CITED_BY_BYCITED_BYit:Citation)
        OPTIONAL MATCH (c)-[:HAS_OPINION]->(o:Opinion)
        OPTIONAL MATCH (c)-[:HAS_DOCKET]->(docket:Docket)
        RETURN c, collect(DISTINCT j) as judges, a as author, ct as court,
               collect(DISTINCT att) as attorneys, p as plaintiff, d as defendant,
               collect(DISTINCT cit) as citations, o as opinion, docket
        """
        results = self.graph_store.structured_query(cypher_query, {"case_id": case_id})
        return results

    def format_graph_results(self, query):
        cypher_query = """
                MATCH (e)
                WHERE e.name CONTAINS $entity_name
                OPTIONAL MATCH (e)-[r]-(related)
                RETURN e as entity, type(r) as relationship_type, related
                LIMIT 10
                """
        # Extract key entities from the query
        entities = re.findall(r'\b[A-Z][a-z]+ (?:[A-Z][a-z]+ )*(?:Co\.|Corporation|Inc\.|LLC)\b|\b[A-Z][a-z]+\b', query)

        graph_results = []
        case_details = []
        for entity in entities:
            results = self.graph_store.structured_query(cypher_query, {"entity_name": entity})
            graph_results.extend(results)
            # Check if any of the results are Case nodes
            for result in results:
                if 'Case' in result['entity'].get('labels', []):
                    case_id = result['entity_id']
                    case_detail = self.get_case_details(case_id)
                    if case_detail:
                        formatted_case = self.format_case_details(case_detail)
                        case_details.append(formatted_case)
        formatted_results = []
        for graph_result in graph_results:
            entity = graph_result.get('entity', {})
            rel_type = graph_result.get('relationship_type')
            related = graph_result.get('related', {})

            entity_name = entity.get('name', 'Unknown')
            entity_type = next(iter(entity.get('labels', [])), 'Unknown')
            formatted_result = f"- {entity_name} ({entity_type})"

            if rel_type and related:
                related_name = related.get('name', 'Unknown')
                related_type = next(iter(related.get('labels', [])), 'Unknown')
                formatted_result += f"\n  {rel_type} {related_name} ({related_type})"

            formatted_results.append(formatted_result)

        return "\n".join(formatted_results), case_details

    def format_vector_results(self, query) -> str:
        query_vector = self.embed_model.get_text_embedding(query)
        vector_results = self.vector_store.client.search(
            collection_name="law_docs",
            query_vector=query_vector,
            limit=3
        )
        formatted_results = []
        for i, result in enumerate(vector_results, 1):
            # Access the payload and score attributes directly
              payload = result.payload
              score = result.score
              # Assuming 'content' is a field in your payload
              node_content = json.loads(result.payload['_node_content'])
              content = node_content.get('text', '')
              formatted_results.append(f"Document {i} (Score: {score:.4f}):\n{content[:300]}...")
        return "\n".join(formatted_results)

    def generate_llm_response(self, query: str, response: str, graph_results: List[Dict], vector_results: List[Dict], case_details: List[str]) -> str:
        graph_context = graph_results
        vector_context = vector_results

        prompt = f"""You are a highly knowledgeable Legal AI assistant specializing in analyzing court cases and legal precedents. Your task is to provide a very short and accurate response to the following query based on the information data provided.

        Query: {query}

        Data: {response}

        Knowledgebase Context: {graph_context} + {vector_context}

        Case Details: {case_details}

        Instructions:
        1. Analyze the Knowledgebase context, data and specific case details, extracting all relevant information related to the query.
        2. Provide a clear, concise, and well-structured response that directly addresses the query.
        3. Include specific details such as case names, courts, judges, plaintiffs, defendants, attorneys, dates filed, decision dates, case outcomes, judicial opinions and legal principles when available in any of the contexts but do not use the term 'document' or 'context' in your response.
        4. If the contexts contain information about multiple related cases or legal issues, combine and summarize them briefly and explain their relevance to the query.
        5. If there are any conflicting opinions or interpretations in the contexts, present them objectively and explain the implications.
        6. Use legal terminology accurately, but also provide explanations for complex terms to ensure clarity.
        7. If the contexts don't provide sufficient information to fully answer the query, clearly state what is known and what information is missing.
        8. Do not refer to the query, documents and contexts directly in your answer; instead, incorporate the information seamlessly into your response by saying "Based on my knowledge ...".
        9. Do not make assumptions or include information not present in the given contexts.
        10. Conclude your response with a brief summary of the key points.
        11. After your main response, suggest two follow-up questions that would be relevant for further exploration of the topic, prefaced with "For further exploration, you might consider asking:".

        Remember to maintain an objective, professional tone throughout your response. Do not refer to the query or contexts directly in your answer; instead, incorporate the information seamlessly into your response.

        Now, based on these instructions, please provide your comprehensive analysis and response."""

        llm_output = self.llm.complete(prompt).text
        return llm_output

    def query_knowledge_base(self, query: str) -> str:
        logging.info(f"Querying knowledge base: {query}")
        # try:
        # Create query engines
        graph_query_engine = self.graph_index.as_query_engine()
        vector_query_engine = self.vector_index.as_query_engine()

        # Create tools
        graph_tool = QueryEngineTool.from_defaults(
            query_engine=graph_query_engine,
            description="Useful for answering questions about relationships and connections between legal entities, cases, and concepts",
        )

        vector_tool = QueryEngineTool.from_defaults(
            query_engine=vector_query_engine,
            description="Useful for answering detailed questions about legal content, precedents, and case details",
        )

        TREE_SUMMARIZE_PROMPT_TMPL = (
            """You are a helpful legal AI assistant specialized in understanding the legal enquiries"""
        )
        tree_summarize = TreeSummarize(
            summary_template=PromptTemplate(TREE_SUMMARIZE_PROMPT_TMPL)
        )

        # Create router query engine
        router_query_engine = RouterQueryEngine(
            selector=LLMMultiSelector.from_defaults(),
            query_engine_tools=[graph_tool, vector_tool],
            summarizer=tree_summarize,
        )

        # Execute query
        response = router_query_engine.query(query)

        graph_results, case_details = self.format_graph_results(query)
        logging.info (f"Formatted graph results: {graph_results}")

        vector_results = self.format_vector_results(query)
        logging.info (f"Formatted vector results: {vector_results}")

        llm_response = self.generate_llm_response(query, str(response), graph_results, vector_results, case_details)
        logging.info(f"LLM response: {llm_response}")

        return str(llm_response)

def main():
    kb_query = IntegratedKnowledgeBaseQuery()
    kb_query.diagnose_stores()

    while True:
        print("\n--- Legal AI Knowledge Base ---")
        query = input("\nEnter your query (or 'quit' to exit): ")
        if query.lower() == 'quit':
            break
        response = kb_query.query_knowledge_base(query)
        print(response)
        print("\n" + "=" * 1000 + "\n")

if __name__ == "__main__":
    main()

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]


--- Legal AI Knowledge Base ---
Based on my knowledge, Marvin Green vs. Megan J. Brennan is a Supreme Court case decided on May 23, 2016. Marvin Green, the petitioner, brought a lawsuit against Megan J. Brennan, the Postmaster General, in the United States Court of Appeals for the Tenth Circuit. The case revolves around Title VII of the Civil Rights Act of 1964, which prohibits employment discrimination based on race, color, religion, sex, or national origin.

In this case, Justice Sotomayor delivered the opinion of the Court, but the specific details of the case, including the lower court's decision and the parties' arguments, are not provided in the available context.

The case was argued on November 30, 2015, and the Solicitor General, Donald B. Verrilli, Jr., and Acting Assistant Attorney General, Benjamin C. Mizer, represented the respondent, Megan J. Brennan.

Key points:

* Case name: Marvin Green vs. Megan J. Brennan
* Court: Supreme Court of the United States
* Decision date:

KeyboardInterrupt: Interrupted by user

In [2]:
from qdrant_client import QdrantClient
from google.colab import userdata


qdrant_client = QdrantClient(
            url="https://a4159bde-9d97-4b74-ab28-e52efb9e3152.europe-west3-0.gcp.cloud.qdrant.io:6333",
            api_key="IOYBrOoz6uyxJf8A_RNUEwev5ZOodlC59YIzkvR_1UC7VcGayXMY8w",
        )
# Get list of collections
collections = qdrant_client.get_collections()

# Print collection names
print("Existing collections:")
for collection in collections.collections:
    print(f"- {collection.name}")

# Optional: Print details of a specific collection
# Replace 'your_collection_name' with the actual name of your collection
collection_name = 'law_docs'
collection_info = qdrant_client.get_collection(collection_name)
print(f"\nDetails of collection '{collection_name}':")
print(collection_info)

Existing collections:
- law_docs

Details of collection 'law_docs':
status=<CollectionStatus.GREEN: 'green'> optimizer_status=<OptimizersStatusOneOf.OK: 'ok'> vectors_count=None indexed_vectors_count=0 points_count=16178 segments_count=2 config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConf

In [3]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
from google.colab import userdata

graph_store = Neo4jPropertyGraphStore(
    url = "neo4j+s://c298635f.databases.neo4j.io",
    username = "neo4j",
    password = "N0lntUszAfuqwuT7-HEjhAs_pjzOGW5GeazIuQYVYMc",
    refresh_schema=False,
    sanitize_query_output=True
)

# Get node types
node_types_query = "MATCH (n) RETURN DISTINCT labels(n) as node_types"
node_types_result = graph_store.structured_query(node_types_query)

# Get a sample of nodes
sample_nodes_query = "MATCH (n) RETURN n LIMIT 5"
sample_nodes_result = graph_store.structured_query(sample_nodes_query)

print("Node types:", node_types_result)
print("Sample nodes:", sample_nodes_result)

Node types: [{'node_types': ['Party', 'Defendant']}, {'node_types': ['Opinion']}, {'node_types': ['Case']}, {'node_types': ['Court']}, {'node_types': ['Party', 'Plaintiff']}, {'node_types': ['Judge']}, {'node_types': ['Attorney']}]
Sample nodes: [{'n': {'name': 'RadAmerica II, LLC'}}, {'n': {'id': '9782203', 'type': '010combined'}}, {'n': {'date_filed': '2023-08-28 00:00:00', 'nature_of_suit': 'nan', 'citation_count': '0', 'disposition': 'nan', 'posture': 'nan', 'case_name': 'Lisa Speach v. Bon Secours Health System, Inc.', 'decisiondate': 'nan', 'id': '9423230', 'case_name_full': 'nan', 'precedential_status': 'Unpublished', 'slug': 'lisa-speach-v-bon-secours-health-system-inc'}}, {'n': {'jurisdiction': 'Maryland, MD', 'id': 'court_9420150', 'type': 'S'}}, {'n': {'name': 'Mital Patel'}}]


In [4]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.llms.groq import Groq
from llama_index.core import Settings

embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model
llm = Groq(model="llama3-70b-8192", api_key=userdata.get('GROQ_API_KEY'))
Settings.llm = llm


vector_store = QdrantVectorStore(
    url="https://a4159bde-9d97-4b74-ab28-e52efb9e3152.europe-west3-0.gcp.cloud.qdrant.io:6333",
    api_key="IOYBrOoz6uyxJf8A_RNUEwev5ZOodlC59YIzkvR_1UC7VcGayXMY8w",
    collection_name="law_docs",
)

# Get collection info
collection_info = vector_store.client.get_collection(collection_name="law_docs")
print("Collection info:", collection_info)

# Get a sample of vectors
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store, storage_context=storage_context)

sample_query = "Sample query to retrieve vectors"
sample_results = index.as_retriever().retrieve(sample_query)
print("Sample vector results:", sample_results)

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/706 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

model_optimized.onnx:   0%|          | 0.00/66.5M [00:00<?, ?B/s]

Collection info: status=<CollectionStatus.GREEN: 'green'> optimizer_status=<OptimizersStatusOneOf.OK: 'ok'> vectors_count=None indexed_vectors_count=0 points_count=16178 segments_count=2 config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quant

In [None]:
import re
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

query = 'Marvin GREEN vs. Megan J. BRENNAN'

cypher_query = """
        MATCH (e)
        WHERE e.name CONTAINS $entity_name
        OPTIONAL MATCH (e)-[r]-(related)
        RETURN e as entity, type(r) as relationship_type, related
        LIMIT 10
        """

graph_store = Neo4jPropertyGraphStore(
            url = "neo4j+s://c298635f.databases.neo4j.io",
            username = "neo4j",
            password = "N0lntUszAfuqwuT7-HEjhAs_pjzOGW5GeazIuQYVYMc",
            refresh_schema=False,
            sanitize_query_output=True
        )
# Extract key entities from the query
entities = re.findall(r'\b[A-Z][a-z]+ (?:[A-Z][a-z]+ )*(?:Co\.|Corporation|Inc\.|LLC)\b|\b[A-Z][a-z]+\b', query)

graph_results = []
for entity in entities:
    results = graph_store.structured_query(cypher_query, {"entity_name": entity})
    graph_results.extend(results)

print (graph_results)

[{'entity': {'name': 'Marvin Gerber'}, 'relationship_type': 'FILED_CASE', 'related': {'date_filed': '2021-09-15 00:00:00', 'nature_of_suit': 'nan', 'citation_count': '0', 'disposition': 'nan', 'posture': 'nan', 'case_name': 'Marvin Gerber v. Henry Herskovitz', 'decisiondate': 'nan', 'id': '5093805', 'case_name_full': 'nan', 'precedential_status': 'Published', 'slug': 'marvin-gerber-v-henry-herskovitz'}}, {'entity': {'name': 'Marvin Gerber'}, 'relationship_type': 'FILED_CASE', 'related': {'date_filed': '2021-09-15 00:00:00', 'nature_of_suit': 'nan', 'citation_count': '0', 'disposition': 'nan', 'posture': 'nan', 'case_name': 'Marvin Gerber v. Henry Herskovitz', 'decisiondate': 'nan', 'id': '5094124', 'case_name_full': 'nan', 'precedential_status': 'Published', 'slug': 'marvin-gerber-v-henry-herskovitz'}}, {'entity': {'name': 'Megan Brennan'}, 'relationship_type': 'AGAINST', 'related': {'date_filed': '2021-04-27 00:00:00', 'nature_of_suit': 'Civil', 'citation_count': '0', 'disposition': '

In [None]:
def format_graph_results(query):
  cypher_query = """
          MATCH (e)
          WHERE e.name CONTAINS $entity_name
          OPTIONAL MATCH (e)-[r]-(related)
          RETURN e as entity, type(r) as relationship_type, related
          LIMIT 10
          """
  # Extract key entities from the query
  entities = re.findall(r'\b[A-Z][a-z]+ (?:[A-Z][a-z]+ )*(?:Co\.|Corporation|Inc\.|LLC)\b|\b[A-Z][a-z]+\b', query)

  graph_results = []
  for entity in entities:
      results = graph_store.structured_query(cypher_query, {"entity_name": entity})
      graph_results.extend(results)

  formatted_results = []
  for graph_result in graph_results:
      entity = graph_result.get('entity', {})
      rel_type = graph_result.get('relationship_type')
      related = graph_result.get('related', {})

      entity_name = entity.get('name', 'Unknown')
      entity_type = next(iter(entity.get('labels', [])), 'Unknown')
      formatted_result = f"- {entity_name} ({entity_type})"

      if rel_type and related:
          related_name = related.get('name', 'Unknown')
          related_type = next(iter(related.get('labels', [])), 'Unknown')
          formatted_result += f"\n  {rel_type} {related_name} ({related_type})"

      formatted_results.append(formatted_result)

  return "\n".join(formatted_results)

In [None]:
from typing import List, Dict

def generate_llm_response(query: str, graph_results: List[Dict], llm) -> str:
        graph_context = graph_results

        prompt = f"""You are a highly knowledgeable Legal AI assistant specializing in analyzing court cases and legal precedents. Your task is to provide a brief and accurate response to the following query based on the information from both a graph database and a vector database.

        Query: {query}

        Knowledgebase Context:
        {graph_context}

        Instructions:
        1. Analyze both the graph and vector database contexts, extracting all relevant information related to the query.
        2. Provide a clear, concise, and well-structured response that directly addresses the query.
        3. Include specific details such as case names, dates, court decisions, and legal principles when available in either context.
        4. If the contexts contain information about multiple related cases or legal issues, combine and summarize both briefly and explain their relevance to the query.
        5. If there are any conflicting opinions or interpretations in the contexts, present them objectively and explain the implications.
        6. Use legal terminology accurately, but also provide explanations for complex terms to ensure clarity.
        7. If the contexts don't provide sufficient information to fully answer the query, clearly state what is known and what information is missing.
        8. Do not refer to the query or contexts directly in your answer; instead, incorporate the information seamlessly into your response by saying "Based on my knowledge ...".
        8. Do not make assumptions or include information not present in the given contexts.
        9. Conclude your response with a brief summary of the key points.
        10. After your main response, suggest two follow-up questions that would be relevant for further exploration of the context, prefaced with "For further exploration, you might consider asking:".

        Remember to maintain an objective, professional tone throughout your response. Do not refer to the query or contexts directly in your answer; instead, incorporate the information seamlessly into your response.

        Now, based on these instructions, please provide your comprehensive analysis and response."""

        response = llm.complete(prompt)
        return response.text

In [None]:
graph_results = format_graph_results(query)
llm_response = generate_llm_response(query, graph_results, llm)
print(llm_response)

Based on my knowledge, the case of Marvin GREEN vs. Megan J. BRENNAN appears to be a unique instance, as there is no direct match in the provided graph and vector database contexts. However, I can provide some insights based on the available information.

The contexts do not contain specific details about the case, such as the court, date, or legal principles involved. However, it is possible to infer that Megan J. BRENNAN is a person involved in a legal matter, as she is mentioned in multiple instances with the "AGAINST" relationship.

There are several individuals named Megan with varying surnames (e.g., Boelstler, Michaud, Fields, Goddard, Barbero, and Davis) who are represented by unknown parties. It is unclear whether any of these individuals are related to Megan J. BRENNAN or if they are involved in separate legal matters.

Marvin GREEN, on the other hand, does not appear to be mentioned in the contexts, except for the query itself. There are two instances of Marvin Gerber, but t

In [None]:
import json

def format_vector_results(query) -> str:
  query_vector = embed_model.get_text_embedding(query)
  vector_results = qdrant_client.search(
      collection_name="law_docs",
      query_vector=query_vector,
      limit=3
  )
  formatted_results = []
  for i, result in enumerate(vector_results, 1):
      # Access the payload and score attributes directly
        payload = result.payload
        score = result.score
        # Assuming 'content' is a field in your payload
        node_content = json.loads(result.payload['_node_content'])
        content = node_content.get('text', '')
        formatted_results.append(f"Document {i} (Score: {score:.4f}):\n{content[:300]}...")
  return "\n".join(formatted_results)

In [None]:
query_vector1 = embed_model.get_text_embedding(query)
vector_results1 = qdrant_client.search(
    collection_name="law_docs",
    query_vector=query_vector1,
    limit=3
)

print (vector_results1)

[ScoredPoint(id='d1340400-0a54-4e88-a94b-a3ac00ef4a45', version=108, score=0.6918648, payload={'_node_content': '{"id_": "d1340400-0a54-4e88-a94b-a3ac00ef4a45", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "012cc61f-d792-4265-953b-9f8f4001e76b", "node_type": "4", "metadata": {}, "hash": "1e85bc9ee9514fef8596e3ab94d5a8302905b2c6bfd3187a5cb574da828fa098", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "f7cd1143-cb86-4974-9e2a-fd246fea810f", "node_type": "1", "metadata": {}, "hash": "8b47f225cdea59a6f6f51e431e5978c1d0bd7fe47e931541d63fd1cddb0f6607", "class_name": "RelatedNodeInfo"}}, "text": "parties: Marvin GREEN, Petitioner, docketnumber: No. 14-613., court: Supreme Court of the United States, decisiondate: Argued Nov. 30, 2015., attorneys: Donald B. Verrilli, Jr., Solicitor General, Counsel of Record, Benjamin C. Mizer, Acting Assistant Attorney General, Marleigh D. Dover, Charles W. Scar

In [None]:
format_vector_results(query)

'Document 1 (Score: 0.6919):\nparties: Marvin GREEN, Petitioner, docketnumber: No. 14-613., court: Supreme Court of the United States, decisiondate: Argued Nov. 30, 2015., attorneys: Donald B. Verrilli, Jr., Solicitor General, Counsel of Record, Benjamin C. Mizer, Acting Assistant Attorney General, Marleigh D. Dover, Charles W. ...\nDocument 2 (Score: 0.6807):\nMARVIN GREEN, PETITIONER v. MEGAN J. \n\n        BRENNAN, POSTMASTER GENERAL\n\n ON WRIT OF CERTIORARI TO THE UNITED STATES COURT OF \n\n            APPEALS FOR THE TENTH CIRCUIT\n\n                                 May 23, 2016 \n\n\n  JUSTICE SOTOMAYOR delivered the opinion of the Court.\n  Title VII of the Ci...\nDocument 3 (Score: 0.6713):\nDate Filed: 2015-01-27 00:00:00\nJudges: Dubose\nOpinion Id: 7229662\nOpinion Types: 020lead\n\n\nORDER\n\nKRISTI K. DuBOSE, District Judge.\nThis action is before the Court on the motion for summary judgment, brief in support of the motion, proposed determinations of undisputed facts and 

In [None]:
vector_results = format_vector_results(query)
llm_response = generate_llm_response(query, vector_results, llm)
print(llm_response)

Based on my knowledge, the case of Marvin Green vs. Megan J. Brennan revolves around a petition filed by Marvin Green against Megan J. Brennan, the Postmaster General, in the Supreme Court of the United States. The case was argued on November 30, 2015, and the decision was delivered on May 23, 2016.

The Supreme Court's opinion, delivered by Justice Sotomayor, addressed the issue of Title VII of the Civil Rights Act of 1964. The Court's decision is not explicitly stated in the provided contexts, but it is likely that the opinion pertained to the interpretation and application of Title VII in the context of employment discrimination.

Prior to the Supreme Court's decision, the case was heard in the United States Court of Appeals for the Tenth Circuit. Additionally, a related action was filed in a lower court, where District Judge Kristi K. DuBose presided over a motion for summary judgment.

The legal principles at play in this case likely involve the scope and application of Title VII,