In [3]:
import logging
import os
import sys

import psycopg2
from dotenv import load_dotenv
from psycopg2.extras import Json

# Load environment variables
load_dotenv()

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("graph_db_ingestion")

# Define database connection parameters
conn_params = {
    "dbname": os.getenv("POSTGRES_DATABASE", "postgres"),
    "user": os.getenv("POSTGRES_USERNAME", "postgres"),
    "password": os.getenv("POSTGRES_PASSWORD", "postgres"),
    "host": os.getenv("POSTGRES_HOST", "localhost"),
    "port": os.getenv("POSTGRES_PORT", "5432"),
}

In [8]:
import openai
import numpy as np
import psycopg2
from psycopg2.extensions import register_adapter, AsIs
from psycopg2.extras import Json
import os
from dotenv import load_dotenv

# Load environment variables and specify the file path if needed

load_dotenv()

def addapt_vector(nparray):
    """Adapt a numpy array to the PostgreSQL VECTOR type."""
    # Convert numpy array to string with square brackets for PostgreSQL VECTOR type
    vector_str = ','.join(map(str, nparray))
    return AsIs(f"'[{vector_str}]'::VECTOR")

register_adapter(np.ndarray, addapt_vector)


deployment_id = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT")
openai.api_type = "azure"
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_key = os.getenv("AZURE_OPENAI_KEY")
openai.api_version = '2023-05-15'

def create_embeddings(query, deployment=None):
    if deployment is None:
        deployment = deployment_id  # Use the deployment ID as the default identifier
    
    try:
        # New API structure for embedding creation
        response = openai.embeddings.create(
            input=query,
            model=deployment
        )
        # embedding = response['data'][0]['embedding']
        embedding = response.data[0].embedding
        return np.array(embedding)
    except Exception as e:
        print(f"Error generating embeddings: {e}")
        return None


    
def execute_rerank_function(embedding, query, top_n, consider_n):
    conn_params = {
        "dbname": os.getenv("POSTGRES_DATABASE"),
        "user": os.getenv("POSTGRES_USERNAME"),
        "password": os.getenv("POSTGRES_PASSWORD"),
        "host": os.getenv("POSTGRES_HOST"),
        "port": os.getenv("POSTGRES_PORT"),
    }
    
    try:
        with psycopg2.connect(**conn_params) as conn:
            with conn.cursor() as cursor:
                cursor.execute("""
                    SELECT * FROM get_vector_rerank_pagerank_rrf2_cases(%s, %s, %s, %s);
                """, (embedding, query, top_n, consider_n))
                
                # Fetch the results
                results = cursor.fetchall()
                colnames = [desc[0] for desc in cursor.description]
                return [dict(zip(colnames, row)) for row in results]
    except psycopg2.Error as e:
        print(f"Error executing SQL function: {e}")
        return None


query = "Water leaking into the apartment from the floor above."
top_n = 5
consider_n = 10

# Step 1: Create the embedding in Python
embedding = create_embeddings(query)

if embedding is not None:
    # Step 2: Execute the PostgreSQL function with the embedding as a parameter
    results = execute_rerank_function(embedding, query, top_n, consider_n)

    # Output the results
    for result in results:
        print(result)
else:
    print("Failed to generate embedding.")


{'score': Decimal('0.01639344262295081967'), 'pagerank_rank': 1, 'relevance': 0.0, 'id': '615468', 'vector_rank': 1, 'abbr': 'Le Vette v. Hardman Estate', 'pagerank': Decimal('0.8718440727625433'), 'data': {'id': 615468, 'name': 'Mabel Le Vette, Appellant, v. Hardman Estate et al., Respondents', 'court': {'id': 9029, 'name': 'Washington Supreme Court', 'name_abbreviation': 'Wash.'}, 'analysis': {'sha256': '4361853e35bf3c3ccc332cec2f938ceae6f3b96b8e3b83e31725116a3a7d6386', 'simhash': '1:ad47d891b7f6505d', 'pagerank': {'raw': 3.3003634237380244e-07, 'percentile': 0.8718440727625433}, 'char_count': 10485, 'word_count': 1835, 'cardinality': 527, 'ocr_confidence': 0.537}, 'casebody': {'judges': [], 'parties': ['Mabel Le Vette, Appellant, v. Hardman Estate et al., Respondents.'], 'opinions': [{'text': "Morris, J.\nAppeal from an order of nonsuit and dismissal, in an action brought by a tenant to recover damages for injuries to her goods, caused by leakage of water from an upper story. The fa

In [None]:
from openai import AzureOpenAI

# Initialize Azure OpenAI client with key-based authentication
client = AzureOpenAI(
    azure_endpoint = config("AZURE_OPENAI_ENDPOINT"),
    api_key = config("AZURE_OPENAI_KEY"),
    api_version = "2024-05-01-preview",
)

def generate3(question, context):
    completion = client.chat.completions.create(
        model='gpt-4o',
        messages= [
            {
                "role": "system",
                "content":
                    """You are an Leagal Research AI Assistant that helps people understand relevant legal cases. Answer the user's QUESTION below using provided cases in the CONTEXT section. Keep your answer grounded in the cases in the CONTEXT section. If the CONTEXT section doesn’t contain the facts to answer the QUESTION, return NONE. Identify top 2 legal principles used in the cases provided, explain them and group prominent cases by those principles.
                    """
            },
            {
                "role": "user",
                "content": f"""
                QUESTION: {question}
                CONTEXT: {str(context)}
                """
            }
        ],
        max_tokens=800,
        temperature=0.3,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
        stream=False
    )
    return completion.choices[0].message.content

generate3(
    'Water leaking into the apartment from the floor above. What are the prominent legal precedents in Washington on this problem?',
    df.iloc[1:21]['data'].to_json()
)

In [4]:
import pandas as pd
import json

def exec(conn, query, params=()):
    with conn.cursor() as cur:
        cur.execute(query, params)
        results = cur.fetchall()
        results_df = pd.DataFrame(results, columns=[desc[0] for desc in cur.description])
        return results_df

In [5]:
with psycopg2.connect(**conn_params) as conn:
    df = exec(conn, """SELECT * FROM get_vector_rerank_pagerank_rrf2_cases('Water leaking into the apartment from the floor above.', 
                    50, 52);""")
    df_graph = df
    df

UndefinedFunction: function get_vector_rerank_pagerank_rrf2_cases(unknown, integer, integer) does not exist
LINE 1: SELECT * FROM get_vector_rerank_pagerank_rrf2_cases('Water l...
                      ^
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.


In [9]:
import pandas as pd
import json
import openai
import numpy as np
import psycopg2
from psycopg2.extensions import register_adapter, AsIs
from psycopg2.extras import Json
import os
from dotenv import load_dotenv

# Load environment variables and specify the file path if needed
load_dotenv()

# Database setup
def addapt_vector(nparray):
    """Adapt a numpy array to the PostgreSQL VECTOR type."""
    vector_str = ','.join(map(str, nparray))
    return AsIs(f"'[{vector_str}]'::VECTOR")

register_adapter(np.ndarray, addapt_vector)

conn_params = {
    "dbname": os.getenv("POSTGRES_DATABASE"),
    "user": os.getenv("POSTGRES_USERNAME"),
    "password": os.getenv("POSTGRES_PASSWORD"),
    "host": os.getenv("POSTGRES_HOST"),
    "port": os.getenv("POSTGRES_PORT"),
}

# OpenAI setup
deployment_id = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT")
openai.api_type = "azure"
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_key = os.getenv("AZURE_OPENAI_KEY")
openai.api_version = '2023-05-15'

# Function to generate embeddings using OpenAI
def create_embeddings(query, deployment=None):
    if deployment is None:
        deployment = deployment_id
    try:
        response = openai.embeddings.create(input=query, model=deployment)
        embedding = response.data[0].embedding
        return np.array(embedding)
    except Exception as e:
        print(f"Error generating embeddings: {e}")
        return None

# Function to execute SQL function and return a DataFrame
def execute_rerank_function(embedding, query, top_n, consider_n):
    try:
        with psycopg2.connect(**conn_params) as conn:
            with conn.cursor() as cursor:
                cursor.execute("""
                    SELECT * FROM get_vector_rerank_pagerank_rrf2_cases(%s, %s, %s, %s);
                """, (embedding, query, top_n, consider_n))
                
                results = cursor.fetchall()
                colnames = [desc[0] for desc in cursor.description]
                return pd.DataFrame(results, columns=colnames)
    except psycopg2.Error as e:
        print(f"Error executing SQL function: {e}")
        return None

# Azure OpenAI setup for generating legal insights
from openai import AzureOpenAI

# Initialize Azure OpenAI client
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    api_version=os.getenv("AZURE_OPENAI_VERSION")
)

def generate_legal_response(question, context):
    completion = client.chat.completions.create(
        model='gpt-4o',
        messages=[
            {
                "role": "system",
                "content": """You are a Legal Research AI Assistant that helps people understand relevant legal cases. Answer the user's QUESTION below using provided cases in the CONTEXT section. Keep your answer grounded in the cases in the CONTEXT section. If the CONTEXT section doesn’t contain the facts to answer the QUESTION, return NONE. Identify top 2 legal principles used in the cases provided, explain them and group prominent cases by those principles."""
            },
            {
                "role": "user",
                "content": f"QUESTION: {question}\nCONTEXT: {context}"
            }
        ],
        max_tokens=800,
        temperature=0.3,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
        stream=False
    )
    return completion.choices[0].message.content

# Main function to combine all steps
def run_legal_query(question, top_n=5, consider_n=10):
    query_text = "Water leaking into the apartment from the floor above."
    
    # Step 1: Generate embeddings for the query
    embedding = create_embeddings(query_text)
    
    if embedding is not None:
        # Step 2: Execute the SQL function with the embedding
        results_df = execute_rerank_function(embedding, query_text, top_n, consider_n)
        
        if not results_df.empty:
            # Step 3: Generate a legal AI response using the results as context
            context = results_df.to_json(orient='records')
            response = generate_legal_response(question, context)
            return response
        else:
            return "No relevant cases found."
    else:
        return "Failed to generate embedding."

# Example execution
result = run_legal_query(
    question="What are the prominent legal precedents in Washington on water leakage issues?"
)
print(result)


The prominent legal precedents in Washington on water leakage issues are primarily grounded in two legal principles: negligence and implied warranty of fitness. Here are the top two legal principles and how they are applied in the cases provided:

### 1. Negligence
Negligence is a failure to exercise the care that a reasonably prudent person would exercise in like circumstances. In the context of water leakage, this principle often involves the responsibility of landlords or municipalities to maintain premises or infrastructure to prevent foreseeable damage.

#### Cases:
- **Le Vette v. Hardman Estate**: The Washington Supreme Court held that the landlord could be liable for damages caused by water leakage from the upper stories of a building. The court emphasized that the landlord had a duty to control and preserve the upper stories to keep them in a condition of reasonable safety, especially since the landlord knew about previous break-ins and potential water damage risks.
- **Tombar

In [11]:
import pandas as pd
import json
import openai
import numpy as np
import psycopg2
from psycopg2.extensions import register_adapter, AsIs
from psycopg2.extras import Json
import os
from dotenv import load_dotenv

# Load environment variables and specify the file path if needed
load_dotenv()

# Database setup
def addapt_vector(nparray):
    """Adapt a numpy array to the PostgreSQL VECTOR type."""
    vector_str = ','.join(map(str, nparray))
    return AsIs(f"'[{vector_str}]'::VECTOR")

register_adapter(np.ndarray, addapt_vector)

conn_params = {
    "dbname": os.getenv("POSTGRES_DATABASE"),
    "user": os.getenv("POSTGRES_USERNAME"),
    "password": os.getenv("POSTGRES_PASSWORD"),
    "host": os.getenv("POSTGRES_HOST"),
    "port": os.getenv("POSTGRES_PORT"),
}

# OpenAI setup for Azure
deployment_id = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT")
openai.api_type = "azure"
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_key = os.getenv("AZURE_OPENAI_KEY")
openai.api_version = '2023-05-15'

# Function to generate embeddings using Azure OpenAI
def create_embeddings(query, deployment=None):
    if deployment is None:
        deployment = deployment_id
    try:
        response = openai.Embedding.create(input=query, deployment_id=deployment)
        embedding = response['data'][0]['embedding']
        return np.array(embedding)
    except Exception as e:
        print(f"Error generating embeddings: {e}")
        return None

# Generate the embedding for the query "water leaking"
water_leaking_vector = create_embeddings("water leaking")

print("water leaking vector:",water_leaking_vector)

if water_leaking_vector is not None:
    with psycopg2.connect(**conn_params) as conn:
        with conn.cursor() as cursor:
            # Step 1: Create the new table for the demo
            cursor.execute("CREATE TABLE IF NOT EXISTS demo_cases AS TABLE cases WITH NO DATA;")
            conn.commit()

            # Step 2: Get top 100 vector search results for 'water leaking'
            top_100_query = """
                SELECT id, data
                FROM cases
                ORDER BY description_vector <=> %s
                LIMIT 100;
            """
            cursor.execute(top_100_query, (water_leaking_vector,))
            top_100_cases = cursor.fetchall()
            top_100_ids = [row[0] for row in top_100_cases]
            print("Top 100 case IDs:", top_100_ids)
            print("Number of top 100 cases:", len(top_100_ids))

            # Step 3: Insert the top 100 cases into the demo table
            insert_query = """
                INSERT INTO demo_cases
                SELECT * FROM cases
                WHERE id = ANY(%s);
            """
            cursor.execute(insert_query, (top_100_ids,))
            conn.commit()

            # Step 4: Extract cited case IDs and add them to demo_cases
            cited_case_ids = set()  # Use a set to avoid duplicate entries

            for case_id, data in top_100_cases:
                cites_to = data.get("cites_to", [])
                
                for citation in cites_to:
                    case_ids = citation.get("case_ids", [])
                    cited_case_ids.update(case_ids)         

            # Remove any cited case IDs already in top_100_ids
            cited_case_ids.difference_update(top_100_ids)
            cited_case_ids = list(cited_case_ids)  # Convert back to list for SQL query
            print("Cited case IDs:", cited_case_ids)
            print("Number of cited cases:", len(cited_case_ids))

            # Step 5: Insert the cited cases into the demo table
            if cited_case_ids:
                insert_cited_query = """
                    INSERT INTO demo_cases
                    SELECT * FROM cases
                    WHERE id = ANY(%s::text[]);
                """
                cursor.execute(insert_cited_query, (cited_case_ids,))
                conn.commit()

    print("Demo table created with top 100 vector search results and cited references.")
else:
    print("Failed to generate embedding for 'water leaking'. Check Azure OpenAI setup.")


water leaking vector: [-0.03701193  0.01324961  0.04936923 ... -0.02346988  0.00664355
 -0.01079765]
Top 100 case IDs: ['3329625', '1346648', '1127960', '3335075', '4273930', '3862445', '3322332', '591938', '674990', '881896', '5228201', '300595', '3335809', '5752736', '615468', '3842559', '3863658', '645120', '838633', '520500', '881757', '1781976', '3841816', '3320973', '4938756', '3335022', '515664', '1804074', '1036848', '481657', '5281483', '522235', '512229', '3844082', '277671', '3867373', '1789717', '1031638', '1706351', '1784895']
Number of top 100 cases: 40
Cited case IDs: [3868672, 867842, 1789442, 568326, 32265, 904205, 5072397, 1070606, 3491856, 871953, 1774100, 1798164, 886295, 1127960, 936473, 701978, 782360, 3863069, 1608221, 3819038, 1192481, 778273, 488995, 8842279, 2139687, 1082926, 46127, 3878962, 1169970, 820275, 1817139, 918583, 5753912, 849980, 3826749, 881725, 467004, 2075202, 1199174, 4458569, 873548, 634444, 2498126, 3831887, 3284048, 780367, 670290, 866387, 1