In [1]:
import polars as pl
from neo4j import GraphDatabase
from google import genai
import os
import time
from extract_instructions import facts_of_case, purpose_and_character, nature_of, amount_used, market_eff, weigh_four_factors
from dotenv import load_dotenv

# Load environment variables (e.g. GEMINI_API) from the .env file one level
# above the current working directory.
dotenv_path = os.path.abspath("../.env")  # Adjust the path accordingly
load_dotenv(dotenv_path)

# Instruction text for each extraction task, keyed by the label the cells
# below use to look it up (e.g. instructions["Facts"]).
instructions = {
    'Facts': facts_of_case,
    'Purpose': purpose_and_character,
    'Nature': nature_of,
    'Amount': amount_used,
    'Market': market_eff,
    'Combined': weigh_four_factors
}

# Gemini client; raises KeyError if GEMINI_API is not set in the environment.
client = genai.Client(api_key=os.environ["GEMINI_API"])

# Neo4j connection settings. They can be overridden through the environment
# (or the .env file loaded above) so credentials do not have to live in the
# notebook; the previous hard-coded values remain as defaults.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
AUTH = (
    os.environ.get("NEO4J_USERNAME", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "fairusecases"),
)

In [2]:
def extraction(instruction: str, document: str):
    """Send ``instruction`` followed by ``document`` to Gemini and return the reply.

    The two strings are joined with a blank line and sent as one prompt to the
    ``gemini-2.0-flash`` model through the module-level ``client``.

    Args:
        instruction: Text placed before the document in the prompt.
        document: Text placed after the instruction.

    Returns:
        ``response.text`` after a 10 second pause on success, or ``None``
        (after a 60 second pause) if the request raised an exception.
    """
    try:
        response = client.models.generate_content(
            model = "gemini-2.0-flash",
            contents = instruction + "\n\n" + document
        )

        # Pause between successful calls before handing back the text.
        time.sleep(10)

        return response.text

    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the notebook, and report
        # what went wrong instead of hiding it.
        print(f"Extraction failed: {exc!r}")
        print("Going to Sleep!")
        time.sleep(60)

        return None

In [3]:
def opinion_does_not_have_facts(tx):
    """Fetch the URL and Document of every Opinion with no Facts node attached.

    Runs one Cypher query on the transaction ``tx`` selecting Opinion nodes
    that have no incoming ``OF`` relationship from a ``Facts`` node, and
    returns the result converted with ``to_df()`` (columns ``url`` and
    ``Document``).
    """
    query = """
        MATCH (o:Opinion)
        WHERE NOT EXISTS {
            MATCH (o)<-[:OF]-(:Facts)
        }
        RETURN o.URL as url, o.Document as Document
        """
    return tx.run(query).to_df()


# Run the read query in a managed read transaction and keep the resulting frame.
with GraphDatabase.driver(URI, auth=AUTH) as driver, driver.session(database="neo4j") as session:
    no_facts = session.execute_read(opinion_does_not_have_facts)

In [4]:
## Dealing with Facts

def opinion_does_not_have_facts(tx):
    """Fetch the Document of every Opinion with no Facts node attached.

    Runs one Cypher query on the transaction ``tx`` selecting Opinion nodes
    that have no incoming ``OF`` relationship from a ``Facts`` node, and
    returns the result converted with ``to_df()`` (single column
    ``Document``).
    """
    query = """
        MATCH (o:Opinion)
        WHERE NOT EXISTS {
            MATCH (o)<-[:OF]-(:Facts)
        }
        RETURN o.Document as Document
        """
    return tx.run(query).to_df()


# Read the opinions that still lack Facts, then convert the pandas frame
# returned by the query function into a polars DataFrame.
with GraphDatabase.driver(URI, auth=AUTH) as driver, driver.session(database="neo4j") as session:
    no_facts = session.execute_read(opinion_does_not_have_facts)

no_facts = pl.from_pandas(no_facts)

def add_facts_to_opinion(tx, full_text, doc):
    """Attach a Facts node holding ``doc`` to the Opinion whose text is ``full_text``.

    Matches Opinion nodes whose ``Document`` property equals ``full_text`` and
    MERGEs a ``Facts`` node (``Document = doc``) with an ``OF`` relationship
    pointing at each match. Returns whatever ``tx.run`` returns.

    NOTE(review): the result is returned without being consumed inside the
    transaction function - confirm no caller reads records from it.
    """
    cypher = """
        MATCH (o:Opinion {Document: $full_text})
        MERGE (f:Facts {Document: $doc})-[:OF]->(o)
        """
    params = {"full_text": full_text, "doc": doc}
    return tx.run(cypher, **params)

# Extract the facts for every opinion that lacks them and store each result.
# The driver is created once for the whole loop (it was previously rebuilt for
# every row); a short-lived session is still opened per write.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for opinion in no_facts.iter_rows(named=True):

        print("Processing: ",opinion["Document"][:20])

        extracted = extraction(instructions["Facts"], opinion["Document"])

        # extraction() hands back None when the model call failed; skip the row.
        if extracted is None:
            continue

        # Swap double quotes for single quotes before storing.
        # NOTE(review): the write uses query parameters, so this is not needed
        # for escaping - confirm whether it is still wanted.
        extracted = extracted.replace("\"", "\'")

        # The write result is intentionally not assigned: it used to be bound
        # to `no_facts`, clobbering the DataFrame being iterated.
        with driver.session(database="neo4j") as session:
            session.execute_write(add_facts_to_opinion, opinion["Document"], extracted)


In [None]:
## Dealing with Purpose

def opinion_does_not_have_purpose(tx):
    """Fetch the Document of every Opinion with no Purpose node attached.

    Runs one Cypher query on the transaction ``tx`` selecting Opinion nodes
    that have no incoming ``OF`` relationship from a ``Purpose`` node, and
    returns the result converted with ``to_df()`` (single column
    ``Document``).
    """
    query = """
        MATCH (o:Opinion)
        WHERE NOT EXISTS {
            MATCH (o)<-[:OF]-(:Purpose)
        }
        RETURN o.Document as Document
        """
    return tx.run(query).to_df()


# Read the opinions that still lack a Purpose node, then convert the pandas
# frame returned by the query function into a polars DataFrame.
with GraphDatabase.driver(URI, auth=AUTH) as driver, driver.session(database="neo4j") as session:
    no_purpose = session.execute_read(opinion_does_not_have_purpose)

no_purpose = pl.from_pandas(no_purpose)

def add_purpose_to_opinion(tx, full_text, doc):
    """Attach a Purpose node holding ``doc`` to the Opinion whose text is ``full_text``.

    Matches Opinion nodes whose ``Document`` property equals ``full_text`` and
    MERGEs a ``Purpose`` node (``Document = doc``) with an ``OF`` relationship
    pointing at each match. Returns whatever ``tx.run`` returns.

    NOTE(review): the result is returned without being consumed inside the
    transaction function - confirm no caller reads records from it.
    """
    cypher = """
        MATCH (o:Opinion {Document: $full_text})
        MERGE (f:Purpose {Document: $doc})-[:OF]->(o)
        """
    params = {"full_text": full_text, "doc": doc}
    return tx.run(cypher, **params)

# Extract the purpose analysis for every opinion that lacks one and store it.
# The driver is created once for the whole loop (it was previously rebuilt for
# every row); a short-lived session is still opened per write.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for opinion in no_purpose.iter_rows(named=True):

        print("Processing: ", opinion["Document"][:30])

        extracted = extraction(instructions["Purpose"], opinion["Document"])

        # extraction() hands back None when the model call failed; skip the row.
        if extracted is None:
            continue

        # Swap double quotes for single quotes before storing.
        # NOTE(review): the write uses query parameters, so this is not needed
        # for escaping - confirm whether it is still wanted.
        extracted = extracted.replace("\"", "\'")

        with driver.session(database="neo4j") as session:
            session.execute_write(add_purpose_to_opinion, opinion["Document"], extracted)


Processing:  
--- Page 1 ---
754 F.Supp.2d 


In [None]:
## Dealing with Nature

def opinion_does_not_have_nature(tx):
    """Fetch the Document of every Opinion with no Nature node attached.

    Runs one Cypher query on the transaction ``tx`` selecting Opinion nodes
    that have no incoming ``OF`` relationship from a ``Nature`` node, and
    returns the result converted with ``to_df()`` (single column
    ``Document``).
    """
    query = """
        MATCH (o:Opinion)
        WHERE NOT EXISTS {
            MATCH (o)<-[:OF]-(:Nature)
        }
        RETURN o.Document as Document
        """
    return tx.run(query).to_df()


# Read the opinions that still lack a Nature node, then convert the pandas
# frame returned by the query function into a polars DataFrame.
with GraphDatabase.driver(URI, auth=AUTH) as driver, driver.session(database="neo4j") as session:
    no_nature = session.execute_read(opinion_does_not_have_nature)

no_nature = pl.from_pandas(no_nature)

def add_nature_to_opinion(tx, full_text, doc):
    """Attach a Nature node holding ``doc`` to the Opinion whose text is ``full_text``.

    Matches Opinion nodes whose ``Document`` property equals ``full_text`` and
    MERGEs a ``Nature`` node (``Document = doc``) with an ``OF`` relationship
    pointing at each match. Returns whatever ``tx.run`` returns.

    NOTE(review): the result is returned without being consumed inside the
    transaction function - confirm no caller reads records from it.
    """
    cypher = """
        MATCH (o:Opinion {Document: $full_text})
        MERGE (f:Nature {Document: $doc})-[:OF]->(o)
        """
    params = {"full_text": full_text, "doc": doc}
    return tx.run(cypher, **params)

# Extract the nature analysis for every opinion that lacks one and store it.
# The driver is created once for the whole loop (it was previously rebuilt for
# every row); a short-lived session is still opened per write.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for opinion in no_nature.iter_rows(named=True):

        print("Processing: ",opinion["Document"][:30])

        extracted = extraction(instructions["Nature"], opinion["Document"])

        # extraction() hands back None when the model call failed; skip the row.
        if extracted is None:
            continue

        # Swap double quotes for single quotes before storing.
        # NOTE(review): the write uses query parameters, so this is not needed
        # for escaping - confirm whether it is still wanted.
        extracted = extracted.replace("\"", "\'")

        with driver.session(database="neo4j") as session:
            session.execute_write(add_nature_to_opinion, opinion["Document"], extracted)


In [7]:
## Dealing with Amount

def opinion_does_not_have_amount(tx):
    """Fetch the Document of every Opinion with no Amount node attached.

    Runs one Cypher query on the transaction ``tx`` selecting Opinion nodes
    that have no incoming ``OF`` relationship from an ``Amount`` node, and
    returns the result converted with ``to_df()`` (single column
    ``Document``).
    """
    query = """
        MATCH (o:Opinion)
        WHERE NOT EXISTS {
            MATCH (o)<-[:OF]-(:Amount)
        }
        RETURN o.Document as Document
        """
    return tx.run(query).to_df()


# Read the opinions that still lack an Amount node, then convert the pandas
# frame returned by the query function into a polars DataFrame.
with GraphDatabase.driver(URI, auth=AUTH) as driver, driver.session(database="neo4j") as session:
    no_amount = session.execute_read(opinion_does_not_have_amount)

no_amount = pl.from_pandas(no_amount)

def add_amount_to_opinion(tx, full_text, doc):
    """Attach an Amount node holding ``doc`` to the Opinion whose text is ``full_text``.

    Matches Opinion nodes whose ``Document`` property equals ``full_text`` and
    MERGEs an ``Amount`` node (``Document = doc``) with an ``OF`` relationship
    pointing at each match. Returns whatever ``tx.run`` returns.

    NOTE(review): the result is returned without being consumed inside the
    transaction function - confirm no caller reads records from it.
    """
    cypher = """
        MATCH (o:Opinion {Document: $full_text})
        MERGE (f:Amount {Document: $doc})-[:OF]->(o)
        """
    params = {"full_text": full_text, "doc": doc}
    return tx.run(cypher, **params)

# Extract the amount analysis for every opinion that lacks one and store it.
# The driver is created once for the whole loop (it was previously rebuilt for
# every row); a short-lived session is still opened per write.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for opinion in no_amount.iter_rows(named=True):

        print("Processing: ",opinion["Document"][:30])

        extracted = extraction(instructions["Amount"], opinion["Document"])

        # extraction() hands back None when the model call failed; skip the row.
        if extracted is None:
            continue

        # Swap double quotes for single quotes before storing.
        # NOTE(review): the write uses query parameters, so this is not needed
        # for escaping - confirm whether it is still wanted.
        extracted = extracted.replace("\"", "\'")

        # The write result is intentionally not assigned: it used to be bound
        # to `no_facts`, overwriting the Facts DataFrame from an earlier cell.
        with driver.session(database="neo4j") as session:
            session.execute_write(add_amount_to_opinion, opinion["Document"], extracted)


Processing:  <pre class="inline">          


In [8]:
## Dealing with Market

def opinion_does_not_have_market(tx):
    """Fetch the Document of every Opinion with no Market node attached.

    Runs one Cypher query on the transaction ``tx`` selecting Opinion nodes
    that have no incoming ``OF`` relationship from a ``Market`` node, and
    returns the result converted with ``to_df()`` (single column
    ``Document``).
    """
    query = """
        MATCH (o:Opinion)
        WHERE NOT EXISTS {
            MATCH (o)<-[:OF]-(:Market)
        }
        RETURN o.Document as Document
        """
    return tx.run(query).to_df()


# Read the opinions that still lack a Market node, then convert the pandas
# frame returned by the query function into a polars DataFrame.
with GraphDatabase.driver(URI, auth=AUTH) as driver, driver.session(database="neo4j") as session:
    no_market = session.execute_read(opinion_does_not_have_market)

no_market = pl.from_pandas(no_market)

def add_market_to_opinion(tx, full_text, doc):
    """Attach a Market node holding ``doc`` to the Opinion whose text is ``full_text``.

    Matches Opinion nodes whose ``Document`` property equals ``full_text`` and
    MERGEs a ``Market`` node (``Document = doc``) with an ``OF`` relationship
    pointing at each match. Returns whatever ``tx.run`` returns.

    NOTE(review): the result is returned without being consumed inside the
    transaction function - confirm no caller reads records from it.
    """
    cypher = """
        MATCH (o:Opinion {Document: $full_text})
        MERGE (f:Market {Document: $doc})-[:OF]->(o)
        """
    params = {"full_text": full_text, "doc": doc}
    return tx.run(cypher, **params)

# Extract the market analysis for every opinion that lacks one and store it.
# The driver is created once for the whole loop (it was previously rebuilt for
# every row); a short-lived session is still opened per write.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for opinion in no_market.iter_rows(named=True):

        print("Processing: ", opinion["Document"][:30])

        extracted = extraction(instructions["Market"], opinion["Document"])

        # extraction() hands back None when the model call failed; skip the row.
        if extracted is None:
            continue

        # Swap double quotes for single quotes before storing.
        # NOTE(review): the write uses query parameters, so this is not needed
        # for escaping - confirm whether it is still wanted.
        extracted = extracted.replace("\"", "\'")

        # The write result is intentionally not assigned: it used to be bound
        # to `no_facts`, overwriting the Facts DataFrame from an earlier cell.
        with driver.session(database="neo4j") as session:
            session.execute_write(add_market_to_opinion, opinion["Document"], extracted)


In [9]:
## Dealing with Conclusion
def opinion_does_not_have_conclusion(tx):
    """Fetch the Document of every Opinion with no Conclusion node attached.

    Runs one Cypher query on the transaction ``tx`` selecting Opinion nodes
    that have no incoming ``OF`` relationship from a ``Conclusion`` node, and
    returns the result converted with ``to_df()`` (single column
    ``Document``).
    """
    query = """
        MATCH (o:Opinion)
        WHERE NOT EXISTS {
            MATCH (o)<-[:OF]-(:Conclusion)
        }
        RETURN o.Document as Document
        """
    return tx.run(query).to_df()


# Read the opinions that still lack a Conclusion node, then convert the pandas
# frame returned by the query function into a polars DataFrame.
with GraphDatabase.driver(URI, auth=AUTH) as driver, driver.session(database="neo4j") as session:
    no_conclusion = session.execute_read(opinion_does_not_have_conclusion)

no_conclusion = pl.from_pandas(no_conclusion)

def add_conclusion_to_opinion(tx, full_text, doc):
    """Attach a Conclusion node holding ``doc`` to the Opinion whose text is ``full_text``.

    Matches Opinion nodes whose ``Document`` property equals ``full_text`` and
    MERGEs a ``Conclusion`` node (``Document = doc``) with an ``OF``
    relationship pointing at each match. Returns whatever ``tx.run`` returns.

    NOTE(review): the result is returned without being consumed inside the
    transaction function - confirm no caller reads records from it.
    """
    cypher = """
        MATCH (o:Opinion {Document: $full_text})
        MERGE (f:Conclusion {Document: $doc})-[:OF]->(o)
        """
    params = {"full_text": full_text, "doc": doc}
    return tx.run(cypher, **params)

# Extract the combined four-factor conclusion for every opinion that lacks one
# and store it. The driver is created once for the whole loop (it was
# previously rebuilt for every row); a short-lived session is still opened per
# write.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for opinion in no_conclusion.iter_rows(named=True):

        print("Processing: ", opinion["Document"][:30])

        extracted = extraction(instructions["Combined"], opinion["Document"])

        # extraction() hands back None when the model call failed; skip the row.
        if extracted is None:
            continue

        # Swap double quotes for single quotes before storing.
        # NOTE(review): the write uses query parameters, so this is not needed
        # for escaping - confirm whether it is still wanted.
        extracted = extracted.replace("\"", "\'")

        # The write result is intentionally not assigned: it used to be bound
        # to `no_facts`, overwriting the Facts DataFrame from an earlier cell.
        with driver.session(database="neo4j") as session:
            session.execute_write(add_conclusion_to_opinion, opinion["Document"], extracted)