In [None]:
pip install psycopg2-binary oci pandas pyarrow tqdm ipython weasyprint sentence-transformers torch einops

In [None]:
# --- 1. Imports ---
import base64
import html
import os

import ipywidgets as widgets
import pandas as pd
import psycopg2
import weasyprint
from IPython.display import display, HTML, clear_output
from psycopg2 import sql
from sentence_transformers import SentenceTransformer

# --- 2. PostgreSQL Config ---
# Connection settings are taken from the standard libpq environment variables
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so that credentials and
# hosts do not have to be stored in the notebook.  When a variable is unset
# the previous hard-coded value is used, so existing setups keep working.
DB_CONFIG = {
    "dbname": os.environ.get("PGDATABASE", "postgres"),
    "user": os.environ.get("PGUSER", "postgres"),
    "password": os.environ.get("PGPASSWORD", ""),
    "host": os.environ.get("PGHOST", "10.150.2.103"),
    "port": os.environ.get("PGPORT", "5432"),
}

# --- 3. Load Embedding Model ---
# Sentence-embedding model used to turn search queries into vectors.
# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# model repository to run at load time — confirm the source is trusted.
EMBEDDING_MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"
model = SentenceTransformer(EMBEDDING_MODEL_NAME, trust_remote_code=True)

# --- 4. Function to Query Top Matches ---
def search_similar_sentences(input_text, jurisdiction_filter=None, source_filter=None, top_k=10):
    """Return the rows of ``legal_docs_v4`` most similar to ``input_text``.

    The text is embedded with the module-level ``model`` and compared against
    the ``embedding`` column with the ``<=>`` distance operator; the reported
    ``similarity`` is ``1 - distance``.

    Args:
        input_text: Query text to embed and search for.
        jurisdiction_filter: When truthy, only rows whose ``jurisdiction``
            equals this value are considered.
        source_filter: When truthy, only rows whose ``source`` equals this
            value are considered.
        top_k: Maximum number of rows to return (used as the SQL ``LIMIT``).

    Returns:
        A ``pandas.DataFrame`` with the columns ``id``, ``content``,
        ``jurisdiction``, ``source``, ``citation`` and ``similarity``,
        ordered from closest to farthest match.

    Raises:
        psycopg2.Error: If connecting to the database or running the query
            fails.
    """
    # NOTE(review): some embedding models expect a task prefix on the query
    # text (e.g. "search_query: ") — confirm this matches how the stored
    # embeddings were produced before changing anything here.
    embedding = model.encode(input_text).tolist()

    # The vector is sent as a "[v1,v2,...]" text literal and cast to ``vector``
    # inside the SQL statement.
    embedding_str = "[" + ",".join(map(str, embedding)) + "]"

    base_query = """
                SELECT id, content, jurisdiction, source, citation,
                      1 - (embedding <=> %s::vector) AS similarity
                FROM legal_docs_v4
            """

    # Optional equality filters; values are always passed as bound parameters.
    conditions = []
    params = [embedding_str]

    if jurisdiction_filter:
        conditions.append("jurisdiction = %s")
        params.append(jurisdiction_filter)
    if source_filter:
        conditions.append("source = %s")
        params.append(source_filter)

    if conditions:
        base_query += " WHERE " + " AND ".join(conditions)

    # Ordering by the raw distance expression (ascending) puts the closest
    # rows first; the embedding therefore appears a second time in params.
    base_query += " ORDER BY embedding <=> %s::vector LIMIT %s;"
    params.extend([embedding_str, top_k])

    # psycopg2's connection context manager only commits / rolls back the
    # transaction; it does NOT close the connection.  Close it explicitly so
    # that repeated searches do not leak server connections.
    conn = psycopg2.connect(**DB_CONFIG)
    try:
        with conn, conn.cursor() as cursor:
            cursor.execute(base_query, params)
            results = cursor.fetchall()
    finally:
        conn.close()

    # Convert to DataFrame
    return pd.DataFrame(
        results,
        columns=['id', 'content', 'jurisdiction', 'source', 'citation', 'similarity'],
    )
            
  
 

# --- 5. Search UI ---
# Filter choices, matching the values stored in the data schema.  The leading
# empty string is the "no filter" entry of each dropdown.
JURISDICTION_OPTIONS = [
    "",
    "new_south_wales",
    "victoria",
    "queensland",
    "south_australia",
    "tasmania",
    "norfolk_island",
    "western_australia",
    "commonwealth",
]
SOURCE_OPTIONS = [
    "",
    "nsw_caselaw",
    "south_australian_legislation",
    "high_court_of_australia",
    "western_australian_legislation",
    "queensland_legislation",
    "tasmanian_legislation",
    "federal_court_of_australia",
    "nsw_legislation",
]


def _make_filter_dropdown(label, choices):
    """Build a half-width dropdown showing ``choices`` under ``label``."""
    return widgets.Dropdown(
        options=list(choices),
        description=label,
        layout=widgets.Layout(width='50%'),
    )


# Free-text query field.
input_box = widgets.Textarea(
    value="Enter your legal query here...",
    placeholder="Enter sentence or legal query",
    description="Query:",
    layout=widgets.Layout(width='100%', height='100px'),
)

jurisdiction_dropdown = _make_filter_dropdown('Jurisdiction:', JURISDICTION_OPTIONS)
source_dropdown = _make_filter_dropdown('Source:', SOURCE_OPTIONS)

# Action buttons and the area all handlers render into.
search_button = widgets.Button(description="🔍 Search", button_style='primary')
export_button = widgets.Button(description="📝 Export to HTML/PDF", button_style='info')

output_area = widgets.Output()

# --- 6. Click Handler ---
def on_search_clicked(b):
    """Run a similarity search for the current form values and render the hits.

    Reads the query text and the two filter dropdowns, calls
    ``search_similar_sentences`` and writes one HTML card per result into
    ``output_area``.  On success the result DataFrame is stored on
    ``export_button.df`` so the export handler can pick it up.

    Args:
        b: The clicked button, as passed by the ``on_click`` callback (unused).
    """
    with output_area:
        output_area.clear_output()
        query = input_box.value.strip()
        if not query or query == "Enter your legal query here...":
            display(HTML("<b style='color:red;'>Please enter a search query</b>"))
            return

        # An empty dropdown selection means "no filter".
        jur = jurisdiction_dropdown.value or None
        src = source_dropdown.value or None

        # Forget the previous result set so that a failed or empty search can
        # never be followed by an export of stale, no-longer-displayed rows.
        export_button.df = None

        try:
            df = search_similar_sentences(query, jur, src)

            if df.empty:
                display(HTML("<b style='color:red;'>No results found</b>"))
                return

            display(HTML("<h3>🔎 Top Matching Legal Cases</h3>"))
            rows_html = []
            for _, row in df.iterrows():
                # Database text is escaped before being placed into markup:
                # legal text routinely contains '<', '>' and '&', which would
                # otherwise be interpreted as HTML.
                citation = html.escape(str(row["citation"]))
                jurisdiction = html.escape(str(row["jurisdiction"]))
                source = html.escape(str(row["source"]))
                content = html.escape(str(row["content"]))

                citation_html = f'<b>{citation}</b>'
                full_text_div = f"""
                    <details>
                        <summary><b>View Full Text</b></summary>
                        <pre style='white-space: pre-wrap; background:#f9f9f9; border:1px solid #ccc; padding:10px;'>{content}</pre>
                    </details>
                """
                rows_html.append(f"""
                    <div style='border:1px solid #ddd; padding:10px; margin-bottom:10px; border-radius:5px;'>
                        <b>Citation:</b> {citation_html}<br>
                        <b>Jurisdiction:</b> {jurisdiction}<br>
                        <b>Source:</b> {source}<br>
                        <b>Similarity Score:</b> {round(row['similarity'], 4)}<br>
                        {full_text_div}
                    </div>
                """)
            display(HTML("".join(rows_html)))
            export_button.df = df  # Store results for export

        except Exception as e:
            # Top-level UI boundary: show the failure in the output area
            # instead of letting it disappear into the widget callback.
            display(HTML(f"<b style='color:red;'>Error: {html.escape(str(e))}</b>"))

# --- 7. Export Handler ---
def on_export_clicked(b):
    """Render the last search results as HTML, convert to PDF and download it.

    Uses the DataFrame stored on ``export_button.df`` by the search handler.
    The generated HTML is shown in ``output_area``, converted to PDF with
    weasyprint, and the PDF is handed to the browser as a download named
    ``legal_search_results.pdf``.

    Args:
        b: The clicked button, as passed by the ``on_click`` callback (unused).
    """
    df = getattr(export_button, "df", None)
    if df is None or df.empty:
        with output_area:
            display(HTML("<b style='color:red;'>❌ No search results to export!</b>"))
        return

    try:
        # Create HTML content.  All database text is escaped so that '<', '>'
        # and '&' in legal text cannot break the exported markup.
        html_parts = ["<h2>Exported Legal Case Results</h2>"]
        for _, row in df.iterrows():
            citation = html.escape(str(row['citation']))
            jurisdiction = html.escape(str(row['jurisdiction']))
            source = html.escape(str(row['source']))
            content = html.escape(str(row['content']))
            html_parts.append(f"""
                <div style='margin-bottom:20px;'>
                    <b>Citation:</b> {citation}<br>
                    <b>Jurisdiction:</b> {jurisdiction}<br>
                    <b>Source:</b> {source}<br>
                    <b>Similarity:</b> {round(row['similarity'], 4)}<br>
                    <pre style='white-space: pre-wrap;'>{content}</pre>
                    <hr>
                </div>
            """)

        html_output = "\n".join(html_parts)
        with output_area:
            display(HTML("<h4>📄 Exported HTML Below:</h4>"))
            display(HTML(html_output))

            # Generate PDF
            pdf_bytes = weasyprint.HTML(string=html_output).write_pdf()
            display(HTML(f"<b>✅ PDF generated (size: {len(pdf_bytes)//1024} KB)</b>"))

            # Option to download.
            # The bytes must reach the Blob as a typed array: a plain JS array
            # of numbers inside the Blob parts list is stringified
            # ("37,80,68,..."), which produced a corrupt, non-PDF download.
            # Base64 is also far more compact than a decimal list.
            from IPython.display import Javascript
            pdf_b64 = base64.b64encode(pdf_bytes).decode("ascii")
            display(Javascript(f"""
                var pdfBytes = Uint8Array.from(atob("{pdf_b64}"), function (c) {{ return c.charCodeAt(0); }});
                var blob = new Blob([pdfBytes], {{type: 'application/pdf'}});
                var a = document.createElement('a');
                a.href = URL.createObjectURL(blob);
                a.download = 'legal_search_results.pdf';
                document.body.appendChild(a);
                a.click();
                document.body.removeChild(a);
            """))

    except Exception as e:
        # Top-level UI boundary: report the failure in the output area.
        with output_area:
            display(HTML(f"<b style='color:red;'>Export error: {html.escape(str(e))}</b>"))

# --- 8. Bind Buttons ---
# Attach each button to its click handler.
for _button, _handler in (
    (search_button, on_search_clicked),
    (export_button, on_export_clicked),
):
    _button.on_click(_handler)

# --- 9. Display UI ---
# Layout, top to bottom: query box, filter row, action row, shared output.
_filter_row = widgets.HBox([jurisdiction_dropdown, source_dropdown])
_action_row = widgets.HBox([search_button, export_button])
display(widgets.VBox([input_box, _filter_row, _action_row, output_area]))

<All keys matched successfully>


VBox(children=(Textarea(value='Enter your legal query here...', description='Query:', layout=Layout(height='10…