In [115]:
# Topic to search for; a later cell appends it to "Top academic papers on " to form the job query.
input_query = 'LLMs'

In [116]:
import psycopg2
import os

def connection():
    """Create, smoke-test, and return a new PostgreSQL connection.

    Connection parameters are read from the ``MY_INTEGRATION_USER``,
    ``MY_INTEGRATION_PASSWORD``, ``MY_INTEGRATION_HOST``,
    ``MY_INTEGRATION_PORT`` and ``MY_INTEGRATION_DATABASE`` environment
    variables.

    Returns:
        An open psycopg2 connection on success, or ``None`` if anything went
        wrong (missing environment variable, connect failure, failed test
        query). The error is printed in that case. The caller owns the
        returned connection and must close it.
    """
    conn = None
    try:
        conn = psycopg2.connect(
            user=os.environ["MY_INTEGRATION_USER"],
            password=os.environ["MY_INTEGRATION_PASSWORD"],
            host=os.environ["MY_INTEGRATION_HOST"],
            port=os.environ["MY_INTEGRATION_PORT"],
            database=os.environ["MY_INTEGRATION_DATABASE"],
        )

        # Run a trivial query so a broken connection is detected here rather
        # than at the caller's first statement.
        with conn.cursor() as cursor:
            cursor.execute("SELECT version();")
            cursor.fetchone()

        return conn

    except Exception as error:  # psycopg2.Error is a subclass of Exception
        print("Error while connecting to database", error)
        if conn is not None:
            # The connect succeeded but the test query failed: release the
            # handle instead of leaking it.
            try:
                conn.close()
            except Exception:
                # Best-effort cleanup; the original error was already reported.
                pass
        return None

# Open a connection as soon as this cell runs; connection() prints the error and
# returns None on failure, so this doubles as an early connectivity check.
# NOTE(review): the functions visible in this notebook each call connection()
# themselves and never use this module-level handle, and nothing visible closes
# it — confirm it is still needed.
conn = connection()

In [117]:
import json
from rich.console import Console
from rich.table import Table
from rich.text import Text

def display_query_papers(job_id):
    """Print the next page (up to 10 rows) of ranked papers for a job.

    Looks up the job's query text and its ``printed_ranks`` counter, selects
    the fully populated ``Query_Papers`` rows for that query whose
    ``final_rank`` is at or above the counter, renders them as a Rich table,
    and advances the counter of this job by the number of rows shown.

    Args:
        job_id: Value of ``jobs.job_id`` identifying the job to display.

    Returns:
        The number of rows printed by this call (0 when nothing is left to
        show), or ``None`` when no database connection could be opened or no
        job has this ID.
    """
    console = Console()

    conn = connection()
    if conn is None:
        # connection() has already printed the reason.
        return None

    c = None
    try:
        c = conn.cursor()

        # Fetch the query text and how many ranks were already printed.
        c.execute("SELECT query, printed_ranks FROM jobs WHERE job_id = %s", (job_id,))
        result = c.fetchone()
        if not result:
            print(f"No job found with ID: {job_id}")
            return None

        job_query, printed_ranks = result
        # Treat a NULL counter as "nothing printed yet".
        printed_ranks = printed_ranks or 0

        # NOTE(review): using the count of printed rows with ">=" resumes
        # correctly only if final_rank is 0-based; with 1-based ranks the last
        # row of a page would be shown again on the next call — confirm.
        c.execute("""
            SELECT * FROM Query_Papers 
            WHERE query = %s AND final_rank >= %s AND final_rank IS NOT NULL 
            AND relevant_answer IS NOT NULL AND paper_stats IS NOT NULL 
            AND paper_metadata_filtered IS NOT NULL AND download_link IS NOT NULL
            ORDER BY final_rank ASC
            LIMIT 10
        """, (job_query, printed_ranks))

        columns = [description[0] for description in c.description]
        rows = c.fetchall()
        if not rows:
            return 0

        # Resolve the column positions once instead of once per row.
        idx = {
            name: columns.index(name)
            for name in (
                'final_rank',
                'arxiv_link',
                'relevant_answer',
                'paper_stats',
                'paper_metadata_filtered',
            )
        }

        table = Table(show_header=True, title=job_query, expand=True, leading=1, show_lines=True)
        table.add_column("No.", style="cyan", justify="right", ratio=1)
        table.add_column("Paper", overflow="fold", ratio=20)  # widest column
        table.add_column("Details", overflow="fold", ratio=8)  # 8/20 of the "Paper" width
        table.add_column("Link", justify="center", ratio=2)

        for row in rows:
            final_rank = row[idx['final_rank']]
            arxiv_link = row[idx['arxiv_link']]
            relevant_answer = row[idx['relevant_answer']]
            paper_stats = json.loads(row[idx['paper_stats']])
            metadata = json.loads(row[idx['paper_metadata_filtered']])

            # "or" also covers keys that are present but null in the JSON.
            title = metadata.get('title', 'N/A')
            abstract = metadata.get('abstract') or 'N/A'
            if len(abstract) > 200:
                abstract = abstract[:197] + '...'
            # Keep only the date part of an ISO timestamp ("2024-01-02T..." -> "2024-01-02").
            published_date = str(metadata.get('published_date') or 'N/A').split('T')[0]
            authors = metadata.get('authors') or ['N/A']
            authors_str = ", ".join(authors[:3]) + ("..." if len(authors) > 3 else "")
            citations = paper_stats.get('citations', 'N/A')
            versions = paper_stats.get('versions', 'N/A')

            # Plain strings in table cells are parsed as Rich markup, which
            # makes this cell a clickable link.
            link_text = f"[link={arxiv_link}]Link[/link]"

            paper_column = Text(f"{title}\n\nLLM response: {relevant_answer}\n\nAbstract: {abstract}", justify="left")
            details_column = Text(f"Citations: {citations}\nVersions: {versions}\nDate Published: {published_date}\nAuthors: {authors_str} \n", justify="left")

            table.add_row(str(final_rank), paper_column, details_column, link_text)
            # Visual spacer between papers.
            table.add_section()
            table.add_row()

        ranks_printed_now = len(rows)

        # Advance the counter of this job only; keying on the query text would
        # also move every other job that shares the same query.
        c.execute(
            "UPDATE jobs SET printed_ranks = %s WHERE job_id = %s",
            (printed_ranks + ranks_printed_now, job_id),
        )
        conn.commit()

        console.print(table)
        return ranks_printed_now
    finally:
        # Always release database resources, including on early return or error.
        if c is not None:
            c.close()
        conn.close()

# # Example usage (display_query_papers takes a job ID, not the query text)
# job_id = add_new_job("Top academic papers on ReAct framework for agents")
# ranks_printed_now = display_query_papers(job_id)
# print(f"Ranks printed this time: {ranks_printed_now}")


In [118]:
def add_new_job(query):
    """Insert a new job row for ``query`` and return its generated job ID.

    The row is inserted into ``jobs`` with ``job_status`` set to ``'new'``.
    A confirmation message is printed once the insert has been committed.

    Args:
        query: The query text to store in ``jobs.query``.

    Returns:
        The ``job_id`` value returned by the INSERT.

    Raises:
        ConnectionError: If connection() could not open a database connection.
    """
    conn = connection()
    if conn is None:
        raise ConnectionError("Could not connect to the database; the job was not created.")

    c = None
    try:
        c = conn.cursor()
        # RETURNING hands back the generated key in the same round trip.
        c.execute("INSERT INTO jobs (query, job_status) VALUES (%s, 'new') RETURNING job_id", (query,))
        job_id = c.fetchone()[0]
        conn.commit()
    finally:
        # Release resources even if the INSERT or the commit fails.
        if c is not None:
            c.close()
        conn.close()

    # Announce the job only after it has really been committed.
    print("\nHello there, we're busy working on your query: '{}'. Job ID: {}".format(query, job_id))
    return job_id

# Build the job query from `input_query` (set in the first cell) and register it as a new job.
# To search for another topic, change `input_query`, e.g. input_query = "your input query here".
job_query = "Top academic papers on " + input_query
job_id = add_new_job(job_query)
# add_new_job already prints the query and its job ID, so no extra print is needed here.



Hello there, we're busy working on your query: 'Top academic papers on LLMs'. Job ID: 7


In [122]:
import sqlite3
import time
import datetime  # Import the datetime module
import os  # Import the os module for clearing the terminal
from IPython.display import clear_output

def wait_for_job_completion(job_id, poll_interval=1, timeout=None):
    """Poll a job until its status is ``'done'``, then display its papers.

    The job's ``job_status`` is read repeatedly. While it is anything other
    than ``'done'`` (for example ``'new'`` or ``'running'``), a progress line
    is printed and the function sleeps for ``poll_interval`` seconds before
    checking again. When the status becomes ``'done'``,
    ``display_query_papers(job_id)`` is called.

    Args:
        job_id: Value of ``jobs.job_id`` identifying the job to wait for.
        poll_interval: Seconds to sleep between status checks.
        timeout: Maximum number of seconds to wait, or ``None`` (the default)
            to wait indefinitely.

    Returns:
        None. Errors raised while polling or displaying are printed, not
        propagated.
    """
    conn = connection()
    if conn is None:
        # connection() has already printed the reason.
        return

    c = None
    counter = 0  # number of status checks that found the job unfinished
    deadline = None if timeout is None else time.monotonic() + timeout

    try:
        c = conn.cursor()
        while True:
            c.execute("SELECT job_status, query FROM jobs WHERE job_id = %s", (job_id,))
            result = c.fetchone()

            if not result:
                print(f"\nNo job found with ID: '{job_id}'.")
                break

            job_status, _ = result
            if job_status == 'done':
                clear_output(wait=True)
                display_query_papers(job_id)
                break

            if deadline is not None and time.monotonic() >= deadline:
                print(f"\nTimed out waiting for job with ID '{job_id}' (last status: '{job_status}').")
                break

            # Sleep on every unfinished status, not only 'running': jobs start
            # as 'new', and polling them without a pause would hammer the
            # database in a tight loop.
            clear_output(wait=True)
            counter += 1
            print(f"Checking status ({counter}): ", end=' ')
            print(f"The status of the job with ID '{job_id}' is currently '{job_status}'. Waiting for completion...")
            time.sleep(poll_interval)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Close database resources
        if c is not None:
            c.close()
        conn.close()

# Poll the job created in the previous cell until it is done, then display its ranked papers.
wait_for_job_completion(job_id)


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=6d52007a-f237-4857-b1f1-3ccb95216ee4' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>