In [255]:
input_query = ''

In [264]:
# from IPython.display import HTML, Javascript

# def run_hello():
#     display(Javascript('alert("Hello, World!")'))

# display(HTML('<button onclick="alert(\'Hello, World!\')">Search</button>'))

In [256]:
import psycopg2
import os

def connection():
    """Creates and returns a new database connection."""
    try:
        conn = psycopg2.connect(
            user=os.environ["MY_INTEGRATION_USER"],
            password=os.environ["MY_INTEGRATION_PASSWORD"],
            host=os.environ["MY_INTEGRATION_HOST"],
            port=os.environ["MY_INTEGRATION_PORT"],
            database=os.environ["MY_INTEGRATION_DATABASE"]
        )
        
        # Test the connection
        with conn.cursor() as cursor:
            cursor.execute("SELECT version();")
            record = cursor.fetchone()
        
        return conn  # Return the connection object if successful

    except (Exception, psycopg2.Error) as error:
        print("Error while connecting to database", error)
        return None  # Return None if connection was not successful

In [272]:
import json
from rich.console import Console
from rich.table import Table
from rich.text import Text

def display_query_papers(job_id):  # Change parameter to job_id
    # Create a console object for Rich output
    console = Console()

    try:
        # Connect to the database
        conn = connection()
        c = conn.cursor()
    except Exception as e:
        console.print(f"[red]Error connecting to database: {e}[/red]")
        return
    
    try:
        # Fetch the query and responses for the given job_id
        c.execute("SELECT query, gpt_response, perplexity_response FROM jobs WHERE job_id = %s", (job_id,))
        result = c.fetchone()
        if not result:
            console.print(f"[yellow]No job found with ID: {job_id}[/yellow]")
            return
        job_query, gpt_response, perplexity_response = result  # Unpack the result

        # Print gpt_response and perplexity_response at the top
        # if gpt_response or perplexity_response:
        #     console.print(f"{gpt_response}\n\n{perplexity_response}", style="bold magenta")

        # Fetch records for the given job_id
        c.execute("""
            SELECT * FROM (
                SELECT DISTINCT ON (arxiv_link) * 
                FROM Query_Papers 
                WHERE job_id = %s AND final_rank IS NOT NULL 
                AND paper_stats IS NOT NULL 
                AND paper_metadata_filtered IS NOT NULL AND download_link IS NOT NULL
                ORDER BY arxiv_link, final_rank ASC
            ) AS distinct_papers
            ORDER BY final_rank ASC
            LIMIT 10
        """, (job_id,))

        rows = c.fetchall()
        # Here we define 'columns' by extracting the names from the cursor description
        columns = [desc[0] for desc in c.description]
        if not rows:
            console.print(f"[yellow]No results found for job ID: {job_id}[/yellow]")
            return

        # Initialize a Rich table with improved formatting
        table = Table(show_header=True, title=job_query, expand=True, leading=1, show_lines=True)
        table.add_column("No.", style="cyan", justify="right", ratio=1)
        table.add_column("Paper", overflow="fold", ratio=20)
        table.add_column("Details", overflow="fold", ratio=8)
        table.add_column("Link", justify="center", ratio=2)

        for index, row in enumerate(rows, start=1):  # start=1 means numbering starts from 1
            # Extract and format necessary data from the row
            arxiv_link = row[columns.index('arxiv_link')]
            relevant_answer = row[columns.index('relevant_answer')]
            paper_stats = json.loads(row[columns.index('paper_stats')])
            paper_metadata_filtered = json.loads(row[columns.index('paper_metadata_filtered')])
            
            # Format extracted data
            title = paper_metadata_filtered.get('title', 'N/A')
            abstract = paper_metadata_filtered.get('abstract', 'N/A')
            abstract = (abstract[:197] + '...') if len(abstract) > 200 else abstract
            published_date = paper_metadata_filtered.get('published_date', 'N/A').split('T')[0] if paper_metadata_filtered.get('published_date', 'N/A') != 'N/A' else 'N/A'
            authors = paper_metadata_filtered.get('authors', ['N/A'])
            authors_str = ", ".join(authors[:3]) + ("..." if len(authors) > 3 else "")
            citations = paper_stats.get('citations', 'N/A')
            versions = paper_stats.get('versions', 'N/A')

            # Add the clickable 'Link' text
            link_text = f"[link={arxiv_link}]Link[/link]"

            # Format the Paper and Details columns
            # Prepare the LLM response part only if clean_relevant_answer is not empty
            clean_relevant_answer = "" if relevant_answer is None else relevant_answer.strip()
            llm_response_part = f"\n\nLLM response: {clean_relevant_answer}" if clean_relevant_answer else ""

            # Prepare full text for 'paper_column' and 'details_column' before creating Text objects
            paper_text = f"{title}{llm_response_part}\n\nAbstract: {abstract}"
            details_text = f"Citations: {citations}\nVersions: {versions}\nDate Published: {published_date}\nAuthors: {authors_str} \n"

            # Now create Text objects using the prepared strings
            paper_column = Text(paper_text, justify="left")
            details_column = Text(details_text, justify="left")
            
            # Add row with formatted data
            table.add_row(str(index), paper_column, details_column, link_text)
            table.add_section()
            table.add_row()

                # Print the table to the console
        console.print(table)

    except Exception as e:
        console.print(f"[red]Error while fetching data from database: {e}[/red]")
    finally:
        # Close database connections safely
        try:
            c.close()
            conn.close()
        except Exception as e:
            console.print(f"[red]Error closing database connections: {e}[/red]")

# Example usage
# job_id = 49  # "Top academic papers on function calling"
# ranks_printed_now = display_query_papers(job_id)
# print(f"Ranks printed this time: {ranks_printed_now}")


In [262]:
def add_new_job(query):
    # Connect to the database
    conn = connection()
    c = conn.cursor()

    # SQL statement to insert a new job and return its id
    c.execute("INSERT INTO jobs (query, job_status) VALUES (%s, 'new') RETURNING job_id", (query,))

    # Fetch the job_id of the newly inserted job
    job_id = c.fetchone()[0]
    print("\nHello there, we're busy working on your query: '{}'. Please consider that processing time is between 30 to 90 seconds.".format(input_query))

    # Commit the changes and close the connection
    conn.commit()
    c.close()
    conn.close()

    # Return the job_id for further use
    return job_id

# Usage
# input_query = "your input query here" 
# print("Job ID for query '{}': {}".format(job_query, job_id))


In [265]:
import time
import datetime  # Import the datetime module
import os  # Import the os module for clearing the terminal
from IPython.display import clear_output

def wait_for_job_completion(job_id):  # Use job_id instead of job_query as the function parameter

    # Connect to the database
    conn = connection()
    c = conn.cursor()
    counter = 0  # Initialize the counter
    last_read_line = 0
    last_log_message = ""

    try:
        while True:  # Keep checking until the job is done
            # SQL statement to find the status of a job given its job_id
            c.execute("SELECT job_status, terminal_output FROM jobs WHERE job_id = %s", (job_id,))  # Use job_id to search
            result = c.fetchone()

            if result:
                job_status, terminal_output = result  # Unpack the result into job_status and terminal_output
                if job_status == 'done':
                    clear_output(wait=True)  # Clear output and wait for the next
                    display_query_papers(job_id)  # Assume this function should now work with job_query
                    break  # Exit the loop if the job is done
                elif job_status == 'running':
                    clear_output(wait=True)  # Clear output and wait for the next
                    counter += 1
                    print(f"Checking status ({counter} sec.): ", end=' ')  # Print counter with "Running..."
                    print(f"The status of the job with ID '{job_id}' is currently '{job_status}'. Waiting for completion...")

                    # Path to the log file
                    log_file_path = os.path.join(os.getcwd(), 'logs', f"{job_id}.log")
                    if os.path.exists(log_file_path):
                        with open(log_file_path, 'r') as file:
                            log_contents = file.readlines()
                            # Check if there are any new log messages
                            if last_read_line < len(log_contents):
                                # Update to the new log message
                                last_log_message = log_contents[-1].split('] - ', 1)[-1].strip() if '] - ' in log_contents[-1] else log_contents[-1]
                                last_read_line = len(log_contents)
                            # Print the most recent log message
                            print('Logs: ' + last_log_message)
                    else:
                        print("Log file not found.")

                    time.sleep(1)  # Sleep for a while before checking again
            else:
                print(f"\nNo job found with ID: '{job_id}'.")
                break  # Exit the loop if no such job exists
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Close database resources
        if conn:
            c.close()
            conn.close()

# Usage example
# job_id = 7

def submit_and_wait_for_job(input_query):
    # Add a new job and obtain its ID
    job_id = add_new_job(input_query)
    
    # Check if the job ID was successfully obtained
    if job_id is not None:
        print(f"Job {job_id} has been added successfully. Waiting for completion...")
        # Wait for the job to complete
        wait_for_job_completion(job_id)
        print(f"Job {job_id} has completed.")
    else:
        print("Failed to add new job.")

submit_and_wait_for_job(input_query)

Job 37 has completed.


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=6d52007a-f237-4857-b1f1-3ccb95216ee4' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>