# Using LLMs to Explore the IUPHAR Guide to Pharmacology
Experiment with LLMs for the IUPHAR/BPS Guide to Pharmacology

In [52]:
import sqlite3
import base64
import logging
import codecs
import requests
import psycopg2
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

To log any errors

In [53]:
logging.basicConfig(filename="query_errors.log", level=logging.ERROR)

# Connect to database

In [54]:
def connect_to_db():
    try:
        conn = psycopg2.connect(**db_config)
        return conn
    except Exception as e:
        print(f"Error connecting to the database: {e}")
        return None
    
# Password generation function
def pwd():
    s1 = ''.join([chr(int(i)) for i in ['120', '65', '103', '108', '101', '116', '116', '55']])
    s2 = base64.b64encode(s1.encode('utf-8')).decode('utf-8')
    s3 = codecs.encode(s2[::-1], 'rot_13')
    s4 = codecs.decode(s3[::-1], 'rot_13')
    return base64.b64decode(s4).decode('utf-8')


# Database connection details
db_config = {
    'host': 'localhost',
    'database': 'guide_to_pharmacology',
    'user': 'postgres',
    'password': pwd(),
}

In [55]:
conn = connect_to_db()
if conn:
    print("Connected to the database successfully.")
else:
    print("Failed to connect to the database. Please check your credentials.")

Connected to the database successfully.


# Function to execute query and return results

In [56]:
# Function to execute a query and return results
def execute_query(conn, query):
    try:
        return pd.read_sql_query(query, conn)
    except Exception as e:
        print(f"Error executing query: {e}")
        return None

# Split dataset

In [57]:
# Function to split the dataset
def split_dataset(file_path):
    df = pd.read_csv(file_path)
    difficulty_cols = ["Difficulty: Easy", "Difficulty: Easy-Moderate", "Difficulty: Moderate-Hard", "Difficulty: Hard"]
    df["Difficulty"] = df[difficulty_cols].idxmax(axis=1).str.replace("Difficulty: ", "")
    train_df, test_df = train_test_split(df, test_size=0.2, stratify=df["Difficulty"], random_state=42)
    return train_df, test_df

In [58]:
file_path = "Training/all_queries_categorised_train.csv"  # Path to your dataset
train_df, test_df = split_dataset(file_path)
print("Dataset split into training and testing sets.")

Dataset split into training and testing sets.


# Evaluate Query Results

In [59]:
# Function to evaluate the query results
def evaluate_query(expected_df, result_df, min_columns):
    if expected_df is None or result_df is None:
        return {"Precision": 0, "Recall": 0, "Accuracy": 0, "Missed Columns": min_columns, "Extra Columns": 0}

    expected_columns = set(expected_df.columns)
    result_columns = set(result_df.columns)

    missed_columns = expected_columns - result_columns
    extra_columns = result_columns - expected_columns

    true_positive = len(expected_columns & result_columns)
    precision = true_positive / len(result_columns) if result_columns else 0
    recall = true_positive / len(expected_columns) if expected_columns else 0
    accuracy = int(len(missed_columns) == 0 and len(extra_columns) == 0)

    return {
        "Precision": precision,
        "Recall": recall,
        "Accuracy": accuracy,
        "Missed Columns": len(missed_columns),
        "Extra Columns": len(extra_columns),
    }

# Evaluate the Dataset

In [60]:
def evaluate_dataset(df, conn):
    metrics = []
    for _, row in df.iterrows():
        sql_query = row["SQL"]
        second_sql = row.get("2nd SQL")
        min_columns = row.get("Minimum output columns", 0)

        # Attempt to execute the main SQL query
        expected_df = execute_query(conn, sql_query)
        result_df = execute_query(conn, sql_query)

        # If either query failed, log and continue
        if expected_df is None or result_df is None:
            logging.error(f"Failed to execute main query for Query ID {row['ID']}: {sql_query}")
            continue

        main_eval = evaluate_query(expected_df, result_df, min_columns)

        if second_sql and pd.notnull(second_sql):
            # Attempt to execute the second SQL query
            second_expected_df = execute_query(conn, second_sql)
            if second_expected_df is None:
                logging.error(f"Failed to execute second query for Query ID {row['ID']}: {second_sql}")
                second_eval = {"Precision": 0, "Recall": 0, "Accuracy": 0, "Missed Columns": min_columns, "Extra Columns": 0}
            else:
                second_eval = evaluate_query(second_expected_df, result_df, min_columns)
            
            # Combine evaluations (use max values for each metric)
            for key in main_eval:
                main_eval[key] = max(main_eval[key], second_eval[key])

        metrics.append({
            "Query ID": row["ID"],
            "Precision": main_eval["Precision"],
            "Recall": main_eval["Recall"],
            "Accuracy": main_eval["Accuracy"],
            "Missed Columns": main_eval["Missed Columns"],
            "Extra Columns": main_eval["Extra Columns"],
        })

    return pd.DataFrame(metrics)

In [61]:
print("Evaluating the test dataset...")
test_metrics = evaluate_dataset(test_df, conn)
print("Evaluation results:")
print(test_metrics)

Evaluating the test dataset...




Evaluation results:
    Query ID  Precision  Recall  Accuracy  Missed Columns  Extra Columns
0         65        1.0     1.0         1               0              0
1         69        1.0     1.0         1               0              0
2         29        1.0     1.0         1               0              0
3         16        1.0     1.0         1               0              0
4         15        1.0     1.0         1               0              0
5         58        1.0     1.0         1               0              0
6         61        1.0     1.0         1               1              1
7         56        1.0     1.0         1               0              0
8         42        1.0     1.0         1               0              0
9         23        1.0     1.0         1               0             26
10        38        1.0     1.0         1               3              1




# Run your Own Queries

In [62]:
while True:
    query = input("Enter an SQL query to execute (or type 'exit' to quit): ")
    if query.lower() == 'exit':
        break

    results = execute_query(conn, query)
    if results is not None and not results.empty:
        print("Query Results:")
        print(results)
    else:
        print("No results found or an error occurred.")



Error executing query: Execution failed on sql 'list all ligands': syntax error at or near "list"
LINE 1: list all ligands
        ^

No results found or an error occurred.
Error executing query: Execution failed on sql '': can't execute an empty query
No results found or an error occurred.


# Close DB Connection

In [63]:
conn.close()
print("Database connection closed.")

Database connection closed.
