In [1]:
import openai
from dotenv import load_dotenv
import os
import psycopg2
import uuid
import tiktoken

In [2]:
# Load environment variables from .env file
load_dotenv()

# Get ChatGPT key and create client
openai.api_key = os.getenv("OPENAI_API_KEY")
client = openai.OpenAI()

# Get database configuration
db_config = {
    "host": os.getenv("DB_HOST"),
    "database": os.getenv("DB_NAME"),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD")
}

# Define configuration variables for API call
completion_config = {
    "GPT_MODEL": "gpt-4o-mini", # Required
    "TEMPERATURE": 0,
    "MAX_TOKENS": 500,
    "MAX_INPUT_TOKENS": 8000, # Adjust based on the model's token limit, not passed to API
}

In [3]:
# test db connection
with psycopg2.connect(**db_config) as conn:
    with conn.cursor() as cursor:
        cursor.execute("SELECT version();")
        version = cursor.fetchone()
        print("PostgreSQL Version:", version[0])

PostgreSQL Version: PostgreSQL 16.4 (Ubuntu 16.4-0ubuntu0.24.04.2) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 13.2.0-23ubuntu4) 13.2.0, 64-bit


In [4]:
class Assistant:
    def __init__(self, config):
        """
        Initialize the Assistant with required and optional parameters for the config

        Args:
            config (dict): Configuration dictionary with required keys:
                - 'GPT_MODEL': eg. 'gpt-4o-mini'
            Optional keys:
                - 'TEMPERATURE': The temperature for the model (default 0)
                - 'MAX_TOKENS': Maximum tokens for completion (default 500)
                - 'MAX_INPUT_TOKENS': Used internally to check token limits (default 8000)
        """
        # Required parameter
        self.model = config.get("GPT_MODEL")
        if not self.model:
            raise ValueError("GPT_MODEL is required in the configuration")

        # Optional parameters with defaults
        self.config = {
            "temperature": config.get("TEMPERATURE", 0),
            "max_tokens": config.get("MAX_TOKENS", 500),
            "max_input_tokens": config.get("MAX_INPUT_TOKENS", 8000),
        }

        # Include any additional optional parameters
        self.extra_config = {k: v for k, v in config.items() if k not in ["GPT_MODEL", "TEMPERATURE", "MAX_TOKENS", "MAX_INPUT_TOKENS"]}

        self.session_id = None
        self.messages = []
        self.start_new_session()

    def start_new_session(self):
        """Start a new session with a unique session ID."""
        self.session_id = str(uuid.uuid4())
        print(f"Started a new session with ID: {self.session_id}")

    def create_conversation(self, user_id, system_prompt):
        """Create a new conversation with the given system prompt"""
        with psycopg2.connect(**db_config) as conn:
            with conn.cursor() as cursor:
                cursor.execute(
                    """
                    INSERT INTO conversations (user_id, session_id, system_prompt, depth, conv_status)
                    VALUES (%s, %s, %s, %s, %s)
                    RETURNING id;
                    """,
                    (user_id, self.session_id, system_prompt, 0, 'active')
                )
                conversation_id = cursor.fetchone()[0]
                print(f"Created a new conversation with ID: {conversation_id}")
                return conversation_id

    def add_message(self, conversation_id, role, content):
        """Store a message in the database with accurate token counting."""
        with psycopg2.connect(**db_config) as conn:
            with conn.cursor() as cursor:
                # Count tokens using the OpenAI tokenizer
                encoding = tiktoken.encoding_for_model(self.model)
                token_count = len(encoding.encode(content))

                # Insert the message into the database
                cursor.execute(
                    """
                    INSERT INTO messages (conversation_id, role, content, token_count)
                    VALUES (%s, %s, %s, %s);
                    """,
                    (conversation_id, role, content, token_count)
                )
                print(f"Added message to conversation {conversation_id} with {token_count} tokens.")

    def load_history(self, conversation_id):
        """Load the entire message history of a conversation"""
        with psycopg2.connect(**db_config) as conn:
            with conn.cursor() as cursor:
                cursor.execute(
                    """
                    SELECT role, content FROM messages
                    WHERE conversation_id = %s
                    ORDER BY id;
                    """,
                    (conversation_id,)
                )
                history = cursor.fetchall()
                return [{"role": role, "content": content} for role, content in history]
    
    def query(self, conversation_id, user_input):
        """ Handle a user query and make API call."""
        self.add_message(conversation_id, "user", user_input)

        # Load the full conversation history
        self.messages = self.load_history(conversation_id)
        
        # Query the API
        try:
            completion = client.chat.completions.create(
                model = self.model,
                messages = self.messages,
                temperature = self.config['temperature'],
                max_tokens = self.config['max_tokens'],
                **self.extra_config, # Pass dynamic parameters
            )
            content = completion.choices[0].message.content

            # Add the assistant's response to the message history
            self.add_message(conversation_id, "assistant", content)
            print(content)
            return content
        
        except openai.OpenAIError as e:    
            print("Error during API call:", e)
            return None

    def replay_with_modified_prompt(self, conversation_id, new_system_prompt, user_id):
        """Replay the conversation history with a new system prompt"""
        # Load the original conversation history
        history = self.load_history(conversation_id)

        # Create a new conversation under the current session
        new_conversation_id = self.create_conversation(user_id, new_system_prompt)

        # Add the new system prompt
        self.add_message(new_conversation_id, "system", new_system_prompt)

        # Replay user messages with the new system prompt
        for message in history:
            if message["role"] == "user":
                user_input = message["content"]
                self.query(new_conversation_id, user_input)

        print(f"Replayed conversation created with ID: {new_conversation_id}")
        return new_conversation_id
        

In [5]:
bot = Assistant(completion_config)

Started a new session with ID: f1305b8c-63f0-4801-86a5-82f962fee6d1


In [6]:
user = 'Test'
prompt = 'You are a helpful assistant who keeps answers short'

In [7]:
id = bot.create_conversation(user, prompt)

Created a new conversation with ID: 9


In [8]:
bot.query(id, "What are some of the most important topics in machine learning")

Added message to conversation 9 with 11 tokens.
Added message to conversation 9 with 500 tokens.
Machine learning is a vast and rapidly evolving field, encompassing a wide range of topics. Here are some of the most important and foundational topics in machine learning:

1. **Supervised Learning**:
   - **Regression**: Techniques for predicting continuous outcomes (e.g., linear regression, polynomial regression).
   - **Classification**: Methods for categorizing data into discrete classes (e.g., logistic regression, decision trees, support vector machines, neural networks).

2. **Unsupervised Learning**:
   - **Clustering**: Grouping similar data points (e.g., k-means, hierarchical clustering, DBSCAN).
   - **Dimensionality Reduction**: Techniques for reducing the number of features (e.g., PCA, t-SNE, UMAP).

3. **Reinforcement Learning**:
   - Understanding how agents learn to make decisions by interacting with an environment (e.g., Q-learning, deep reinforcement learning).

4. **Deep 

'Machine learning is a vast and rapidly evolving field, encompassing a wide range of topics. Here are some of the most important and foundational topics in machine learning:\n\n1. **Supervised Learning**:\n   - **Regression**: Techniques for predicting continuous outcomes (e.g., linear regression, polynomial regression).\n   - **Classification**: Methods for categorizing data into discrete classes (e.g., logistic regression, decision trees, support vector machines, neural networks).\n\n2. **Unsupervised Learning**:\n   - **Clustering**: Grouping similar data points (e.g., k-means, hierarchical clustering, DBSCAN).\n   - **Dimensionality Reduction**: Techniques for reducing the number of features (e.g., PCA, t-SNE, UMAP).\n\n3. **Reinforcement Learning**:\n   - Understanding how agents learn to make decisions by interacting with an environment (e.g., Q-learning, deep reinforcement learning).\n\n4. **Deep Learning**:\n   - Neural networks and their architectures (e.g., convolutional neur