A lightweight HTTP server (can use FastAPI or Flask) exposing:
- Endpoints
  - POST /add_message
  - GET /get_context
  - POST /summarize_history
  - POST /reset
  - GET /tool_call?query=...
- Memory Management
  - Store the conversation as a list of {role, content} dicts.
  - Summarize old messages using GPT or basic heuristic summarization.
  - Track the total token count using tiktoken, and trim/summarize when approaching the limit.

- Tool Call
  - Simple Wikipedia API fetch via requests.

In [None]:
# Import necessary libraries
from fastapi import FastAPI, Request
from pydantic import BaseModel
from typing import List, Dict
import requests
import tiktoken

In [None]:
# Create the FastAPI app instance; the @app.get / @app.post decorators below
# register each endpoint handler on it.
app = FastAPI()

In [2]:
# Memory storage: a single module-level list shared by every endpoint.
# Each entry is a dict with "role" and "content" keys. Nothing is persisted.
conversation_history: List[Dict] = []  # In-memory store for conversation history

# Token budget applied by /get_context when selecting recent messages.
token_limit = 4000  # Define maximum token limit (around GPT-3.5-turbo's safe limit)

# Tokenizer for the GPT-3.5-turbo model, used to count tokens per message.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

## Models
# Pydantic model describing the request body of POST /add_message; used for
# input validation of incoming messages.
class Message(BaseModel):
    role: str     # 'user', 'assistant', or 'system' (plain str: other values are not rejected)
    content: str  # the actual text content; this is the field whose tokens /get_context counts

# Endpoint to add a new message to conversation history
@app.post("/add_message")
def add_message(msg: Message):
    """Append one validated message to the in-memory conversation history.

    Args:
        msg: Request body parsed into a ``Message`` (``role`` and ``content``).

    Returns:
        ``{"status": "added"}`` once the message has been stored.
    """
    # Pydantic v2 renamed ``.dict()`` to ``.model_dump()`` and deprecated the
    # old name. Prefer the new method when it exists and fall back to
    # ``.dict()`` so the endpoint keeps working on Pydantic v1.
    to_dict = getattr(msg, "model_dump", None) or msg.dict
    conversation_history.append(to_dict())
    return {"status": "added"}

# Endpoint to get the recent context within token limit
@app.get("/get_context")
def get_context():
    """Return the most recent messages whose combined size fits the token budget.

    Messages are examined from newest to oldest. Only each message's
    ``content`` is tokenized; the ``role`` field is not counted. Collection
    stops at the first message that would push the running total above
    ``token_limit``, so the result is always a contiguous tail of the history.

    Returns:
        ``{"context": [...]}`` with the selected messages in chronological order.
    """
    used = 0
    newest_first = []

    for entry in reversed(conversation_history):
        cost = len(encoding.encode(entry["content"]))
        if used + cost > token_limit:
            break  # this message and everything older is left out
        newest_first.append(entry)
        used += cost

    # Entries were gathered newest-first; flip them back to chronological order.
    newest_first.reverse()
    return {"context": newest_first}

# Endpoint to summarize the conversation history
@app.post("/summarize_history")
def summarize_history():
    """Collapse older messages into one summary entry, keeping the latest five.

    When the history holds five messages or fewer nothing is changed.
    Otherwise every message except the five most recent is joined into a
    single heuristic summary string, and the history becomes that summary
    (as a ``system`` message) followed by the five most recent messages.

    Returns:
        ``{"summary": <text>}`` with either the generated summary or the
        "Not enough history to summarize." notice.
    """
    keep_recent = 5  # number of newest messages left untouched

    if len(conversation_history) <= keep_recent:
        return {"summary": "Not enough history to summarize."}

    older = conversation_history[:-keep_recent]
    recent = conversation_history[-keep_recent:]

    # Basic heuristic summary: concatenate the text of the older messages.
    summary = "Summary of past events: " + " ".join(m["content"] for m in older)

    # Replace the contents in place so the module-level list object is kept.
    # The recent messages are excluded from the summary, so they must be
    # retained here; clearing the whole list would silently drop them.
    conversation_history[:] = [{"role": "system", "content": summary}, *recent]

    return {"summary": summary}

# Endpoint to reset (clear) the conversation history
@app.post("/reset")
def reset():
    """Discard every stored message.

    Returns:
        ``{"status": "cleared"}`` after the history has been emptied.
    """
    # Empty the shared list in place so the same list object stays in use.
    del conversation_history[:]
    return {"status": "cleared"}

# Endpoint to call an external tool (Wikipedia search)
@app.get("/tool_call")
def tool_call(query: str):
    """Fetch the Wikipedia page summary for ``query``.

    Args:
        query: Page title to look up; spaces are converted to underscores.

    Returns:
        ``{"result": <extract text>}`` on success,
        ``{"result": "No summary available."}`` when the response carries no
        ``extract`` field, or ``{"result": "Failed to fetch from Wikipedia."}``
        when the request fails, times out, returns a non-200 status, or the
        body is not valid JSON.
    """
    from urllib.parse import quote

    failure = {"result": "Failed to fetch from Wikipedia."}

    # Percent-encode the title as a single path segment. Without this,
    # characters such as "/", "?" or "#" in the user-supplied query would
    # change the path or query string of the outgoing request.
    title = quote(query.replace(" ", "_"), safe="")
    url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}"

    try:
        # A timeout keeps a stalled upstream from blocking the worker forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return failure

    if response.status_code != 200:
        return failure

    try:
        data = response.json()
    except ValueError:
        # The body was not valid JSON.
        return failure

    return {"result": data.get("extract", "No summary available.")}


NameError: name 'FastAPI' is not defined