From 5fd70a596180b5291638faee2046278b0588d686 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 14 Oct 2025 17:30:59 -0700 Subject: [PATCH 1/9] Docs: checkpoint --- docs/README.md | 17 ++++---- docs/getting-started.md | 88 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 9 deletions(-) create mode 100644 docs/getting-started.md diff --git a/docs/README.md b/docs/README.md index 3bafeb1..4423799 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,14 +1,13 @@ # Typeagent docs -## TBD +## Basics -For now we have: +- [Getting Started](getting-started.md) +- [High-level API](high-level-api.md) +- [Environment Variables](env-vars.md) -### High-level API: +## Advanced -- create_conversation -- [conversation.query](query-method.md) - -### Other - -- [architecture design](typeagent-architecture.md) +- [Reproducing the Demos](demos.md) +- [Downloading GMail Messages](gmail.md) +- [Developing and Contributing](developing.md) diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..db10761 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,88 @@ +# Getting Started + +## Installation + +```sh +$ pip install typeagent +``` + +You might also want to use a +[virtual environment](https://docs.python.org/3/library/venv.html) +or another tool like [poetry](https://python-poetry.org/) +or [uv](https://docs.astral.sh/uv/), as long as your tool can +install wheels from [PyPI](https://pypi.org). + +## "Hello world" ingestion program + +### 1. Create a file named `transcript.txt` containing messages to index, e.g.: + +```txt +STEVE We should really make a Python library for Structured RAG. +UMESH Who would be a good person to do the Python library? +GUIDO I volunteer to do the Python library. Give me a few months. +``` + +### 2. Write a small program like this: + +```py +from typeagent import create_conversation +from typeagent.transcripts.transcript import ( + TranscriptMessage, + TranscriptMessageMeta, +) + + +def read_messages(filename) -> list[TranscriptMessage]: + messages: list[TranscriptMessage] = [] + with open(filename, "r") as f: + for line in f: + # Parse each line into a TranscriptMessage + speaker, text_chunk = line.split(None, 1) + message = TranscriptMessage( + text_chunks=[text_chunk], + metadata=TranscriptMessageMeta(speaker=speaker), + ) + messages.append(message) + return messages + + +async def main(): + conversation = await create_conversation("demo.db", TranscriptMessage) + messages = read_messages("transcript.txt") + print(f"Indexing {len(messages)} messages...") + results = await conversation.add_messages_with_indexing(messages) + print(f"Indexed {results.messages_added} messages.") + print(f"Got {results.semrefs_added} semantic refs.") + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) +``` + +### 3. Set up your environment for using OpenAI + +The minimal set of environment variables seems to be: + +```sh +export OPENAI_API_KEY=your-very-secret-openai-api-key +export OPENAI_MODEL=gpt-4o +``` + +(See [Environment Variables](env-vars.md) for more information.) + +### 4. Run your program + +Expected output is something like: + +```txt +0.027s -- Using OpenAI +Indexing 3 messages... +Indexed 3 messages. +Got 26 semantic refs. +``` + +## "Hello world" query program + +TBD. 
From 3ce7129d1b6d8e799c192ba3bca893e7d0ce06d3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 14 Oct 2025 20:49:43 -0700 Subject: [PATCH 2/9] Complete getting-started.md --- docs/env-vars.md | 0 docs/getting-started.md | 62 ++++++++++++++++++++++++++++++++++++----- docs/high-level-api.md | 0 3 files changed, 55 insertions(+), 7 deletions(-) create mode 100644 docs/env-vars.md create mode 100644 docs/high-level-api.md diff --git a/docs/env-vars.md b/docs/env-vars.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/getting-started.md b/docs/getting-started.md index db10761..a7119ac 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -14,7 +14,7 @@ install wheels from [PyPI](https://pypi.org). ## "Hello world" ingestion program -### 1. Create a file named `transcript.txt` containing messages to index, e.g.: +### 1. Create a text file named `transcript.txt` ```txt STEVE We should really make a Python library for Structured RAG. @@ -22,7 +22,7 @@ UMESH Who would be a good person to do the Python library? GUIDO I volunteer to do the Python library. Give me a few months. ``` -### 2. Write a small program like this: +### 2. Create a Python file named `demo.py` ```py from typeagent import create_conversation @@ -57,24 +57,30 @@ async def main(): if __name__ == "__main__": import asyncio - asyncio.run(main()) ``` ### 3. Set up your environment for using OpenAI -The minimal set of environment variables seems to be: +The minimal set of environment variables is: ```sh export OPENAI_API_KEY=your-very-secret-openai-api-key export OPENAI_MODEL=gpt-4o ``` -(See [Environment Variables](env-vars.md) for more information.) +Some OpenAI setups will require som additional environment variables. +See [Environment Variables](env-vars.md) for more information. +You will also find information there on how to use +Azure-hosted OpenAI models. ### 4. Run your program -Expected output is something like: +```sh +$ python demo.py +``` + +Expected output looks like: ```txt 0.027s -- Using OpenAI @@ -85,4 +91,46 @@ Got 26 semantic refs. ## "Hello world" query program -TBD. +### 1. Write this small program + +```py +from typeagent import create_conversation +from typeagent.transcripts.transcript import TranscriptMessage + + +async def main(): + conversation = await create_conversation("demo.db", TranscriptMessage) + question = "Who volunteered to do the python library?" + print("Q:", question) + answer = await conversation.query(question) + print("A:", answer) + + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) +``` + +### 2. Set up your environment like above + +### 3. Run your program + +```sh +$ python query.py +``` + +Expected output looks like: + +```txt +0.019s -- Using OpenAI +Q: Who volunteered to do the python library? +A: Guido volunteered to do the Python library. +``` + +## Next steps + +You can study the full documentation for `create_conversation()` +and `conersation.query()` in [High-level API](high-level-api.md). + +You can also study the source code at the +[typeagent-py repo](https://github.com/microsoft/typeagent-py). 
\ No newline at end of file diff --git a/docs/high-level-api.md b/docs/high-level-api.md new file mode 100644 index 0000000..e69de29 From 5090a71e4ea7737aeb96f7ba86db8c252ee239d6 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 14 Oct 2025 21:34:00 -0700 Subject: [PATCH 3/9] Complete high-level-api.md --- docs/high-level-api.md | 111 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/docs/high-level-api.md b/docs/high-level-api.md index e69de29..3cbc34c 100644 --- a/docs/high-level-api.md +++ b/docs/high-level-api.md @@ -0,0 +1,111 @@ +# High-level API + +NOTE: When an argument's default is given as `[]`, this is a shorthand +for a dynamically assigned default value on each call. We don't mean +the literal meaning of this notation in Python, which would imply +that all calls would share a single empty list object as their default. + +## Classes + +### Message classes + +#### `ConversationMessage` + +`typeagent.knowpro.universal_message.ConversationMessage` + +Constructor and fields: + +```py +class ConversationMessage( + text_chunks: list[str], # Text of the message, 1 or more chunks + tags: list[str] = [], # Optional tags + timestamp: str | None = None, # ISO timestamp in UTC with 'z' suffix + metadata: ConversationMessageMeta, # See below +) +``` + +- Only `text_chunks` is required. +- Tags are arbitrary pieces of information attached to a message + that will be indexed; e.g. `["sketch", "pet shop"] +- If present, the timestamp must be of the form `2025-10-14T09:03:21z`. + +#### `ConversationMessageMeta` + +`typeagent.knowpro.universal_message.ConversationMessageMeta` + +Constructor and fields: + +```py +class ConversationMessageMeta( + speaker: str | None = None, # Optional entity who sent the message + recipients: list[str] = [], # Optional entities to whom the message was sent +) +``` + +This class represents the metadata for a given `ConversationMessage`. + +#### `TranscriptMessage` and `TranscriptMessageMeta` + +`typeagent.transcripts.transcript.TranscriptMessage` +`typeagent.transcripts.transcript.TranscriptMessageMeta` + +These are simple aliases for `ConversationMessage` and +`ConversationMessageMeta`, respectively. + +### Conversation classes + +#### `ConversationBase` + +`typeagent.knowpro.factory.ConversationBase` + +Represents a conversation, which holds ingested messages and the +extracted and indexed knowledge thereof. + +It is constructed by calling the factory function +`typeagent.create_conversation` described below. + +It has one public method: + +- `query` + ```py + async def query( + question: str, + # Other parameters are not pubic + ) -> str + ``` + + Tries to answer the question using (only) the indexed messages. + If no answer is found, the returned string starts with + `"No answer found:"` or `"Unexpected answer type:"`. + +## Functions + +There is currently only one public function. + +#### Factory function + +- `create_conversation` + ```py + async def create_conversation( + dbname: str | None, + message_type: type, + name: str = "", + tags: list[str] | None = None, + settings: ConversationSettings | None = None, + ) -> ConversationBase + ``` + + - Constructs a conversation object. + - The required `dbname` argument specifies the SQLite3 database + name (e.g. `test.db`). If explicitly set to `None` the data is + stored in RAM and will not persist when the process exits. 
+ - The required `message_type` is normally `TranscriptMessage` + or `ConversationMessage` (there are other possibilities too, + as yet left undocumented). + - The optional `name` specifies the conversation name, which + may be used in diagnostics. + - `tags` gives tags (like `ConversationMessage.tags`) for the whole + conversation. + - `settings` provides overrides for various aspects of the knowledge + extraction and indexing process. Its exact usage is currently left + as an exercise for the reader. From 8ecbf942f8f21a9281fe11907e50f78720c90dfa Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 14 Oct 2025 21:57:33 -0700 Subject: [PATCH 4/9] Complete env-vars.md --- docs/env-vars.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/env-vars.md b/docs/env-vars.md index e69de29..88dd696 100644 --- a/docs/env-vars.md +++ b/docs/env-vars.md @@ -0,0 +1,44 @@ +# Environment Variables + +No LLM-using application today works without API tokens and/or other +authentication secrets. These are almost always passed via environment +variables. + +Typeagent currently supports two families of environment variables: + +- Those for (public) OpenAI servers. +- Those for the Azure OpenAI service. + +## OPENAI environment variables + +The (public) OpenAI environment variables include: + +- `OPENAI_API_KEY`: Your secret API key that you get from the + [OpenAI dashboard](https://platform.openai.com/api-keys). +- `OPENAI_MODEL`: An environment variable introduced by + [TypeChat](https://microsoft.github.io/TypeChat/docs/examples/) + indicating the model to use (e.g.`gpt-4o`). + +## Azure OpenAI environment variables + +If you are using the OpenAI service hosted by Azure, you need different +environment variables, starting with: + +- `AZURE_OPENAI_API_KEY`: Your Azure OpenAI API key. +- `AZURE_OPENAI_ENDPOINT`: The full URL of the Azure OpenAI REST API + (e.g. https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2023-05-15). +- If you use Azure OpenAI you will know where to get these + (or ask your sysadmin). + +## Conflicts + +If you set both `OPENAI_API_KEY` and `AZURE_OPENAI_API_KEY`, +plain `OPENAI` will win. + +## Other ways to specify environment variables + +It is recommended to put your environment variables in a file named +`.env` in the current or parent directory. +To pick up these variables, call `typeagent.aitools.utils.load_dotenv()` +at the start of your program (before calling any typeagent functions). +(For simplicity this is not shown in [Getting Started](getting-started.md).) From 94b94edcb30d784724b223b3f17a8446e0f1171d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Oct 2025 09:22:34 -0700 Subject: [PATCH 5/9] Short versions of demos.md, gmail.md, developing.md. --- docs/demos.md | 3 +++ docs/developing.md | 12 ++++++++++++ docs/gmail.md | 8 ++++++++ 3 files changed, 23 insertions(+) create mode 100644 docs/demos.md create mode 100644 docs/developing.md create mode 100644 docs/gmail.md diff --git a/docs/demos.md b/docs/demos.md new file mode 100644 index 0000000..e6d11cc --- /dev/null +++ b/docs/demos.md @@ -0,0 +1,3 @@ +# How to reproduce the demos + +This will be revealed after [PyBay 2025](https://pybay.org/). 
diff --git a/docs/developing.md b/docs/developing.md new file mode 100644 index 0000000..9d61ab5 --- /dev/null +++ b/docs/developing.md @@ -0,0 +1,12 @@ +# Developing and Contributing + +**Always follow the code of conduct, see [Code of Conduct](../CODE_OF_CONDUCT.md).** + +To contribute, submit issues or PRs to +[our repo](https://github.com/microsoft/typeagent-py). + +To develop, for now you're on your own. +We use [uv](https://docs.astral.sh/uv/) for some things things. +Check out the [Makefile](../Makefile) for some recipes. + +More TBD. diff --git a/docs/gmail.md b/docs/gmail.md new file mode 100644 index 0000000..f016ffb --- /dev/null +++ b/docs/gmail.md @@ -0,0 +1,8 @@ +# Extracting GMail messages + +There's a helper script in the repo under `gmail/`. +It requires setting up and creating a Google API project. +Until we have time to write this up, your best bet is to +ask your favorite search engine or LLM-based chat bot for help. + +More TBD. From d948755604fc6cb704add205e9451d69a26617e3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Oct 2025 09:24:15 -0700 Subject: [PATCH 6/9] Remove outdated query-method.md --- docs/query-method.md | 99 -------------------------------------------- 1 file changed, 99 deletions(-) delete mode 100644 docs/query-method.md diff --git a/docs/query-method.md b/docs/query-method.md deleted file mode 100644 index ea07f42..0000000 --- a/docs/query-method.md +++ /dev/null @@ -1,99 +0,0 @@ -# Conversation Query Method - -The `query()` method provides a simple, end-to-end API for querying conversations using natural language. - -## Usage - -```python -from typeagent import create_conversation -from typeagent.transcripts.transcript import TranscriptMessage - -# Create a conversation -conv = await create_conversation( - "my_conversation.db", - TranscriptMessage, - name="My Conversation", -) - -# Add messages -messages: list[TranscriptMessage] = [...] -await conv.add_messages_with_indexing(messages) - -# Query the conversation -question: str = input("typeagent> ") -answer: str = await conv.query(question) -print(answer) -``` - -## How It Works - -The `query()` method encapsulates the full TypeAgent query pipeline: - -1. **Natural Language Understanding**: Uses TypeChat to translate the natural language question into a structured search query -2. **Search**: Executes the search across the conversation's messages and knowledge base -3. **Answer Generation**: Uses an LLM to generate a natural language answer based on the search results - -## Method Signature - -```python -async def query(self, question: str) -> str: - """ - Run an end-to-end query on the conversation. - - Args: - question: The natural language question to answer - - Returns: - A natural language answer string. If the answer cannot be determined, - returns an explanation of why no answer was found. - """ -``` - -## Behavior - -- **Success**: Returns a natural language answer synthesized from the conversation content -- **No Answer Found**: Returns a message explaining why the answer couldn't be determined -- **Search Failure**: Returns an error message describing the failure - -## Performance Considerations - -The `query()` method caches the TypeChat translators per conversation instance, so repeated queries on the same conversation are more efficient. 
- -## Example: Interactive Loop - -```python -while True: - question: str = input("typeagent> ") - if not question.strip(): - continue - if question.lower() in ("quit", "exit"): - break - - answer: str = await conv.query(question) - print(answer) -``` - -## Example: Batch Processing - -```python -questions = [ - "What was discussed?", - "Who were the speakers?", - "What topics came up?", -] - -for question in questions: - answer = await conv.query(question) - print(f"Q: {question}") - print(f"A: {answer}") - print() -``` - -## Related APIs - -For more control over the query pipeline, you can use the lower-level APIs: - -- `searchlang.search_conversation_with_language()` - Search only -- `answers.generate_answers()` - Answer generation from search results - -See `tools/query.py` for examples of using these lower-level APIs with debugging options. From 1b88c6790068de1a5747a5a93f66781277e764be Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Oct 2025 09:26:38 -0700 Subject: [PATCH 7/9] Consistent title capitalization --- docs/README.md | 2 +- docs/demos.md | 2 +- docs/gmail.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/README.md b/docs/README.md index 4423799..0bf687a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,4 +1,4 @@ -# Typeagent docs +# Typeagent Docs ## Basics diff --git a/docs/demos.md b/docs/demos.md index e6d11cc..b94abf0 100644 --- a/docs/demos.md +++ b/docs/demos.md @@ -1,3 +1,3 @@ -# How to reproduce the demos +# How to Reproduce the Demos This will be revealed after [PyBay 2025](https://pybay.org/). diff --git a/docs/gmail.md b/docs/gmail.md index f016ffb..4c0b514 100644 --- a/docs/gmail.md +++ b/docs/gmail.md @@ -1,4 +1,4 @@ -# Extracting GMail messages +# Extracting GMail Messages There's a helper script in the repo under `gmail/`. It requires setting up and creating a Google API project. From c463b1e8e88db36978dfc34fab8694527a1cc13e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Oct 2025 09:32:35 -0700 Subject: [PATCH 8/9] Teach test_email.py about 'No answer found:' prefix --- docs/high-level-api.md | 2 +- tools/test_email.py | 3 ++- typeagent/knowpro/conversation_base.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/high-level-api.md b/docs/high-level-api.md index 3cbc34c..82ee3e1 100644 --- a/docs/high-level-api.md +++ b/docs/high-level-api.md @@ -76,7 +76,7 @@ It has one public method: Tries to answer the question using (only) the indexed messages. If no answer is found, the returned string starts with - `"No answer found:"` or `"Unexpected answer type:"`. + `"No answer found:"`. 
## Functions diff --git a/tools/test_email.py b/tools/test_email.py index ac3a6b8..193178b 100644 --- a/tools/test_email.py +++ b/tools/test_email.py @@ -284,7 +284,8 @@ async def generate_answer(context: EmailContext, args: list[str]): print(Fore.CYAN + f"Getting answer for:\n{question} " + Fore.RESET) answer = await context.conversation.query(question) - print(Fore.GREEN + answer + Fore.RESET) + color = Fore.RED if answer.startswith("No answer found:") else Fore.GREEN + print(color + answer + Fore.RESET) async def reset_index(context: EmailContext, args: list[str]): diff --git a/typeagent/knowpro/conversation_base.py b/typeagent/knowpro/conversation_base.py index b09a500..06de8da 100644 --- a/typeagent/knowpro/conversation_base.py +++ b/typeagent/knowpro/conversation_base.py @@ -384,5 +384,5 @@ async def query( return f"No answer found: {combined_answer.whyNoAnswer or 'Unable to find relevant information'}" case "Answered": return combined_answer.answer or "No answer provided" - case _: + case _: # Cannot happen in type-checked code return f"Unexpected answer type: {combined_answer.type}" From bfe4a2a2647f88705eb7d71fa75c03c62f4990a7 Mon Sep 17 00:00:00 2001 From: gvanrossum-ms Date: Wed, 15 Oct 2025 10:03:07 -0700 Subject: [PATCH 9/9] Fix typos found by Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/developing.md | 2 +- docs/getting-started.md | 4 ++-- docs/high-level-api.md | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/developing.md b/docs/developing.md index 9d61ab5..849144f 100644 --- a/docs/developing.md +++ b/docs/developing.md @@ -6,7 +6,7 @@ To contribute, submit issues or PRs to [our repo](https://github.com/microsoft/typeagent-py). To develop, for now you're on your own. -We use [uv](https://docs.astral.sh/uv/) for some things things. +We use [uv](https://docs.astral.sh/uv/) for some things. Check out the [Makefile](../Makefile) for some recipes. More TBD. diff --git a/docs/getting-started.md b/docs/getting-started.md index a7119ac..451f911 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -69,7 +69,7 @@ export OPENAI_API_KEY=your-very-secret-openai-api-key export OPENAI_MODEL=gpt-4o ``` -Some OpenAI setups will require som additional environment variables. +Some OpenAI setups will require some additional environment variables. See [Environment Variables](env-vars.md) for more information. You will also find information there on how to use Azure-hosted OpenAI models. @@ -130,7 +130,7 @@ A: Guido volunteered to do the Python library. ## Next steps You can study the full documentation for `create_conversation()` -and `conersation.query()` in [High-level API](high-level-api.md). +and `conversation.query()` in [High-level API](high-level-api.md). You can also study the source code at the [typeagent-py repo](https://github.com/microsoft/typeagent-py). \ No newline at end of file diff --git a/docs/high-level-api.md b/docs/high-level-api.md index 82ee3e1..4978f61 100644 --- a/docs/high-level-api.md +++ b/docs/high-level-api.md @@ -26,7 +26,7 @@ class ConversationMessage( - Only `text_chunks` is required. - Tags are arbitrary pieces of information attached to a message - that will be indexed; e.g. `["sketch", "pet shop"] + that will be indexed; e.g. `["sketch", "pet shop"]` - If present, the timestamp must be of the form `2025-10-14T09:03:21z`. 
#### `ConversationMessageMeta` @@ -70,7 +70,7 @@ It has one public method: ```py async def query( question: str, - # Other parameters are not pubic + # Other parameters are not public ) -> str ```
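Taken together, the getting-started.md and env-vars.md files introduced above describe a two-step workflow: ingest messages into a conversation, then query it. The following is a minimal combined sketch of that workflow, not part of the documented files themselves. Every name it uses is taken from the docs as written in these patches: `create_conversation`, `add_messages_with_indexing`, and `query` from getting-started.md and high-level-api.md, and `typeagent.aitools.utils.load_dotenv` from env-vars.md; the way they are stitched together here is an assumption based on those examples.

```py
# Minimal end-to-end sketch combining the ingestion and query examples
# from getting-started.md with the .env loading described in env-vars.md.
# All APIs used here are taken from those documents; the combination is
# illustrative, not an additional documented entry point.
import asyncio

from typeagent import create_conversation
from typeagent.aitools.utils import load_dotenv  # per env-vars.md
from typeagent.transcripts.transcript import (
    TranscriptMessage,
    TranscriptMessageMeta,
)


def read_messages(filename: str) -> list[TranscriptMessage]:
    # Each line of transcript.txt looks like "SPEAKER message text...".
    messages: list[TranscriptMessage] = []
    with open(filename, "r") as f:
        for line in f:
            if not line.strip():
                continue  # skip blank lines
            speaker, text_chunk = line.split(None, 1)
            messages.append(
                TranscriptMessage(
                    text_chunks=[text_chunk],
                    metadata=TranscriptMessageMeta(speaker=speaker),
                )
            )
    return messages


async def main() -> None:
    load_dotenv()  # picks up OPENAI_* / AZURE_OPENAI_* variables from .env
    conversation = await create_conversation("demo.db", TranscriptMessage)

    results = await conversation.add_messages_with_indexing(
        read_messages("transcript.txt")
    )
    print(f"Indexed {results.messages_added} messages.")

    answer = await conversation.query("Who volunteered to do the Python library?")
    print(answer)


if __name__ == "__main__":
    asyncio.run(main())
```

Because `create_conversation("demo.db", ...)` persists to a SQLite database, re-running the script adds to the same store; per high-level-api.md, passing `None` as the database name keeps everything in RAM for a throwaway run.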