Skip to content

Commit

Permalink
Created an annotation using instructor
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanleomk committed Apr 29, 2024
1 parent 38bd2d9 commit 227c0a4
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 0 deletions.
7 changes: 7 additions & 0 deletions examples/annotation/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Introduction

This example showcases a simple Streamlit app that can be used to annotate the records stored in a specific database table.

Make sure to install the dependencies first with `uv pip install -r requirements.txt`

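To populate the table, run `main.py`. The script calls the OpenAI API, so make sure an API key is available (for example through the `OPENAI_API_KEY` environment variable). It should generate ~20 different todos, insert them into the table, and mark them as unannotated. Once you've done so, you can boot up the annotation UI with the command `streamlit run annotate.py`.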
72 changes: 72 additions & 0 deletions examples/annotation/annotate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import streamlit as st
import sqlite3


def fetch_unannotated_todos():
    """Return every todo that has not been annotated yet.

    Reads the ``todos`` table of ``tutorial.db`` and keeps the rows whose
    ``annotated`` column is false.

    Returns:
        A list of dicts with the keys ``title``, ``description``,
        ``annotated`` and ``id``, one dict per matching row.

    Raises:
        sqlite3.OperationalError: If the ``todos`` table does not exist.
    """
    con = sqlite3.connect("tutorial.db")
    try:
        rows = con.execute(
            "SELECT title, description, annotated,id FROM todos WHERE annotated = FALSE"
        ).fetchall()
    finally:
        # The sqlite3 connection context manager only commits or rolls back;
        # it never closes the handle, so release it explicitly.
        con.close()

    # ``todo_id`` rather than ``id`` so the builtin is not shadowed.
    return [
        {
            "title": title,
            "description": description,
            "annotated": annotated,
            "id": todo_id,
        }
        for title, description, annotated, todo_id in rows
    ]


def _select_todo(todo_id):
    """Button callback: remember which todo the annotator picked."""
    st.session_state.curr_selected_todo = todo_id


def display_todos(todos):
    """Render the unannotated todos, each followed by a button that selects it.

    Args:
        todos: Iterable of dicts exposing at least the ``id`` and ``title``
            keys.
    """
    st.write("### Unannotated Todos")
    for todo in todos:
        st.write(f'({todo["id"]}) {todo["title"]}')
        # Register the selection through ``on_click`` instead of acting on the
        # button's return value: Streamlit runs callbacks before the script
        # reruns, so code that reads ``curr_selected_todo`` earlier in the
        # script already sees the new selection on the click's own rerun.
        st.button(
            f"Select {todo['id']}",
            on_click=_select_todo,
            args=(todo["id"],),
        )


st.title("Todo Annotation")

# Seed the selection slot on the very first script run so later code can read
# it unconditionally; an existing selection is left untouched on reruns.
if "curr_selected_todo" not in st.session_state:
    st.session_state["curr_selected_todo"] = None


def render_selected_todo():
    """Show an edit form for the currently selected todo and persist edits.

    Reads the selected id from ``st.session_state.curr_selected_todo``. When a
    todo is selected and exists in ``tutorial.db``, its original prompt is
    displayed together with editable title and description fields. Pressing
    "Update" stores the edited values and flags the row as annotated.
    """
    todo_id = st.session_state.curr_selected_todo
    if todo_id is None:
        st.write("No todo selected.")
        return

    con = sqlite3.connect("tutorial.db")
    try:
        row = con.execute(
            "SELECT original_prompt,title, description FROM todos WHERE id = ?",
            (todo_id,),
        ).fetchone()
        if row is None:
            st.write("Selected todo not found.")
            return

        original_prompt, title, description = row
        # The column is nullable; fall back to an empty string so a missing
        # prompt does not raise a TypeError while building the label.
        st.write(f"Original Prompt: {original_prompt or ''}")
        new_title = st.text_input("Title", value=title)
        new_description = st.text_area("Description", value=description)

        if st.button("Update"):
            # ``with con`` commits on success and rolls back on error.
            with con:
                con.execute(
                    "UPDATE todos SET title = ?, description = ?, annotated = ? WHERE id = ?",
                    (new_title, new_description, True, todo_id),
                )
            st.success("Todo updated successfully!")
    finally:
        # The connection context manager never closes the handle; do it here
        # so every rerun of the script releases its connection.
        con.close()


# Page body: the editor for the current selection comes first, followed by
# the list of todos that still need annotation.
render_selected_todo()
unannotated_todos = fetch_unannotated_todos()
if not unannotated_todos:
    st.write("No unannotated todos found.")
else:
    display_todos(unannotated_todos)
70 changes: 70 additions & 0 deletions examples/annotation/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import instructor
from typing import List
from openai import AsyncOpenAI
from asyncio import run
from tqdm.asyncio import tqdm_asyncio as asyncio
from pydantic import BaseModel, Field
import sqlite3


client = instructor.from_openai(AsyncOpenAI())


class TodoItem(BaseModel):
    """
    This is a schema that represents an actionable item which the user needs to consider
    """

    # NOTE(review): pydantic exposes the docstring above and the field
    # descriptions below in the generated schema, which presumably reaches the
    # model as prompt text -- treat them as runtime strings, not as comments.

    # Short label for the todo. Required: no default is supplied.
    title: str = Field(description="This is a title for the todo item")
    # Longer plan of action for the todo. Required as well.
    description: str = Field(
        description="This is a description that explains a plan of action for the todo",
    )


async def extract_todo(user_query: str):
    """Ask the model to split one free-form request into todo items.

    Args:
        user_query: Raw text describing the tasks the user has to do.

    Returns:
        A list of ``(item, user_query)`` tuples, pairing each extracted item
        with the prompt it came from.
    """
    system_message = {
        "role": "system",
        "content": "You are a world class system that excels at extracting todo items from a user query",
    }
    user_message = {"role": "user", "content": user_query}

    extracted_items = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        response_model=List[TodoItem],
    )

    # Keep the originating prompt next to every item so it can be stored
    # alongside the item later on.
    paired = []
    for todo_item in extracted_items:
        paired.append((todo_item, user_query))
    return paired


async def process_todos(items):
    """Extract todos from every query concurrently and flatten the results.

    Args:
        items: Iterable of user query strings.

    Returns:
        One flat list holding the pairs returned by ``extract_todo`` for each
        query, in query order.
    """
    pending = []
    for query in items:
        pending.append(extract_todo(query))

    # ``asyncio`` is this module's alias for ``tqdm_asyncio``, not the stdlib
    # package.
    per_query_results = await asyncio.gather(*pending)

    flattened = []
    for pairs in per_query_results:
        flattened.extend(pairs)
    return flattened


if __name__ == "__main__":
    # Seed script: make sure the table exists, extract todos from a handful of
    # sample prompts, and store them. New rows rely on the column default
    # (annotated = FALSE), so they start out unannotated.
    con = sqlite3.connect("tutorial.db")
    try:
        con.execute(
            "CREATE TABLE IF NOT EXISTS todos(id INTEGER PRIMARY KEY AUTOINCREMENT, annotated BOOLEAN DEFAULT FALSE, title TEXT, description TEXT, original_prompt TEXT)"
        )

        data = [
            "This week I need to finalize the project report, schedule a meeting with the team, prepare the presentation slides, submit the budget review, and send the client update emails.",
            "Next week I must organize the department outing, update the project timeline, review the new intern applications, and coordinate the quarterly webinars.",
            "Tomorrow I should finalize the contract details, call the supplier for an update, draft the monthly newsletter, and check the inventory status.",
            "By the end of this month, I need to complete the performance reviews, plan the training sessions, archive old project files, and renew the software licenses.",
            "This Friday I have to prepare the weekly sales report, confirm the client appointments, oversee the network upgrade, and document the audit findings.",
        ]

        # Each entry is an (item, original_query) pair, not a bare TodoItem.
        todos = run(process_todos(data))

        # ``with con`` wraps the inserts in one transaction: commit on
        # success, roll back if any insert fails.
        with con:
            con.executemany(
                "INSERT INTO todos (title, description,original_prompt) VALUES (?, ?,?)",
                (
                    (todo.title, todo.description, original_query)
                    for todo, original_query in todos
                ),
            )
    finally:
        # A single connection is reused for the whole script and always
        # closed; the connection context manager alone would leave it open.
        con.close()
5 changes: 5 additions & 0 deletions examples/annotation/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
openai==1.23.6
instructor==1.2.3
pydantic==2.7.0
typer==0.12.3
streamlit==1.33.0

0 comments on commit 227c0a4

Please sign in to comment.