-
-
Notifications
You must be signed in to change notification settings - Fork 484
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Created an annotation using instructor
- Loading branch information
Showing
4 changed files
with
154 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Introduction | ||
|
||
This showcases a simple Streamlit module which can be used to do data annotation for rows in a specific table. | ||
|
||
Make sure to install the dependencies first with `uv pip install -r requirements.txt` | ||
|
||
To populate the table, run `main.py`. This should generate ~20 different todos, insert them into the table, and mark them as unannotated. Once you've done so, you can boot up the annotation app in `annotate.py` using the command `streamlit run annotate.py`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import streamlit as st | ||
import sqlite3 | ||
|
||
|
||
def fetch_unannotated_todos(db_path="tutorial.db"):
    """Return every row of the ``todos`` table that has not been annotated yet.

    Args:
        db_path: Path of the SQLite database file to read. Defaults to the
            ``tutorial.db`` file this app has always used.

    Returns:
        A list of dicts with the keys ``title``, ``description``,
        ``annotated`` and ``id``, one per unannotated row. The list is empty
        when nothing is pending.
    """
    from contextlib import closing

    # `with sqlite3.connect(...)` only scopes a transaction; it does not close
    # the connection. `closing` releases the handle on every call.
    with closing(sqlite3.connect(db_path)) as con:
        rows = con.execute(
            "SELECT title, description, annotated, id FROM todos WHERE annotated = ?",
            # Bound as a parameter (False -> 0) instead of the FALSE keyword,
            # mirroring how the UPDATE statement in this file binds True.
            (False,),
        ).fetchall()

    return [
        {
            "title": title,
            "description": description,
            "annotated": annotated,
            "id": todo_id,
        }
        for title, description, annotated, todo_id in rows
    ]
|
||
|
||
def display_todos(todos):
    """Render the list of unannotated todos, each with a "Select" button.

    Args:
        todos: Iterable of dicts that carry at least the keys ``id`` and
            ``title``.

    Clicking a button stores that todo's id in
    ``st.session_state.curr_selected_todo``.
    """

    def _select(todo_id):
        # Runs as a button callback, i.e. before the script body is re-executed,
        # so code that reads the selection earlier on the page already sees the
        # new id on the rerun caused by the click.
        st.session_state.curr_selected_todo = todo_id

    st.write("### Unannotated Todos")
    for todo in todos:
        st.write(f"({todo['id']}) {todo['title']}")
        # Setting the state inside `if st.button(...)` would only take effect
        # after the editor (rendered above this list) had already been drawn
        # with the previous selection.
        st.button(f"Select {todo['id']}", on_click=_select, args=(todo["id"],))
|
||
|
||
# Page heading.
st.title("Todo Annotation")

# Seed the selection slot on the first run so later reads never hit a missing key.
if "curr_selected_todo" not in st.session_state:
    st.session_state["curr_selected_todo"] = None
|
||
|
||
def render_selected_todo():
    """Show an edit form for the currently selected todo and save changes.

    Reads the selected id from ``st.session_state.curr_selected_todo``. When a
    row with that id exists, its original prompt is displayed together with
    editable title and description fields. Pressing "Update" writes the edited
    values back and marks the row as annotated. Otherwise a short status
    message is written instead.
    """
    from contextlib import closing

    selected_id = st.session_state.curr_selected_todo
    if selected_id is None:
        st.write("No todo selected.")
        return

    # `closing` is required because sqlite3's own context manager only handles
    # commit/rollback and leaves the connection open.
    with closing(sqlite3.connect("tutorial.db")) as con:
        todo_data = con.execute(
            "SELECT original_prompt,title, description FROM todos WHERE id = ?",
            (selected_id,),
        ).fetchone()

    if not todo_data:
        st.write("Selected todo not found.")
        return

    original_prompt, title, description = todo_data
    # f-string instead of `+` so a NULL original_prompt cannot raise TypeError.
    st.write(f"Original Prompt: {original_prompt}")
    new_title = st.text_input("Title", value=title)
    new_description = st.text_area("Description", value=description)

    if st.button("Update"):
        with closing(sqlite3.connect("tutorial.db")) as con:
            # Inner `with con` commits on success and rolls back on error.
            with con:
                con.execute(
                    "UPDATE todos SET title = ?, description = ?, annotated = ? WHERE id = ?",
                    (new_title, new_description, True, selected_id),
                )
        st.success("Todo updated successfully!")
|
||
|
||
# Page flow: editor for the current selection first, then the pending list.
render_selected_todo()
unannotated_todos = fetch_unannotated_todos()
if not unannotated_todos:
    st.write("No unannotated todos found.")
else:
    display_todos(unannotated_todos)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import instructor | ||
from typing import List | ||
from openai import AsyncOpenAI | ||
from asyncio import run | ||
from tqdm.asyncio import tqdm_asyncio as asyncio | ||
from pydantic import BaseModel, Field | ||
import sqlite3 | ||
|
||
|
||
# Async OpenAI client wrapped by instructor; `extract_todo` passes
# `response_model` to this client's completion call.
_async_openai = AsyncOpenAI()
client = instructor.from_openai(_async_openai)
|
||
|
||
class TodoItem(BaseModel):
    """
    This is a schema that represents an actionable item which the user needs to consider
    """

    # NOTE(review): the docstring and the field descriptions are presumably
    # exported with the model's schema, so their wording is kept verbatim.

    # Short label for the task (required).
    title: str = Field(description="This is a title for the todo item")
    # Text explaining how the task should be carried out (required).
    description: str = Field(
        description="This is a description that explains a plan of action for the todo"
    )
|
||
|
||
async def extract_todo(user_query: str, model: str = "gpt-3.5-turbo"):
    """Extract the todo items described in a free-form user message.

    Args:
        user_query: Natural-language text that may mention several tasks.
        model: Name of the chat model to call. Defaults to the model this
            script previously hard-coded, so existing callers are unaffected.

    Returns:
        A list of ``(TodoItem, user_query)`` tuples, one per extracted item,
        so every item stays paired with the prompt it was extracted from.
    """
    system_message = {
        "role": "system",
        "content": "You are a world class system that excels at extracting todo items from a user query",
    }
    user_message = {"role": "user", "content": user_query}

    # `client` is the module-level instructor client; `response_model` asks it
    # for a list of TodoItem objects.
    extracted = await client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
        response_model=List[TodoItem],
    )
    return [(item, user_query) for item in extracted]
|
||
|
||
async def process_todos(items):
    """Run `extract_todo` on every prompt and flatten the results.

    Args:
        items: Iterable of user prompts.

    Returns:
        A single flat list containing the ``(item, prompt)`` pairs produced
        for all prompts.
    """
    pending = []
    for prompt in items:
        pending.append(extract_todo(prompt))

    # NOTE: `asyncio` here is tqdm's `tqdm_asyncio` (see the import alias),
    # not the standard-library module.
    per_prompt = await asyncio.gather(*pending)

    flattened = []
    for pairs in per_prompt:
        flattened.extend(pairs)
    return flattened
|
||
|
||
if __name__ == "__main__":
    # Script entry point: make sure the `todos` table exists, extract todo
    # items from the sample prompts, and insert them as unannotated rows.
    from contextlib import closing

    data = [
        "This week I need to finalize the project report, schedule a meeting with the team, prepare the presentation slides, submit the budget review, and send the client update emails.",
        "Next week I must organize the department outing, update the project timeline, review the new intern applications, and coordinate the quarterly webinars.",
        "Tomorrow I should finalize the contract details, call the supplier for an update, draft the monthly newsletter, and check the inventory status.",
        "By the end of this month, I need to complete the performance reviews, plan the training sessions, archive old project files, and renew the software licenses.",
        "This Friday I have to prepare the weekly sales report, confirm the client appointments, oversee the network upgrade, and document the audit findings.",
    ]

    # One connection for the whole script, closed deterministically. The
    # previous version opened a first connection that was never closed and
    # then shadowed it with a second one.
    with closing(sqlite3.connect("tutorial.db")) as con:
        con.execute(
            "CREATE TABLE IF NOT EXISTS todos(id INTEGER PRIMARY KEY AUTOINCREMENT, annotated BOOLEAN DEFAULT FALSE, title TEXT, description TEXT, original_prompt TEXT)"
        )
        con.commit()

        # Each element is a (TodoItem, original_prompt) pair, not a bare TodoItem.
        todos = run(process_todos(data))

        # `with con` commits the batch on success and rolls it back on error.
        with con:
            con.executemany(
                "INSERT INTO todos (title, description,original_prompt) VALUES (?, ?,?)",
                [
                    (todo.title, todo.description, original_query)
                    for todo, original_query in todos
                ],
            )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
openai==1.23.6 | ||
instructor==1.2.3 | ||
pydantic==2.7.0 | ||
typer==0.12.3 | ||
streamlit==1.33.0 |