In [63]:
import os
import pandas as pd
from pocketbase import PocketBase
from openai import AsyncOpenAI, OpenAI, pydantic_function_tool
from dotenv import load_dotenv

In [64]:
load_dotenv('.env')

True

In [65]:
pb = PocketBase(os.getenv('PB_URL'))
pb.admins.auth_with_password(os.getenv("PB_EMAIL"), os.getenv("PB_PASSWORD"))

openai = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
async_openai = AsyncOpenAI(api_key=os.getenv('OPENAI_API_KEY'))

In [66]:
from pydantic import BaseModel

# INTRO
class Introduction(BaseModel):
    overview: str
    typical_questions: list[str]

# KEY WORDS AND PHRASES
class Word(BaseModel):
    word: str
    meaning: str
    translation: str
    example: str

class SynonymAntonym(BaseModel):
    synonym: str
    antonym: str
    synonym_translation: str
    antonym_translation: str

class Collocation(BaseModel):
    collocation: str
    meaning: str
    translation: str
    example: str

class KeyWordsAndPhrases(BaseModel):
    main_words: list[Word]
    adjectives: list[Word]
    phrasal_verbs: list[Word]
    synonyms_antonyms: list[SynonymAntonym]
    collocations: list[Collocation]

# EXPRESSIONS AND IDIOMS
class Expression(BaseModel):
    expression: str
    usage: str

class Idiom(BaseModel):
    idiom: str
    meaning: str

class ExpressionsAndIdioms(BaseModel):
    expressions: list[Expression]
    idioms: list[Idiom]

# USAGE TIPS
class SampleAnswer(BaseModel):
    question: str
    answer: str

class CommonMistake(BaseModel):
    mistake: str
    advice: str

class UsageTips(BaseModel):
    sample_answers: list[SampleAnswer]
    common_mistakes: list[CommonMistake]


# TOPIC ESSENTIAL SCHEMA
class TopicEssentialSchema(BaseModel):
    introduction: Introduction
    key_words_and_phrases: KeyWordsAndPhrases
    expressions_and_idioms: ExpressionsAndIdioms
    usage_tips: UsageTips


In [67]:
files = []
for file in os.listdir('./essentials_content'):
    files.append(os.path.join("./essentials_content", file))

In [68]:
async def process_topic(file_uri: str):
    topic_slug = file_uri.split('/')[-1].split('.')[0]

    with open(file_uri, 'r') as file:
        content = file.read()

    topic = pb.collection("topics").get_first_list_item(f"slug = '{topic_slug}'")

    res = await async_openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": (
                "You need to transform CONTENT into a structured format. If you can't do it with data provided in the CONTENT, you need to fill it with your own knowledge."
                "You need to create quality content for english learners. Do not hallucinate, do not lie, do not provide false information."
                )},
            {"role": "system", "content": f"CONTENT: {content}"},
            ],
            tools=[
            pydantic_function_tool(TopicEssentialSchema),
            ],
        )

    new_content = res.choices[0].message.tool_calls[0].function.arguments

    try:
        pb.collection("topics").update(topic.id, {"essentials": new_content})
        return topic_slug + " OK", new_content[:20]
    except Exception as e:
        return e

In [69]:
import asyncio


tasks = []
for file_uri in files:
    tasks.append(process_topic(file_uri))

results = await asyncio.gather(*tasks)
print(results)

[('work-business-money-shopping OK', '{"introduction":{"ov'), ('food-eating OK', '{"introduction":{"ov'), ('things-objects OK', '{"introduction":{"ov'), ('clothes-accessories OK', '{"introduction":{"ov'), ('body-mind OK', '{"introduction":{"ov'), ('media-entertainment OK', '{"introduction":{"ov'), ('technology OK', '{"introduction": {"o'), ('free-time-hobbies-skills OK', '{"introduction":{"ov'), ('schools-studies OK', '{"introduction": {"o'), ('family-friends-other-people OK', '{"introduction":{"ov'), ('travel-transport OK', '{"introduction":{"ov'), ('nature-animals-the-environment OK', '{"introduction":{"ov'), ('places OK', '{"introduction":{"ov'), ('home-hometown OK', '{"introduction":{"ov')]
