In [100]:
import sys 
import sqlite3
import argparse
import os 
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd
# Load variables from a local .env file into the environment.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

True

In [101]:
# We need to create a connection to the database file, named vocab.db
# We have retrieved this file from our kindle, and it contains the words we have looked up
def create_connection(db_file):
    """Open a SQLite connection to ``db_file``.

    Returns the ``sqlite3.Connection`` on success. If sqlite3 raises an error
    while opening, the error is printed and ``None`` is returned instead.
    """
    try:
        return sqlite3.connect(db_file)
    except sqlite3.Error as err:
        # Best-effort: report the problem and let the caller deal with None.
        print(err)
        return None

In [102]:
# We want to join three tables: LOOKUPS, BOOK_INFO and WORDS
# LOOKUPS and BOOK_INFO are joined on book_key from LOOKUPS and id from BOOK_INFO
# LOOKUPS and WORDS are joined on word_key from LOOKUPS and id from WORDS
# The only columns we need are: word_key/id, word, stem, title, and usage
# (note: the query below does not select stem)
def join_tables(conn):
    """Return every lookup joined with its word and its book.

    Each returned row is ``(word_key, word, title, usage)``, in the order of
    the SELECT list. Lookups without a matching WORDS or BOOK_INFO row are
    dropped because both joins are inner joins.
    """
    query = "SELECT word_key, word, title, usage FROM LOOKUPS JOIN WORDS ON LOOKUPS.word_key = WORDS.id JOIN BOOK_INFO ON LOOKUPS.book_key = BOOK_INFO.id"
    cursor = conn.cursor()
    # Cursor.execute returns the cursor itself, so the fetch can be chained.
    return cursor.execute(query).fetchall()

In [103]:
# Let's try our code
def retrieve_rows(file):
    """Open the database at ``file``, fetch the joined lookup rows, and close it.

    Returns the list produced by ``join_tables``.

    Raises ``sqlite3.OperationalError`` if ``create_connection`` could not
    open the database (it signals that by returning ``None``).
    """
    conn = create_connection(file)
    if conn is None:
        # create_connection already printed the underlying sqlite3 error;
        # fail here with a clear message instead of an opaque error on `with None`.
        raise sqlite3.OperationalError(f"Could not open database file: {file!r}")
    try:
        # `with conn` only commits or rolls back the transaction ...
        with conn:
            return join_tables(conn)
    finally:
        # ... it does not close the connection, so close it explicitly.
        conn.close()

In [104]:
# Relative path to the vocabulary database used by the cells below.
file = 'vocab.db'
# Fetch the joined rows once; this is the fetchall() result returned via join_tables.
rows = retrieve_rows(file)

In [105]:
# Create an OpenAI client to use the API.
# NOTE(review): no API key is passed explicitly, so the client presumably reads it
# from the environment (e.g. values loaded by load_dotenv() above) — confirm.
client = OpenAI()

In [106]:
# For each word in our joined tables, we want to determine the meaning of the word in the context of the usage. 
# We will use the OpenAI client to generate a response for each word
def generate_definition(word, usage, model="gpt-4o-mini"):
    """Ask the chat model for a short English definition of ``word`` in context.

    Parameters
    ----------
    word
        The looked-up word; interpolated into the user prompt.
    usage
        The passage the word appeared in; interpolated into the user prompt.
    model
        Model name passed to the chat completions API. Defaults to
        "gpt-4o-mini", the value that used to be hard-coded, so existing
        two-argument calls behave as before.

    Returns
    -------
    The ``content`` of the first choice's message, exactly as the API
    returned it (no stripping or post-processing is applied).

    Uses the module-level ``client``.
    """
    system_prompt = "You are a language bot who EXCLUSIVELY returns the English definition of the word given the context, without explanation. The translation must at most be several words."
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": f"Define \"{word}\" in the context of \"{usage}\"."
            }
        ]
    )
    return completion.choices[0].message.content


In [107]:
# Only process the first SAMPLE_SIZE lookups while testing, to keep the number
# of API calls (one per row) small.
SAMPLE_SIZE = 50
rows = retrieve_rows(file)[:SAMPLE_SIZE]
definitions = []
results = []
for row in rows:
    # Row layout follows the SELECT in join_tables: (word_key, word, title, usage).
    word, usage = row[1], row[3]
    definition = generate_definition(word, usage)
    definitions.append(definition)
    results.append([word, definition, usage])
# Build the frame once from the collected records rather than appending row by row.
df = pd.DataFrame(results, columns=['Word', 'Definition', 'Context'])



In [108]:
# Preview the first rows instead of rendering the whole frame.
df.head(10)

Unnamed: 0,Word,Definition,Context
0,我,"""I""",我叫陈小希 我人生中最重要的事情就是画画 画画… 和… 就画画吧 但是我16岁那年 最重要的...
1,糊,to cheat or deceive,你就玩 …很用心写的 你竟然拿歌词糊弄我 我自己写的都比你教我写的好 谁写得好 你去送一下不...
2,岁月,Time or years.,“自从有了你 世界变得好美丽 一起漂泊 一起流浪 岁月里全都是…” 还真的是歌词啊 喂 小希...
3,晨曦,Morning light,我叫陈小希 我人生中最重要的事情就是画画 画画… 和… 就画画吧 但是我16岁那年 最重要的...
4,欢迎,Welcome,我叫陈小希 我人生中最重要的事情就是画画 画画… 和… 就画画吧 但是我16岁那年 最重要的...
5,剩下,"""remaining""",刘老师 我跟你说 就你们班这帮兔崽子啊 闹事不是一天两天了 这次不严惩 下次就进少管所 -我...
6,检讨,self-reflection or self-criticism,不是 好吧 我教你写检讨吧 不用 你这句话不能这么写 哪有写检讨口气这么大的？
7,闹了,"""Stop it.""",陈小希 我们走吧 “一个默默爱着你的人 我虽然不能喝酒 但是我却想喝醉 醉在你怀里” -别闹...
8,繁华,prosperous and bustling,没有 也许是仙女施了个魔法 变出你和我对话 这繁华世界让人应接不暇 哪里会让我停下？
9,精神,Mental state,她谈恋爱了 高考没考上 整个精神失常了 故事的走向怎么这么可怕呀？
