In [29]:
import sys
import json

sys.path.append("..")

import pandas as pd
import requests
from langchain.embeddings.openai import OpenAIEmbeddings

from common.aidevs_authorize import get_task, get_token, send_answer
from common.openai_requests import send_chat_completion
from common.utils import get_message, print_task
from common.databases import (
    create_collection,
    create_database_from_json,
    query_collection,
    get_points_for_upsert,
    upsert_qdrant_collection,
    read_table,
    get_conn,
    query_table_by_uuid,
)

In [66]:
# Authorize for the "people" task and fetch its payload. `token` is reused
# when the answer is submitted, and `task` supplies the data URL and question.
task_name = "people"
token = get_token(task_name)
task = get_task(token)
# Print the task payload field by field. The second argument is forwarded to
# print_task as-is (presumably a display/wrap width — TODO confirm).
print_task(task, 100)

code
0
****************************************************************************************************
msg
retrieve the data set (JSON) and answer the question. The question will change every time the task
is called. I only ask about favourite colour, favourite food and place of residence
****************************************************************************************************
data
https://zadania.aidevs.pl/data/people.json
****************************************************************************************************
question
Gdzie mieszka Krysia Ludek?
****************************************************************************************************
hint1
Does everything have to be handled by the language model?
****************************************************************************************************
hint2
prepare knowledge DB for this task
****************************************************************************************************


In [5]:
# Download the data set referenced by the task and load it into the local
# database that the following cells read back.
#
# - timeout: without one, requests waits indefinitely on a stalled connection.
# - raise_for_status(): surfaces an HTTP error here instead of letting an
#   error page fail later as a confusing JSON decode error.
# - resp.json(): lets requests decode the JSON body directly from the bytes
#   rather than going through the guessed text encoding of resp.text.
resp = requests.get(task["data"], timeout=30)
resp.raise_for_status()
data = resp.json()
# Use task_name (== "people") so the database name stays in sync with the
# names the other cells derive from task_name.
create_database_from_json(data, task_name)

In [6]:
# Name shared by the stored table and the vector collection: the task name, upper-cased.
collection_name = task_name.upper()

# Load the stored rows and append the text that will be embedded for each
# person: "<imie> <nazwisko>" (first name, space, surname).
df = read_table(collection_name).assign(
    embedded_col=lambda people: people["imie"] + " " + people["nazwisko"]
)
df

Unnamed: 0,imie,nazwisko,wiek,o_mnie,ulubiona_postac_z_kapitana_bomby,ulubiony_serial,ulubiony_film,ulubiony_kolor,uuid,collection_name,embedded_col
0,Dariusz,Kaczor,46,niekiedy lubie jeść lody. Mieszkam w Radomiu. ...,Admirał Gwiezdnej Floty,Stranger Things,Avengers,morski,dd680c1d-142a-457d-8b6f-92ab1285ea75,PEOPLE,Dariusz Kaczor
1,Katarzyna,Rumcajs,32,lubie zjadać lody. Mieszkam w Łodzi. Interesuj...,nie oglądam,Big Bang Theory,The Lord of the Rings,magenta,793bff33-bb1d-4bf7-b1d1-ff53c13b0e39,PEOPLE,Katarzyna Rumcajs
2,Renata,Pizza,49,czasami lubie zjadać pizzę. Mieszkam w Łodzi. ...,nie pamiętam tych wsystkich imion,Walking Dead,The Prestige,czerwony,14dff9d7-89e6-48fd-bb91-d0d421df212d,PEOPLE,Renata Pizza
3,Katarzyna,Kot,61,niekiedy lubie jeść pizzę. Mieszkam w Krakowie...,nie pamiętam tych wsystkich imion,Game of Thrones,Titanic,oliwkowy,29d14071-661e-4097-8f93-2b1a05543c91,PEOPLE,Katarzyna Kot
4,Władysław,Bajorko,24,niekiedy lubie jeść pizzę. Mieszkam w Poznaniu...,nie oglądam,LOST,The Sixth Sense,malinowy,7630b4b4-e5b9-4eda-b6fc-ba1e8926b9a0,PEOPLE,Władysław Bajorko
...,...,...,...,...,...,...,...,...,...,...,...
1382,Agata,Kaczka,41,niekiedy lubie spożywać pizzę. Mieszkam w Chrz...,Chorąży Torpeda,Dom z papieru,Titanic,oliwkowy,983f5327-2731-400f-82a5-76766ba857a1,PEOPLE,Agata Kaczka
1383,Jan,Kot,42,czasami lubie jeść lody. Mieszkam w Radomiu. I...,Sułtan Kosmitów,Big Bang Theory,The Lion King,złoty,7d331ba6-5ab0-4aaf-b00a-55cf8625a918,PEOPLE,Jan Kot
1384,Rafał,Banan,36,czasami lubie jeść lody. Mieszkam w Łodzi. Int...,nie oglądam,Moda na sukces,The Departed,biały,7a371de7-5ea4-4490-ba4c-a993db51828d,PEOPLE,Rafał Banan
1385,Maciej,Kaczor,62,niekiedy lubie zjadać lody. Mieszkam w Radomiu...,Admirał Gwiezdnej Floty,Dom z papieru,Titanic,koralowy,641b62f5-ab08-4795-b84b-4e2e5ce01feb,PEOPLE,Maciej Kaczor


In [23]:
# Build upsert points from the "embedded_col" text of every row, then write
# them to the vector collection named by collection_name.
# NOTE(review): create_collection is imported but never called in the visible
# cells — presumably the collection already exists; confirm before running
# this on a fresh setup.
points = get_points_for_upsert(df, "embedded_col")
upsert_qdrant_collection(points, collection_name)

In [67]:
# Embed the whole question text and search the collection for the closest
# stored "<first name> <surname>" entry. In the recorded run the question
# about "Krysia Ludek" matched the stored "Krystyna Ludek".
embeddings = OpenAIEmbeddings()

embedded_query = embeddings.embed_query(task["question"])
# NOTE: `resp` now holds the search hits; the download cell used the same
# name for the HTTP response.
resp = query_collection(embedded_query, collection_name)
resp

[ScoredPoint(id='88d0cedd-b91c-4637-9acc-104dad7a1512', version=11, score=0.8911058, payload={'content': 'Krystyna Ludek', 'source': 'PEOPLE', 'uuid': '88d0cedd-b91c-4637-9acc-104dad7a1512'}, vector=None)]

In [68]:
# Resolve the best vector-search hit back to its full database row.

# Fail with a clear message when the search returned nothing, instead of an
# opaque IndexError from resp[0].
if not resp:
    raise LookupError(f"No person matched the question: {task['question']!r}")
uuid_found = resp[0].payload["uuid"]

matching_rows = query_table_by_uuid(collection_name.lower(), uuid_found)
if matching_rows.empty:
    raise LookupError(f"No database row found for uuid {uuid_found!r}")

# Take the first returned row by position: .iloc[0] does not depend on the
# frame's index labels, whereas .loc[0] only works if a row is labelled 0.
answer_record = matching_rows.iloc[0]
answer_record

imie                                                                         Krystyna
nazwisko                                                                        Ludek
wiek                                                                               46
o_mnie                              niekiedy lubie jeść lody. Mieszkam w Warszawie...
ulubiona_postac_z_kapitana_bomby                                          nie oglądam
ulubiony_serial                                                                  LOST
ulubiony_film                                                         The Sixth Sense
ulubiony_kolor                                                              granatowy
uuid                                             88d0cedd-b91c-4637-9acc-104dad7a1512
collection_name                                                                PEOPLE
Name: 0, dtype: object

In [69]:
# Assemble the system prompt: one instruction line followed by the facts known
# about the matched person (name, free-text description, favourite colour).
# NOTE(review): "sentce" in the instruction looks like a typo for "sentence";
# it is kept verbatim here because editing the prompt changes what the model
# receives.
prompt_lines = [
    "Be very specific and ultra-concise, return as little information as possible. When asked a question, return an equivalent sentce instead of full sentence",
    f"Informacje o: {answer_record.imie} {answer_record.nazwisko}",
    f"Opis: {answer_record.o_mnie}",
    f"Ulubiony kolor: {answer_record.ulubiony_kolor}",
]
system_content = "\n".join(prompt_lines)

# Ask the model the task question against that context, then keep only the
# message extracted by get_message as the answer.
completion = send_chat_completion(
    model_version="gpt-4",
    system_content=system_content,
    user_content=task["question"],
)
ai_resp = get_message(completion)

In [70]:
# Display the model's answer so it can be checked before it is submitted.
ai_resp

'Warszawa.'

In [71]:
# Submit the answer with the token obtained for this task; the cell output is
# whatever send_answer returns.
send_answer(token, ai_resp)

'{\n    "code": 0,\n    "msg": "OK",\n    "note": "CORRECT"\n}'