In [28]:
import datetime
import io
import json
import pathlib
from typing import Optional, List, Union, Literal

import httpx
import pandas as pd
import picologging as logging
from pydantic import BaseModel, Field

In [3]:
# Timestamp of this notebook run; `now_str` is reused below as a run identifier in paths.
now = datetime.datetime.now()
now_str = now.strftime("%Y%m%d_%H%M%S")

# Make sure the log root exists, plus a sub-directory named after this run.
for log_dir in (
    "../notebooks_logging/07_collect_result",
    f"../notebooks_logging/07_collect_result/{now_str}",
):
    pathlib.Path(log_dir).mkdir(parents=True, exist_ok=True)

# Note: the log file is created next to the per-run directory, not inside it.
file_handler = logging.FileHandler(f"../notebooks_logging/07_collect_result/{now_str}.log")
stream_handler = logging.StreamHandler()
# stdout_formatter = logging.Formatter(fmt="%(message)s")
# stream_handler.setFormatter(stdout_formatter)

# Root logger writes every record to both the log file and the notebook output.
logging.basicConfig(
    handlers=[file_handler, stream_handler],
    level=logging.INFO,
    format='%(asctime)s | %(levelname)s | %(message)s',
    datefmt='%Y-%m-%dT%H:%M:%S',
    encoding='utf-8',
    # NOTE(review): in stdlib logging, force=True drops handlers installed by an
    # earlier run of this cell; picologging is assumed to mirror that - confirm.
    force=True,
)

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Short aliases for the two log levels used in this notebook.
li, lw = logger.info, logger.warning

li("start")
lw("check warning")

2025-02-26T23:25:55 | INFO | start


In [11]:
# Collect the per-question result files (presumably produced by the
# 06_ask_questions step - confirm). glob() yields entries in an unspecified,
# OS-dependent order, so sort them to make the listing and the order in which
# they are processed reproducible.
files4questions = sorted(pathlib.Path("../data_temp/06_ask_questions").glob("*"))

# Build the listing outside the f-string: reusing the same quote character inside
# an f-string replacement field only parses on Python 3.12+.
files4questions_listing = "\n".join(str(x) for x in files4questions)
li(f"files4questions=\n{files4questions_listing}")

2025-02-26T23:30:04 | INFO | files4questions=
..\data_temp\06_ask_questions\q0000_63688d5d0b4f12e9f847c5407439a1ec46047a4a.json
..\data_temp\06_ask_questions\q0001_9cc771c2171bacc138cda4e7d68b8b427a514d81.json
..\data_temp\06_ask_questions\q0002_9cc771c2171bacc138cda4e7d68b8b427a514d81.json
..\data_temp\06_ask_questions\q0003_a6f23184a87f3343f17e8e8ed08f604615cdefc1.json


In [13]:
# Load the list of questions; each entry is later read via its "text" and "kind" keys.
# JSON is decoded explicitly as UTF-8 so the result does not depend on the
# platform's default text encoding.
with open("../data_in/questions.json", encoding="utf-8") as f:
    questions = json.load(f)
questions

[{'text': "According to the annual report, what is the Operating margin (%) for Altech Chemicals Ltd  (within the last period or at the end of the last period)? If data is not available, return 'N/A'.",
  'kind': 'number'},
 {'text': "According to the annual report, what is the Operating margin (%) for Cofinimmo  (within the last period or at the end of the last period)? If data is not available, return 'N/A'.",
  'kind': 'number'},
 {'text': 'Did Cofinimmo outline any new ESG initiatives in the annual report?',
  'kind': 'boolean'},
 {'text': "What is the total number of employees let go by Hagerty, Inc. according to the annual report? If data is not available, return 'N/A'.",
  'kind': 'number'},
 {'text': "Which leadership **positions** changed at Renold plc in the reporting period? If data is not available, return 'N/A'.",
  'kind': 'names'},
 {'text': "What was the Gross margin (%) for Charles & Colvard, Ltd. according to the annual report (within the last period or at the end of 

In [16]:
class Question(BaseModel):
    """A question: its text and the kind of answer it expects."""

    text: str
    kind: Literal[
        "number",
        "name",
        "boolean",
        "names",
    ]

class SourceReference(BaseModel):
    """Reference to a page of a PDF file, identified by the file's SHA1 hash."""

    # Both fields are required (default is Ellipsis).
    pdf_sha1: str = Field(
        default=...,
        description="SHA1 hash of the PDF file",
    )
    page_index: int = Field(
        default=...,
        description="Physical page number in the PDF file",
    )

class Answer(BaseModel):
    """One answer of a submission: the value, an optional echo of the question, and its sources."""

    question_text: str | None = Field(
        default=None,
        description="Text of the question",
    )
    kind: Literal["number", "name", "boolean", "names"] | None = Field(
        default=None,
        description="Kind of the question",
    )
    # Required. The member order of the union is deliberately left untouched,
    # since it may influence how pydantic resolves the value - TODO confirm.
    value: Union[float, str, bool, list[str], Literal["N/A"]] = Field(
        default=...,
        description="Answer to the question, according to the question schema",
    )
    references: list[SourceReference] = Field(
        default=[],
        description="References to the source material in the PDF file",
    )

class AnswerSubmission(BaseModel):
    """Top-level submission payload: team identification plus the list of answers."""

    # All three fields are required (default is Ellipsis).
    team_email: str = Field(
        default=...,
        description="Email that your team used to register for the challenge",
    )
    submission_name: str = Field(
        default=...,
        description="Unique name of the submission (e.g. experiment name)",
    )
    answers: list[Answer] = Field(
        default=...,
        description="List of answers to the questions",
    )

In [None]:
res = {
    "team_email": "keiv.fly@gmail.com",
    "submission_name": "main",
}
answers = []
for question in questions:
    ans = {}
    q_text = question["text"]
    ans["question_text"] = q_text
    q_kind = question["kind"]
    ans["kind"] = q_kind

    match q_kind:
        case "number":
            ans_value = "N/A"           
        case "name":
            ans_value = "N/A"
        case "boolean":
            ans_value = False
        case "names":
            ans_value = "N/A"

    ans["value"] = ans_value

    ans["references"] = []
    answers.append(ans)
    
res["answers"] = answers
_ = AnswerSubmission(**res)

AnswerSubmission(team_email='keiv.fly@gmail.com', submission_name='main', answers=[Answer(question_text="According to the annual report, what is the Operating margin (%) for Altech Chemicals Ltd  (within the last period or at the end of the last period)? If data is not available, return 'N/A'.", kind='number', value='N/A', references=[]), Answer(question_text="According to the annual report, what is the Operating margin (%) for Cofinimmo  (within the last period or at the end of the last period)? If data is not available, return 'N/A'.", kind='number', value='N/A', references=[]), Answer(question_text='Did Cofinimmo outline any new ESG initiatives in the annual report?', kind='boolean', value='N/A', references=[]), Answer(question_text="What is the total number of employees let go by Hagerty, Inc. according to the annual report? If data is not available, return 'N/A'.", kind='number', value='N/A', references=[]), Answer(question_text="Which leadership **positions** changed at Renold pl

In [26]:
# Merge the per-question result files into the submission skeleton `res`.
for file in files4questions:
    # NOTE(review): read_text() uses the platform default encoding; the encoding
    # used when these files were written is not visible here - confirm.
    file_content_txt = file.read_text()
    q_sha1_res = json.loads(file_content_txt)

    if q_sha1_res["value"] == "N/A":
        continue  # nothing found for this file: keep the placeholder answer

    # "question_id" is used as a positional index into res["answers"].
    q_id = q_sha1_res["question_id"]
    answer = res["answers"][q_id]
    answer["value"] = q_sha1_res["value"]

    reference = {
        "pdf_sha1": q_sha1_res["ref_sha1_i"],
        "page_index": q_sha1_res["ref_sha1_i_page"],
    }
    # `res` is mutated in place, so re-running this cell used to append the same
    # reference again on every run; only add references that are not present yet.
    if reference not in answer["references"]:
        answer["references"].append(reference)

res
    

{'team_email': 'keiv.fly@gmail.com',
 'submission_name': 'main',
 'answers': [{'question_text': "According to the annual report, what is the Operating margin (%) for Altech Chemicals Ltd  (within the last period or at the end of the last period)? If data is not available, return 'N/A'.",
   'kind': 'number',
   'value': 'N/A',
   'references': []},
  {'question_text': "According to the annual report, what is the Operating margin (%) for Cofinimmo  (within the last period or at the end of the last period)? If data is not available, return 'N/A'.",
   'kind': 'number',
   'value': 81.0,
   'references': [{'pdf_sha1': '9cc771c2171bacc138cda4e7d68b8b427a514d81',
     'page_index': 18}]},
  {'question_text': 'Did Cofinimmo outline any new ESG initiatives in the annual report?',
   'kind': 'boolean',
   'value': True,
   'references': [{'pdf_sha1': '9cc771c2171bacc138cda4e7d68b8b427a514d81',
     'page_index': 20}]},
  {'question_text': "What is the total number of employees let go by Hagert

In [None]:
# Save the payload under a folder named after this run's timestamp.
submission_folder_before_submission = now_str
pre_submission_dir = pathlib.Path(f"../data_out/{submission_folder_before_submission}")
pre_submission_dir.mkdir(parents=True, exist_ok=True)

pre_submission_path = pre_submission_dir / "submission.json"
with pre_submission_path.open("w") as f:
    json.dump(res, f, indent=4)

In [None]:
# Upload the payload to the "check-submission" endpoint (presumably a validation
# dry run rather than the real submission - confirm) and show the JSON reply.
# Requires `io`, which this cell used without it ever being imported.
url = "https://rag.timetoact.at/check-submission"
headers = {"accept": "application/json"}

# Serialise once; io.BytesIO wraps the bytes as an in-memory file for the upload.
payload = json.dumps(res).encode()
files = {
    "file": ("submision.json", io.BytesIO(payload), "application/json")
}
response = httpx.post(url, headers=headers, files=files)
response.json()

In [None]:
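# Real submission - left commented out, presumably to avoid submitting by accident.
# The cells below read `submission_response`, which is only assigned here, so they
# fail with NameError until this cell is uncommented and run.
# url = "https://rag.timetoact.at/submit"
# headers = {"accept": "application/json"}
# files = {
#     "file": ("submision.json", io.BytesIO(json.dumps(res).encode()), "application/json")
# }
# response = httpx.post(url, headers=headers, files=files)
# submission_response = response.json()
# submission_response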

In [None]:
# Derive the archive folder name "<timestamp>_<submission name>" from the reply.
# `submission_response` is only assigned in the submit cell, which is commented out.
response_body = submission_response["response"]
submitted_at = pd.to_datetime(response_body["time"])
s_datetime = submitted_at.strftime("%Y%m%d_%H%M%S")
s_submission_name = response_body["submission_name"]
submission_folder = f"{s_datetime}_{s_submission_name}"
submission_folder

In [None]:
# Store a copy of the payload in the folder named by `submission_folder`.
submission_dir = pathlib.Path(f"../data_out/{submission_folder}")
submission_dir.mkdir(parents=True, exist_ok=True)

submission_path = submission_dir / "submission.json"
with submission_path.open("w") as f:
    json.dump(res, f, indent=4)

In [None]:
# Store the reply next to the payload; the folder is expected to exist already
# (this cell does not create it).
response_path = pathlib.Path(f"../data_out/{submission_folder}") / "submission_response.json"
with response_path.open("w") as f:
    json.dump(submission_response, f, indent=4)