In [16]:
import json, requests, os
import io
# API key read from the OPENAI_API_KEY_MIT environment variable; scenario_extraction
# sends it to the annotation service as the `gpt_key` query parameter.
GPT_KEY = os.environ.get('OPENAI_API_KEY_MIT')
# Fail fast when the variable is unset or empty (os.environ.get returns None if unset).
assert GPT_KEY, "Please set the OPENAI_API_KEY_MIT environment variable"
# NOTE(review): API_ROOT is not referenced by the cells visible here — the driver loop
# passes "http://localhost:8000" explicitly. Confirm whether this constant is still needed.
API_ROOT = "http://localhost:8000/"

from pathlib import Path
import pandas as pd

In [10]:

def skema_integrated_pdf_extractions(pdf_path, url="https://api.askem.lum.ai"):
    """Call the SKEMA ``/text-reading/integrated-pdf-extractions`` endpoint for one PDF.

    The JSON response is cached next to the PDF, in a file whose name is the
    PDF's name with its suffix replaced by ``.skema_integrated.json``. When that
    cache file already exists it is loaded and returned without contacting the
    service.

    Args:
        pdf_path: Location of the PDF, as a ``str`` or ``pathlib.Path``.
            A leading ``~`` is expanded.
        url: Base URL of the SKEMA API, without a trailing slash.

    Returns:
        The decoded JSON payload, read from the cache file or from the service.

    Raises:
        AssertionError: If ``pdf_path`` does not exist.
        Exception: If the service responds with a status code other than 200;
            the message carries the status code and response text.
    """
    pdf_path = Path(pdf_path).expanduser()
    assert pdf_path.exists(), f"{pdf_path} does not exist"

    output_path = pdf_path.with_suffix('.skema_integrated.json')
    if output_path.exists():
        # Cache hit: reuse the earlier response instead of re-uploading the PDF.
        with open(output_path) as f:
            return json.load(f)

    params = {
        "annotate_skema": True,
        "annotate_mit": True,
    }

    # Keep the PDF open only for the duration of the upload so the handle is
    # always released, even when the request raises.
    with open(pdf_path, "rb") as pdf_file:
        files = [("pdfs", (str(pdf_path), pdf_file))]
        response = requests.post(
            f"{url}/text-reading/integrated-pdf-extractions",
            params=params,
            files=files,
        )

    if response.status_code != 200:
        raise Exception(f'{response.status_code=} {response.text=}')

    data = response.json()
    with open(output_path, 'w') as f:
        json.dump(data, f, indent=2)
    return data

In [2]:
# Development helper: import the annotation router module and re-execute it with
# importlib.reload — presumably so local edits to the module are picked up
# without restarting the kernel.
import importlib
import mitaskem.api.routers.annotation
importlib.reload(mitaskem.api.routers.annotation)

* 'schema_extra' has been renamed to 'json_schema_extra'


<module 'mitaskem.api.routers.annotation' from '/Users/orm/repos/mitaskem/mitaskem/api/routers/annotation.py'>

In [3]:
# from pathlib import Path
# example_data = Path("~/example_data/").expanduser()
# for file in example_data.iterdir():
#     if file.suffix == ".pdf":
#         print(file)
#         try:
#             data = skema_integrated_pdf_extractions(file)
#             df = list_scenarios_local(None, extractions=data, return_early=True)
#             print(df)
#         except Exception as e:
#             print(e)


In [22]:
def scenario_extraction(integrated_json, url="http://54.227.237.7"):
    """POST integrated extractions to ``{url}/annotation/list_scenarios/``.

    ``integrated_json`` is serialized with ``json.dumps`` and uploaded as the
    ``extractions_file`` multipart field (with an empty filename). The
    module-level ``GPT_KEY`` is sent as the ``gpt_key`` query parameter.

    Returns the decoded JSON body when the service answers with HTTP 200.
    For any other status the response text is printed and an ``Exception``
    carrying the status code and response text is raised.
    """
    query = {"gpt_key": GPT_KEY}

    # Wrap the serialized payload in an in-memory stream for the multipart upload.
    payload = json.dumps(integrated_json).encode('utf-8')
    upload = [("extractions_file", ('', io.BytesIO(payload)))]

    endpoint = f'{url}/annotation/list_scenarios/'
    response = requests.post(endpoint, params=query, files=upload)

    if response.status_code != 200:
        print(response.text)
        raise  Exception(f'{response.status_code=} {response.text=}')
    return response.json()

In [7]:
# Input PDFs processed by the driver loop below.
# NOTE(review): these are absolute paths under /Users/orm/example_data, so the
# notebook only runs as-is on a machine that has these files at these locations.
paths = [
    '/Users/orm/example_data/Patty-reviewed-1-28-24-Why is it difficult to accurately predict the COVID-19 epidemic (1).pdf',
    '/Users/orm/example_data/Patty-reviewed-1-28-24-bertozzi-et-al-2020-the-challenges-of-modeling-and-forecasting-the-spread-of-covid-19.pdf',
    '/Users/orm/example_data/Patty-reviewed-1-28-24-Mathematical modeling of COVID-19 transmission dynamics with a case study of Wuhan.pdf',
]


In [23]:
# For each input PDF: obtain the integrated extractions (loaded from the on-disk
# cache when present, otherwise requested from the SKEMA API), send them to the
# annotation service running at localhost:8000, and print the result as a table.
for path in paths:
    data = skema_integrated_pdf_extractions(path)
    # Overrides scenario_extraction's default URL with the local server.
    extr  = scenario_extraction(data, url="http://localhost:8000")
    print(pd.DataFrame(extr))

                         varname value    geo
0  basic reproduction number R 0     2  Wuhan
1                              K   < N   None
2                              N    40   None
3                              K   > N   None
                  varname  value            geo
0                 R ( t )   1.00          China
1                 R ( t )   1.00          Italy
2     reproduction number   2.50  United States
3   B ( CID :88 ) R ( T )   3.00             IK
4                       K   1.00           None
5                      R0   2.70     California
6                   gamma   0.12     California
7                      I0   0.10     California
8                      R0   4.10       New York
9                   gamma   0.10       New York
10                     I0   0.05       New York
      varname                                      value   geo
0   L C ( 0 )  number of confirmed cases 04 January 2020  None
1  L C ( 65 )    number of confirmed cases 09 March 2020  None
2   L