# Extract graph

In [25]:
from dotenv import load_dotenv
import json
from time import sleep
load_dotenv("../.env")

True

In [5]:
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain.chains import LLMChain
from langchain.llms import BaseLLM
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate


In [4]:
# Chat model that backs the knowledge-extraction chain built later in this notebook.
# NOTE(review): presumably picks up its API credentials from the environment
# populated by load_dotenv("../.env") — confirm.
chat = ChatOpenAI(model="gpt-3.5-turbo-16k")

Build a chain for knowledge extraction

In [14]:
class KGChain(LLMChain):
    """LLM chain that asks a model to grow a knowledge graph from paper text.

    The chain's prompt takes two inputs: ``paper_text`` (a piece of a paper)
    and ``knowledge_graph`` (the graph built so far), and asks the model to
    return the enhanced graph as JSON placed between ``###`` markers.
    """

    @classmethod
    def from_llm(cls, llm: BaseLLM):
        """Create a ``KGChain`` around ``llm`` with the extraction prompt attached.

        Args:
            llm: language model that will answer the extraction prompt.
                NOTE(review): this notebook passes a ``ChatOpenAI`` instance
                here — confirm ``BaseLLM`` is the intended annotation.

        Returns:
            A chain instance constructed with ``verbose=True``.
        """
        # The text below is sent to the model verbatim (after formatting),
        # so its wording and whitespace are part of the chain's behaviour.
        extraction_template = """
    Please extract knowledge from the following piece of text from a paper.:
    {paper_text}, current knowledge graph structure is:{knowledge_graph},
    please enhance the knowledge graph with the extracted knowledge if necessary.
    The node types include:
    * disease
    * symptom
    * diagnosis
    * stage
    * test
    * treatment
    * drug
    * gene
    * protein
    The edge types include, but not limited to:
    * has_symptom
    * associate_with
    * diagnose
    * has_stage
    * cure
    if it's references and background research ignore it.
    Please return only the json structure between '###'
    """

        kg_prompt = PromptTemplate(
            input_variables=["paper_text", "knowledge_graph"],
            template=extraction_template,
        )

        return cls(llm=llm, prompt=kg_prompt, verbose=True)

In [10]:
# Starting graph handed to the model before anything has been extracted.
# Despite the name it is not literally empty: it carries one disease node,
# one symptom node and the edge linking them, which also shows the model
# the node/edge layout used in this notebook.
EMPTY_KG_DATA = {
    "nodes": [
        {"ntype": "disease", "name": "Esophageal Carcinoma", "id": 1},
        {"ntype": "symptom", "name": "Dysphagia", "id": 2},
    ],
    "edges": [
        {"etype": "has_symptom", "source": 1, "target": 2},
    ],
}

# Same graph serialised to JSON and wrapped in the '###' markers the prompt
# asks the model to use around its answer.
EMPTY_KG = "\n".join(["###", json.dumps(EMPTY_KG_DATA), "###"])

In [15]:
# Extraction chain bound to the chat model; extract_one_paper calls
# `extractor.run(...)` once per text chunk.
extractor = KGChain.from_llm(llm=chat)

## Data
For instructions on collecting more data, see the [crawler](crawler.ipynb) notebook.

In [16]:
from pathlib import Path

In [17]:
# Root data folder, relative to the directory the notebook runs from.
DATA = Path("./data/")
# Sub-folder that is searched for the papers' *.txt files.
TEXT = DATA.joinpath("text")

In [19]:
# Every *.txt file directly inside TEXT.
# NOTE(review): the listing shown below is not in sorted order, so which file
# ends up as paper_list[0] presumably depends on the filesystem — consider
# sorting if a reproducible choice matters.
paper_list = [*TEXT.glob("*.txt")]
paper_list

[PosixPath('data/text/PMC9713002.txt'),
 PosixPath('data/text/PMC9709273.txt'),
 PosixPath('data/text/PMC9713855.txt'),
 PosixPath('data/text/PMC9708733.txt'),
 PosixPath('data/text/PMC9722938.txt'),
 PosixPath('data/text/PMC9712805.txt'),
 PosixPath('data/text/PMC9713848.txt'),
 PosixPath('data/text/PMC9712015.txt'),
 PosixPath('data/text/PMC9708886.txt'),
 PosixPath('data/text/PMC9714501.txt'),
 PosixPath('data/text/PMC9709130.txt'),
 PosixPath('data/text/PMC9711964.txt'),
 PosixPath('data/text/PMC9713810.txt')]

In [22]:
from tqdm.auto import tqdm
from typing import Iterator

In [23]:
def get_json_string(text, json_end: str = "###") -> str:
    """Pull the JSON payload out of a model reply that may wrap it in delimiters.

    Args:
        text: raw reply text.
        json_end: delimiter expected around the payload (``###`` by default).

    Returns:
        * ``text`` stripped, when the delimiter does not occur at all;
        * otherwise the stripped text that follows the first delimiter;
        * if that part is empty (e.g. the reply is ``{...}\\n###`` with only a
          trailing delimiter, or starts with an empty ``######`` pair), the
          first non-empty delimited part instead, or ``""`` if there is none.
    """
    if json_end not in text:
        return text.strip()

    segments = [segment.strip() for segment in text.split(json_end)]
    # The delimiter is present, so there are always at least two segments;
    # segments[1] is whatever follows the first delimiter.
    if segments[1]:
        return segments[1]
    # Nothing directly after the first delimiter: do not throw the payload
    # away, take the first segment that actually contains text.
    return next((segment for segment in segments if segment), "")


def text_slicer(text_path, char_size: int = 3000) -> Iterator[str]:
    """Yield a text file as consecutive, non-overlapping character slices.

    The file is read in full on the first iteration; a tqdm progress bar
    (removed when finished, ``leave=False``) tracks the slices.

    Args:
        text_path: path of the text file to read.
        char_size: maximum number of characters per slice; the last slice may
            be shorter. An empty file yields nothing.

    Yields:
        Slices of the file content, in order.
    """
    # Read explicitly as UTF-8: the papers contain non-ASCII characters
    # (e.g. "†", "χ", "λ", "–"), and the default encoding of open() depends on
    # the platform locale.
    with open(text_path, encoding="utf-8") as f:
        text = f.read()
    # range() has a length, so tqdm can size the bar without building a list.
    for start in tqdm(range(0, len(text), char_size), leave=False):
        yield text[start:start + char_size]


def extract_one_paper(
    text_path: Path,
    char_size: int = 2000,
    request_delay: float = 1,
) -> str:
    """Build a knowledge graph for one paper by feeding it to the extractor in chunks.

    Each chunk from ``text_slicer`` is sent to ``extractor`` together with the
    graph accumulated so far. A reply that parses as JSON replaces the
    accumulated graph; a reply that does not is reported with ``print`` and
    skipped, so one bad reply does not abort the paper.

    Args:
        text_path: path of the paper's text file.
        char_size: number of characters per chunk sent to the model.
        request_delay: seconds to sleep after every model call (the previous
            hard-coded value, 1, is the default).

    Returns:
        The final graph as a plain JSON string. If no reply could be parsed
        (or the file is empty) this is the seed graph ``EMPTY_KG_DATA``
        serialised as JSON, so the result can always be passed to
        ``json.loads``.
    """
    # Graph text shown to the model. The first chunk sees the '###'-wrapped
    # seed; later chunks see the last successfully parsed graph.
    kg = EMPTY_KG
    # Graph handed back to the caller. Kept separate from `kg` because the
    # wrapped seed is not valid JSON on its own.
    kg_json = json.dumps(EMPTY_KG_DATA)

    for chunk_no, text in enumerate(text_slicer(text_path, char_size), start=1):
        res = extractor.run(
            paper_text=text,
            knowledge_graph=kg,
        )
        sleep(request_delay)

        try:
            # Round-trip through json to validate and normalise the reply;
            # both names are only rebound when parsing succeeds.
            kg = kg_json = json.dumps(json.loads(get_json_string(res)))
        except json.decoder.JSONDecodeError:
            print(f"Error at {text_path} -({chunk_no})")
    return kg_json
    

In [24]:
# Extract the graph for the first listed paper only; the result is a JSON string.
# One model call is made per chunk, so the chain's verbose log below is long.
kg_string = extract_one_paper(paper_list[0])

  0%|          | 0/17 [00:00<?, ?it/s]



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    Front Med (Lausanne). 2022; 9: 1038097. Published online 2022 Nov 17.  doi: 10.3389/fmed.2022.1038097PMCID: PMC9713002PMID: 36465939Risk factors of lymph node metastasis or lymphovascular invasion for superficial esophageal squamous cell carcinoma: A practical and effective predictive nomogram based on a cancer hospital dataYali Tao,
†
 Shengsen Chen,
†
 Jiangping Yu, Qiwen Shen, Rongwei Ruan,
*
 and  Shi Wang
*
Author information Article notes Copyright and License information PMC DisclaimerDepartment of Endoscopy, Zhejiang Cancer Hospital, Institute of Basic Medicine and Cancer (IBMC), Chinese Academy of Sciences, Hangzhou, ChinaCorresponding author.Edited by: Pietro Mascagni, Fondazione Policlinico Universitario Agostino Gemelli IRCCS, ItalyReviewed by: Zhiyan Liu, Shanghai Jiao Tong University, China; Xiaoying Zhou, First Affilia

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..



[1m> Finished chain.[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    –), LNM negative and LVI positive (LNM–/LVI+), and LNM positive and LVI positive (LNM+/LVI+) were considered as LNM/LVI positive. LNM/LVI negative meant LNM negative and LVI negative (LNM–/LVI–).Statistical analysisComparisons between continuous variables were performed by using the Mann–Whitney U-test. We compared categorical variables by using the χ2 test or Fisher's exact test. Variables that were significantly associated with LNM/LVI in univariate analysis or had clinical significance for LNM/LVI were incorporated into multivariate logistic regression. LASSO regression is used to reduce the dimensionality of data and screen out the most significant variables with non-zero coefficients (19). The net reclassification improvement (NRI) can be used to assess whether the classification performance of a model

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 


[1m> Finished chain.[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    performed using SPSS version 22.0 (SPSS Inc, Chicago, IL, USA) and R version 4.0.3 (The R Foundation for Statistical Computing, Vienna, Austria).ResultsClinicopathological characteristicsIn this study, 512 patients with SESCC were enrolled for analysis, with a median age of 61.5 years (range 22–79), including 438 men (85.5%) and 74 women (14.5%). Tumors located in the middle esophagus were found in 361 cases, followed by 133 cases with tumors located in the lower esophagus, and tumors located in the upper esophagus were only found in 18 cases. The median tumor size was 3 cm (range 3–11). A total of 142 patients (27.7%) had tumors confined to the mucosa, and 370 patients (72.3%) had tumors infiltrated into the submucosa. According to the tumor differentiation results, 114 cases (22.3%) were well-differentiat

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 


[1m> Finished chain.[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    10.9)Open in a separate windowLNM, Lymph node metastasis; LVI, lymphovascular invasion; I, superficial and protruding type; II, flat type; III, superficial and excavated type; P, Categorical variables—χ2 test or Fisher's exact test; Continuous variables—Mann–Whitney test.The bold values mean statistical significance.Independent significant factors for LNM/LVIThe clinical characteristics between the LNM/LVI-positive and -negative groups were compared by using univariate analysis, and the results showed that tumor size, tumor location, depth of tumor invasion, tumor differentiation, macroscopic type, and multiple lesions were significantly correlated with LNM/LVI; while age, sex, and circumferential extension were not related with LNM/LVI (Table 1). We also identified the cutoff value of tumor size as 2.5 cm 

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..



[1m> Finished chain.[0m
Error at data/text/PMC9713002.txt -(6)


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    d values mean statistical significance.Identification of predictive factors by LASSO regressionSubsequently, nine variables were incorporated into LASSO regression and five variables including tumor size, depth of invasion, tumor differentiation, tumor location, and macroscopic type were screened out to be associated with LNM/LVI (Figure 3). The weights of each variable affecting LNM/LVI were assessed by calculating the coefficients in the LASSO regression. When λ = 0.039 and log(λ) = −3.251, the specific coefficients of each variable were shown as follows: coefficient of tumor location was 0.130, tumor size was 0.237, tumor invasion depth was 1.046, tumor differentiation was 0.377, and macroscopic type was 0.354 (Supplementary Table S2).Open in a separate windowFigure

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 


[1m> Finished chain.[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    mean statistical significance.Development and validation of an LNM/LVI-predicting nomogram and its score systemNext, we used the five variables included in model B (tumor size, tumor invasion depth, tumor differentiation, macroscopic type, and tumor location) to develop a nomogram that could predict the probability of LNM/LVI presence (Figure 4A). ROC analysis showed that the AUC of this nomogram in LNM/LVI prediction was 0.776 (Figure 4B), suggesting the nomogram had a good predictive performance for LNM/LVI. In addition, the calibration curve demonstrated that the nomogram-predicted LNM/LVI status was nearly consistent with the actual LNM/LVI status (Figure 4C). Each variable in the nomogram was assigned a corresponding risk score (the detailed scores of these five variables are seen in Supplementary Tabl

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..



[1m> Finished chain.[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    LNM/LVI (the red curve) approaches as the risk threshold increases, indicating a good predictive power of this nomogram.Open in a separate windowFigure 5Clinical utility of the nomogram for predicting LNM/LVI in SESCC. (A) The discriminatory power of the nomogram for LNM/LVI was shown with a bar chart. Risk classification of the predictive nomograms was conducted by the maximum Youden index in a ROC analysis, and the performance in distinguishing the LNM/LVI was confirmed by the chi-square test. (B) Decision curve of the nomograms for predicting the presence of LNM/LVI. The horizontal solid black line represents the hypothesis that no patients experienced the presence of MVI or its M2 grade, and the solid gray line represents the hypothesis that all patients met the endpoint. (C) Clinical impact curve of th

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..



[1m> Finished chain.[0m
Error at data/text/PMC9713002.txt -(9)


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    vertheless, ER is unable to dissect the lymph nodes around the esophagus with potential tumor metastasis (29). Also, there is a lack of accurate methods to preoperatively assess the status of LNM in esophageal cancer (30). If SESCC patients with potential LNM undergo endoscopic resection, they will face the risk of tumor postoperative recurrence and metastasis (7, 29). Hence, ER is suitable for SESCC patients with negative LNM, and exploring the preoperative risk factors of LNM is important to select the best treatment strategy for patients with SESCC.Some studies have demonstrated a strong correlation between LVI and LNM (31, 32), and 48.7% (18/37) of LNM-positive patients with SESCC have LVI based on the results of hematoxylin-eosin staining (33). In this study, 25.8

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 


[1m> Finished chain.[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    cancers, 5.14% of SESCC with good differentiation presented with LNM/LVI, which is relatively safe for receiving ER. Moreover, LNM/LVI appeared in 15.68% of moderate differentiated submucosal SESCC and 15.95% of poor differentiated submucosal SESCC (Supplementary Figure S2, the detailed LNM/LVI status according to invasion depth and tumor differentiation is shown in Supplementary Table S1), suggesting that these patients need to be treated with lymph node dissection.Interestingly, the macroscopic type of SESCC was also identified as a significant risk factor for the LNM/LVI of SESCC in this study. The macroscopic type of esophageal cancer had been reported to be correlated with the depth of tumor invasion, which might affect the status of LNM/LVI (36, 37). Furthermore, in our multivariate logistic analysis 

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..



[1m> Finished chain.[0m
Error at data/text/PMC9713002.txt -(11)


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    p of LNM/LVI. Esophagectomy plus lymph node dissection will be recommended if a patient with SESCC belongs to a high-risk group. Additionally, the good clinical utility and predictive power of our nomogram for predicting LNM/LVI were demonstrated by plotting the decision curve and clinical impact curve in DCA.To further evaluate the impact of the five variables (tumor size, tumor invasion depth, tumor differentiation, tumor location, and macroscopic type) in this nomogram on LNM/LVI, we incorporated them into the random forest model and calculated the importance of each variable to LNM/LVI. The results showed that the random forest model can well-distinguish the positive LNM/LVI and negative LNM/LVI in patients with SESCC, with an AUC of 0.775 in LNM/LVI prediction. B

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 


[1m> Finished chain.[0m
Error at data/text/PMC9713002.txt -(12)


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    tion, tumor location, and macroscopic type were significant risk factors for the presence of LNM/LVI in patients with SESCC. Then, a nomogram and a random forest model for predicting LNM/LVI were established by incorporating these five variables. The good clinical utility of our nomogram was confirmed by DCA, and the risk score system also made the nomogram more clinically useful than the random forest model despite their similar AUC. Hence, this nomogram was adopted as a final tool for LNM/LVI prediction in patients with SESCC. According to the scoring system of our nomogram, if a patient with SESCC is classified as high risk for LNM/LVI, esophagectomy plus lymph node dissection will be prioritized to reduce potential tumor recurrence in the future. Last but not leas

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 


[1m> Finished chain.[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
     F, et al.. Cancer statistics in china, 2015. CA Cancer J Clin. (2016) 66:115–32. 10.3322/caac.21338 [PubMed] [CrossRef] [Google Scholar]3. Jacobs M, Macefield RC, Elbers RG, Sitnikova K, Korfage IJ, Smets EM, et al.. Meta-analysis shows clinically relevant and long-lasting deterioration in health-related quality of life after esophageal cancer surgery. Qual Life Res. (2014) 23:1097–115. 10.1007/s11136-013-0576-5 [PubMed] [CrossRef] [Google Scholar]4. Ishihara R, Arima M, Iizuka T, Oyama T, Katada C, Kato M, et al.. Endoscopic submucosal dissection/endoscopic mucosal resection guidelines for esophageal cancer. Dig Endosc. (2020) 32:452–93. 10.1111/den.13654 [PubMed] [CrossRef] [Google Scholar]5. Sharma P, Shaheen NJ, Katzka D, Bergman J. Aga clinical practice update on endoscopic treatment of barrett's esop

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..



[1m> Finished chain.[0m
Error at data/text/PMC9713002.txt -(15)


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    nformatics. (2008) 24:2010–4. 10.1093/bioinformatics/btn356 [PubMed] [CrossRef] [Google Scholar]25. Pavey TG, Gilson ND, Gomersall SR, Clark B, Trost SG. Field evaluation of a random forest activity classifier for wrist-worn accelerometer data. J Sci Med Sport. (2017) 20:75–80. 10.1016/j.jsams.2016.06.003 [PubMed] [CrossRef] [Google Scholar]26. Sauer CM, Sasson D, Paik KE, McCague N, Celi LA, Sanchez FI, et al.. Feature selection and prediction of treatment failure in tuberculosis. PLoS ONE. (2018) 13:e207491. 10.1371/journal.pone.0207491 [PMC free article] [PubMed] [CrossRef] [Google Scholar]27. Ishihara R, Iishi H, Uedo N, Takeuchi Y, Yamamoto S, Yamada T, et al.. Comparison of emr and endoscopic submucosal dissection for en bloc resection of early esophageal cancer

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 


[1m> Finished chain.[0m
Error at data/text/PMC9713002.txt -(16)


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Please extract knowledge from the following piece of text from a paper.:
    4:1–7. 10.1186/s12957-016-1016-0 [PMC free article] [PubMed] [CrossRef] [Google Scholar]36. Update on the paris classification of superficial neoplastic lesions in the digestive tract . Endoscopy. (2005) 37:570–8. 10.1055/s-2005-861352 [PubMed] [CrossRef] [Google Scholar]37. Jia R, Luan Q, Wang J, Hou D, Zhao S. Analysis of predictors for lymph node metastasis in patients with superficial esophageal carcinoma. Gastroent Res Pract. (2016) 2016:1–6. 10.1155/2016/3797615 [PMC free article] [PubMed] [CrossRef] [Google Scholar]38. Iasonos A, Schrag D, Raj GV, Panageas KS. How to build and interpret a nomogram for cancer prognosis. J Clin Oncol. (2008) 26:1364–70. 10.1200/JCO.2007.12.9791 [PubMed] [CrossRef] [Google Scholar]39. Shariat SF, Capitanio U, Jeldres C, Karakiewic

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo-16k in organization org-BsVT4MvgMqP4ma0WhK2sPKAr on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 


[1m> Finished chain.[0m


In [26]:
# Parse the extracted graph back into Python objects to inspect it.
json.loads(kg_string)

{'nodes': [{'ntype': 'diagnosis',
   'name': 'Lymphovascular Invasion (LVI)',
   'id': 3},
  {'ntype': 'diagnosis', 'name': 'Lymph Node Metastasis (LNM)', 'id': 4},
  {'ntype': 'diagnosis',
   'name': 'Superficial Esophageal Squamous Cell Carcinoma (SESCC)',
   'id': 5},
  {'ntype': 'stage', 'name': 'Tumor Size', 'id': 6},
  {'ntype': 'stage', 'name': 'Circumferential Extension', 'id': 7},
  {'ntype': 'stage', 'name': 'Location within Esophagus', 'id': 8},
  {'ntype': 'stage', 'name': 'Depth of Invasion', 'id': 9},
  {'ntype': 'stage', 'name': 'Tumor Differentiation', 'id': 10},
  {'ntype': 'stage', 'name': 'Macroscopic Type', 'id': 11},
  {'ntype': 'stage', 'name': 'Multiple Lesions', 'id': 12},
  {'ntype': 'test', 'name': 'SPSS', 'id': 13},
  {'ntype': 'test', 'name': 'R', 'id': 14},
  {'ntype': 'treatment', 'name': 'Esophagectomy', 'id': 15},
  {'ntype': 'treatment', 'name': 'Lymph Node Dissection', 'id': 16}],
 'edges': [{'etype': 'associate_with', 'source': 3, 'target': 5},
  {'et

In [34]:
from jinja2 import Template

In [39]:
# Load the HTML page used to visualise the graph as a Jinja2 template.
# The encoding is given explicitly so the result does not depend on the
# platform's default locale encoding.
with open("vis-kg.html", encoding="utf-8") as f:
    nodes_template = Template(f.read())

In [None]:
# Render the vis-kg.html template with the extracted graph (passed in as the
# `kg_string` template variable) and show the resulting HTML inline.

from IPython.display import HTML, display

display(HTML(nodes_template.render(kg_string=kg_string)))