In [1]:
from llama_index.program import (
    OpenAIPydanticProgram,
    DFFullProgram,

    DataFrame,
    DataFrameRowsOnly,
    DFRowsProgram,
)
from llama_index.llms import OpenAI

In [2]:
from pydantic import BaseModel
from typing import List


In [71]:
import pandas as pd
df = pd.read_excel("./LOE Sample.xlsx")

In [72]:
df.shape

(130, 54)

In [73]:
df.head()

Unnamed: 0,endnote_id,endnote_recnum,ref_type,authors,author_country,title,journal_name,pages,volume,issue,...,is_platform,surgical_subspecialty,translated_title,data_status,data_comments,created_time,created_by,updated_time,updated_by,load_seq
0,409f771a2721426bace945f2e799a884,116975,17,"[\n ""Shi, H."",\n ""Li, J."",\n ""Li, K."",\n ""...","[\n ""China""\n]",Minimally invasive versus open radical cystect...,Journal of International Medical Research,4604-4618,47.0,10.0,...,,Uro_Bladder,,READY,Comments,2023-11-03 15:52:12.900,svc_lit_kb_rw,,,52
1,a80a2a9ae4424a48babd8beeca7225bc,104348,17,"[\n ""Simillis, C."",\n ""Lal, N."",\n ""Thoukid...","[\n ""Netherlands"",\n ""Switzerland"",\n ""Unit...",Open Versus Laparoscopic Versus Robotic Versus...,Ann Surg,,,,...,,GS_Colorectal,,READY,Comments,2023-11-03 15:52:12.900,svc_lit_kb_rw,,,52
2,06a6de1262c94155ad27f792b1fe2278,115419,17,"[\n ""Zheng, B."",\n ""Zhang, X."",\n ""Wang, X....","[\n ""China""\n]","A comparison of open, laparoscopic and robotic...",Colorectal Dis,,,,...,,GS_Colorectal,,READY,Comments,2023-11-03 15:52:12.900,svc_lit_kb_rw,,,52
3,bfab9ecce99d4caeb17b14a3336d60b5,127618,17,"[\n ""Allaix, M. E."",\n ""Rebecchi, F."",\n ""F...","[\n ""Italy"",\n ""United States""\n]",The Landmark Series: Minimally Invasive (Lapar...,Annals of Surgical Oncology,,,,...,,GS_Colorectal,,READY,Comments,2023-11-03 15:52:12.900,svc_lit_kb_rw,,,52
4,69348b561774440781622a50db16cfd1,134090,17,"[\n ""Bafort, C."",\n ""Beebeejaun, Y."",\n ""To...","[\n ""Belgium"",\n ""United Kingdom""\n]",Laparoscopic surgery for endometriosis,The Cochrane database of systematic reviews,CD011031,10.0,,...,,Gyn_Fertility,,READY,Comments,2023-11-03 15:52:12.900,svc_lit_kb_rw,,,52


In [74]:
df.columns

Index(['endnote_id', 'endnote_recnum', 'ref_type', 'authors', 'author_country',
       'title', 'journal_name', 'pages', 'volume', 'issue', 'epub_date',
       'keywords', 'year', 'isbn_issn', 'accession_num', 'notes', 'loe',
       'urls', 'procedure', 'robotic', 'surgical_specialty', 'region',
       'pub_database', 'search_date', 'start_page', 'doi', 'language',
       'abstract', 'author_address', 'work_type', 'pdf_urls', 'study_n',
       'alt_title', 'publisher', 'database_provider', 'comparison_arm',
       'orig_pub', 'mp_system', 'short_title', 'db_source', 'is_author',
       'indication', 'loe_history', 'secondary_authors', 'is_platform',
       'surgical_subspecialty', 'translated_title', 'data_status',
       'data_comments', 'created_time', 'created_by', 'updated_time',
       'updated_by', 'load_seq'],
      dtype='object')

In [75]:
df = df.dropna(subset=['abstract'])

In [76]:
df.shape

(118, 54)

In [77]:
df = df.reset_index()

In [207]:
# class Procedure(BaseModel):
#     """Procedures from the article. ie. prostatectomy"""
#     Procedure_name: str

# class Comparison(BaseModel):
#     """Comparison arms, ie. robotic vs. laparoscopic."""
#     Compare_arms: str


# class Abstract(BaseModel):
#     """Data model for an abstract."""

#     P_type: List[Procedure]
#     P_type_r: str
#     All_comparisons: List[Comparison]
#     All_comparisons_r: str
#     Robotic: str
#     Robotic_r: str
#     MIS: str
#     MIS_r: str

In [160]:

class Abstract(BaseModel):
    """Data model for an abstract."""

    P_type: str
    P_type_r: str
    All_comparisons: str
    All_comparisons_r: str
    Robotic: str
    Robotic_r: str
    MIS: str
    MIS_r: str

In [169]:
program = OpenAIPydanticProgram.from_defaults(
    output_cls=Abstract,
    llm=OpenAI(temperature=0, model="gpt-4"),
    prompt_template_str=(
        " Please extract the following abstracts into a structured data according to: {input_str}. Each indivisual abstract is separated by '||'."
        " The column names are the following: ['P_type', 'P_type_r','All_comparisons', 'All_comparisons_r', 'Robotic', 'Robotic_r', 'MIS', 'MIS_r']"
        " Do not specify additional column that are not in the function schema."
        " Each abstract's structured data is represented as a single row."
        " ------------------"
        " In the P_type column, review the abstract and extract a list of procedure types studied from the article."         
        " Choose one label that most accurately depict the primary procedure explored in the article."
        " Lobectomy: Lobectomy was studied in the article."
        " Benign Hysterectomy: Benign Hysterectomy was studied in the article."
        " Partial Nephrectomy: Partial Nephrectomy was studied in the article."
        " LAR, Lower anterior resection: LAR, Lower anterior resection was studied in the article."
        " TME, Total mesorectal excision: TME, Total mesorectal excision was studied in the article."
        " Myomectomy: Myomectomy was studied in the article."
        " Inguinal Hernia Repair: Inguinal Hernia Repair was studied in the article."
        " Roux-en Y Gastric Bypass: Roux-en Y Gastric Bypass was studied in the article."
        " Prostatectomy: Prostatectomy was studied in the article."
        " Ventral Hernia Repair: Ventral Hernia Repair was studied in the article."
        " Sigmoidectomy: Sigmoidectomy was studied in the article."
        " Right colectomy: Right colectomy was studied in the article."
        " Rectal Resection (LAR/TME/ISR): Rectal Resection (LAR/TME/ISR) was studied in the article."
        " Cervical Cancer Hysterectomy: Cervical Cancer Hysterectomy was studied in the article."
        " Endometrial Cancer Hysterectomy: Endometrial Cancer Hysterectomy was studied in the article."
        " Duodenal switch: Duodenal switch was studied in the article."
        " Sleeve Gastrectomy: Sleeve Gastrectomy was studied in the article."
        " Esophagectomy: Esophagectomy was studied in the article."
        " Cystectomy: Cystectomy was studied in the article."
        " Sacrocolpopexy: Sacrocolpopexy was studied in the article."
        " Endometriosis Resection: Endometriosis Resection was studied in the article."
        " Radical Nephrectomy: Radical Nephrectomy was studied in the article."
        " Pancreatectomy: Pancreatectomy was studied in the article."
        " Thymectomy: Thymectomy was studied in the article."
        " Extract procedure type from the abstract if none of the previously listed procedure labels are applicable to the article."
        " ------------------"
        " In the P_type_r column, include a paragraph from the abstract that justifies the selection of procedure labels for P_type."
        " ------------------"
        " In the All_comparisons column, review the abstract and extract list comparisons presented in the article."
        " ------------------"
        " In the All_comparisons_r column, include a paragraph from the abstract that justifies the result for All_comparisons."
        " ------------------"
        " In the Robotic column, find articles that mention robotic surgery or robotic-assisted surgery. Use Yes/No for labels."
        " ------------------"
        " In the Robotic_r column, include a paragraph from the abstract that justifies result for Robotic column."
        " ------------------"
        " In the MIS column, find articles that mention minimally invasive surgery (MIS). Use Yes/No for labels."
        " ------------------"
        " In the MIS_r column, include a paragraph from the abstract that justifies result for MIS column."
    ),
    verbose=True
)

In [180]:
df_result = pd.DataFrame(columns=['P_type', 'P_type_r','All_comparisons', 'All_comparisons_r', 'Robotic', 'Robotic_r', 'MIS', 'MIS_r'])

for i in range(0,13):
    df_input = df.abstract[i]
    response_obj = program(input_str=df_input)
    data = response_obj.model_dump()
    df_output = pd.DataFrame([{'P_type': data['P_type'], 'P_type_r': data['P_type_r'],
                              'All_comparisons': data['All_comparisons'], 'All_comparisons_r': data['All_comparisons_r'], 
                              'Robotic': data['Robotic'], 'Robotic_r': data['Robotic_r'], 
                              'MIS': data['MIS'], 'MIS_r': data['MIS_r']}])
    df_result = pd.concat([df_result, df_output])

Function call: Abstract with args: {
"P_type": "Cystectomy",
"P_type_r": "We performed a systematic review and meta-analysis to evaluate the efficacy and safety of minimally invasive radical cystectomy (MIRC) versus open radical cystectomy (ORC) for bladder cancer.",
"All_comparisons": "MIRC vs ORC in terms of: recurrence rate, mortality, lymph node yield, positive lymph nodes, positive surgical margins, operating time, estimated blood loss, blood transfusion rate, time to regular diet, length of hospital stay, complication rate",
"All_comparisons_r": "Eight articles describing nine RCTs (803 patients) were analyzed. No significant differences were found between MIRC and ORC in two oncologic outcomes: the recurrence rate and mortality. Additionally, no significant differences were found in three pathologic outcomes: lymph node yield, positive lymph nodes, and positive surgical margins. With respect to perioperative outcomes, however, MIRC showed a significantly longer operating time, l

In [181]:
df_result

Unnamed: 0,P_type,P_type_r,All_comparisons,All_comparisons_r,Robotic,Robotic_r,MIS,MIS_r
0,Cystectomy,We performed a systematic review and meta-anal...,"MIRC vs ORC in terms of: recurrence rate, mort...",Eight articles describing nine RCTs (803 patie...,Yes,We found no statistically significant differen...,Yes,We performed a systematic review and meta-anal...
0,Rectal Resection (LAR/TME/ISR),This network meta-analysis of 29 randomized co...,open vs laparoscopic vs robotic vs transanal m...,Twenty-nine randomized controlled trials inclu...,Yes,Robotic operative time was longer compared wit...,Yes,The laparoscopic and robotic approaches may im...
0,"TME, Total mesorectal excision",BACKGROUND: Total mesorectal excision for rect...,"OpTME vs LaTME, OpTME vs RoTME, LaTME vs RoTME",The TSA demonstrated the cumulative z curve cr...,Yes,Based on P-score of completeness of the TME sp...,Yes,Although open TME may give better pathological...
0,"Laparoscopic colectomy, Rectal Resection (LAR/...",Current high-quality evidence supports the rou...,"Laparoscopic colectomy vs Open surgery, Laparo...",Pathology and long-term oncologic outcomes are...,Yes,"Furthermore, the impact of robotic technology ...",Yes,Current high-quality evidence supports the rou...
0,Endometriosis Resection,BACKGROUND: Endometriosis is associated with p...,laparoscopic ablation or excision vs diagnosti...,SELECTION CRITERIA: We selected randomised con...,No,The abstract does not mention robotic surgery ...,Yes,"The abstract mentions laparoscopic surgery, wh..."
0,Rectal Resection (LAR/TME/ISR),This review evaluated the non-inferiority of e...,Early urinary catheter removal vs Late urinary...,RCTs comparing early versus late catheter remo...,No,The abstract does not mention robotic surgery ...,No,The abstract does not mention minimally invasi...
0,"TME, Total mesorectal excision",We aimed to evaluate comparative outcomes of r...,Robotic TME vs Laparoscopic TME,Nine randomised-controlled trials reporting 14...,Yes,We aimed to evaluate comparative outcomes of r...,Yes,We aimed to evaluate comparative outcomes of r...
0,"LAR, Lower anterior resection",Numerous randomized controlled trials comparin...,end-to-end vs end-to-side anastomosis after lo...,this study meta-analyzes pooled data comparing...,No,The abstract does not mention robotic or robot...,No,The abstract does not mention minimally invasi...
0,Chemotherapy for oral cavity and oropharyngeal...,Oral cavity and oropharyngeal cancers are the ...,Chemotherapy prior to radiotherapy vs. Surgery...,"Overall, there is insufficient evidence to cle...",No,The abstract does not mention robotic surgery ...,No,The abstract does not mention minimally invasi...
0,Pancreatectomy,"Methods: PubMed, the Cochrane Central Register...",partial pancreatoduodenectomy vs distal pancre...,For partial pancreatoduodenectomy and distal p...,No,The abstract does not mention anything about r...,No,The abstract does not mention anything about m...


In [184]:
df_result = df_result.reset_index()

In [186]:
df_result.to_excel('./GPT4_result.xlsx')

In [189]:
class LOE(BaseModel):
    """Data model for an abstract."""

    Editorial: bool
    Review: bool
    Systematic_review: bool
    Comparison_study: bool
    Randomized_study: bool
    Database_paper: bool
    Clinical_paper_with_robotic: bool
    Robotic_vs_robotic_comparison: bool
    Prospective_study: bool
    Clinical_paper_multiple: bool

In [190]:
program2 = OpenAIPydanticProgram.from_defaults(
    output_cls=LOE,
    llm=OpenAI(temperature=0, model="gpt-4"),
    prompt_template_str=(
        " Please extract the following abstracts into a structured data according to: {input_str}. Each indivisual abstract is separated by '||'."
        " The column names are the following: ['Editorial', 'Review', 'Systematic_review', 'Comparison_study', 'Randomized_study', 'Database_paper', 'Clinical_paper_with_robotic', 'Robotic_vs_robotic_comparison', 'Prospective_study', 'Clinical_paper_multiple']"
        " Do not specify additional column that are not in the function schema."
        " Each abstract's structured data is represented as a single row."
        " ------------------"
        " In the Editorial column, review the abstract define if the study is an Editorial/comment/letter/author reply, discussion,"
        " erratum, case study(N<10 cases), introduction, animal, cadaveric, bench study, abstract only, protocol, interview or technical paper with no specific patients?" 
        " ------------------"
        " In the Review column, is the article a literature review."
        " ------------------"
        " In the Systematic_review column, is the article a systematic review article?"
        " ------------------"
        " In the Comparison_study column, is the analysis limited to comparison studies?"
        " ------------------"
        " In the Randomized_study column, is the analysis limited to randomized studies?"
        " ------------------"
        " In the Database_paper column, is the article a database paper(named: NIS, SEER, etc..)"
        " ------------------"
        " In the Clinical_paper_with_robotic column, is the article a primary data clinical paper with da Vinci as the only approach?"
        " ------------------"
        " In the Robotic_vs_robotic_comparison column, is there a planned robotic vs. robotic comparison?"
        " ------------------"
        " In the Prospective_study column, is the study prospective study?"
        " ------------------"
        " In the Clinical_paper_multiple column, is the study clinical paper with multiple treatments or multiple surgical approaches?"
    ),
    verbose=True
)

In [206]:
df_LOE = pd.DataFrame(columns=['Editorial', 'Review', 'Systematic_review', 'Comparison_study', 'Randomized_study', 'Database_paper', 'Clinical_paper_with_robotic', 'Robotic_vs_robotic_comparison', 'Prospective_study', 'Clinical_paper_multiple'])

for i in range(0,len(df.abstract)):
    df_input = df.abstract[i]
    response_obj = program2(input_str=df_input)
    data = response_obj.model_dump()
    df_output = pd.DataFrame(data, index=[i])
    df_LOE = pd.concat([df_LOE, df_output])

Function call: LOE with args: {
"Editorial": false,
"Review": false,
"Systematic_review": true,
"Comparison_study": true,
"Randomized_study": true,
"Database_paper": false,
"Clinical_paper_with_robotic": false,
"Robotic_vs_robotic_comparison": false,
"Prospective_study": false
}
Function call: LOE with args: {
"Editorial": false,
"Review": false,
"Systematic_review": true,
"Comparison_study": true,
"Randomized_study": true,
"Database_paper": false,
"Clinical_paper_with_robotic": false,
"Robotic_vs_robotic_comparison": false,
"Prospective_study": false
}
Function call: LOE with args: {
"Editorial": false,
"Review": false,
"Systematic_review": true,
"Comparison_study": true,
"Randomized_study": true,
"Database_paper": false,
"Clinical_paper_with_robotic": false,
"Robotic_vs_robotic_comparison": false,
"Prospective_study": false
}
Function call: LOE with args: {
"Editorial": false,
"Review": false,
"Systematic_review": false,
"Comparison_study": true,
"Randomized_study": true,
"Database_p

In [208]:
df_LOE

Unnamed: 0,Editorial,Review,Systematic_review,Comparison_study,Randomized_study,Database_paper,Clinical_paper_with_robotic,Robotic_vs_robotic_comparison,Prospective_study,Clinical_paper_multiple
0,False,False,True,True,True,False,False,False,False,
1,False,False,True,True,True,False,False,False,False,
2,False,False,True,True,True,False,False,False,False,
3,False,False,False,True,True,False,True,False,False,
4,False,True,True,True,True,False,False,False,False,
...,...,...,...,...,...,...,...,...,...,...
113,False,False,False,False,False,False,False,False,False,
114,False,False,False,False,False,False,False,False,False,
115,False,False,False,False,False,False,False,False,False,
116,False,False,False,False,False,False,False,False,False,
