In [None]:
import os
# Make an OpenAI API key available through the environment for the ChatOpenAI
# model created later in this notebook.
# setdefault() only fills the variable when it is not already set, so a real key
# exported in the shell is no longer overwritten by the placeholder below.
# Replace the placeholder (or export OPENAI_API_KEY before starting the kernel);
# never commit a real key to the notebook.
os.environ.setdefault("OPENAI_API_KEY", "Your_GPT_key_here")

In [None]:
from dokument import Dokument
from synthesis import Synthesis

import pickle
# Restore the list of documents (text already extracted) from the pickle on disk.
# NOTE: unpickling can execute arbitrary code -- only load a file you produced yourself.
with open("dokument_list.pkl", "rb") as pickle_in:
    dokument_list = pickle.load(pickle_in)
print(f"Loaded {len(dokument_list)} documents")

In [None]:
# Template for extracting risk of bias information from document text.
# `{docs}` is the only placeholder in the prompt; it marks where the document
# text is inserted. The prompt asks for a "low" / "unclear" / "high" rating for
# nine signalling questions grouped into six domains, answered per trial, and
# asks the model to justify skipping the task when no trial is described.
# The string is sent to the model verbatim, so any edit to it changes the prompt.
signalling_template = """Consider the following text extracted from a document:
{docs}
If a trial is conducted and described in the document,
For each of the following signalling questions, determine the level of risk of bias as 
"low", "unclear", or "high" based on information found about the trial. 
If the document contains multiple trials, answer the signalling questions 
for each trials individually. Skip the task but justify the decision if no trial
is involved in the document

### Domain 1: Sequence Generation
1. Was the allocation sequence adequately generated?

### Domain 2: Allocation Concealment
2. Was the allocation adequately concealed?

### Domain 3: Blinding
3. Were participants and study personnel blinded to the intervention?
4. Was the blinding of outcome assessment adequate?

### Domain 4: Incomplete Outcome Data
5. Were there incomplete outcome data?
6. Were incomplete outcome data adequately addressed?

### Domain 5: Selective Reporting
7. Was the study free from selective outcome reporting?

### Domain 6: Other Sources of Bias
8. Were the groups similar at baseline for important prognostic factors?
9. Was the study apparently free of other problems that could put it at a high risk of bias?"""

# Building blocks of the prompt -> model -> parser pipeline
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI

# Prompt object built from the signalling template defined above
signalling_prompt = ChatPromptTemplate.from_template(signalling_template)

# GPT-4 chat model; temperature 0 to keep sampling randomness to a minimum
gpt4_model = ChatOpenAI(model_name="gpt-4-0125-preview", temperature=0)

# String output parser applied to the model's responses
str_output_parser = StrOutputParser()

# signalling_chain: the prompt feeds the model, whose reply feeds the parser
signalling_chain = (
    signalling_prompt
    | gpt4_model
    | str_output_parser
)

In [None]:
# Asynchronous helper: answer the signalling questions for every document
async def answers_to_signalling_question(dokument_list):
    """Run ``signalling_chain`` over all documents and store each answer.

    The ``raw_data`` of every document is collected in order and sent through
    ``signalling_chain.abatch`` in a single awaited call. The i-th result is
    written to the i-th document's ``rob_sqa`` attribute and echoed to stdout.

    Args:
        dokument_list: Sequence of document objects exposing ``raw_data``.

    Returns:
        None. The documents are updated in place.
    """
    # Gather the chain inputs, preserving document order
    chain_inputs = []
    for document in dokument_list:
        chain_inputs.append(document.raw_data)

    # One awaited batch call covering every document
    answers = await signalling_chain.abatch(chain_inputs)

    # Attach each answer to its document and report it
    separator = "-" * 30
    for position, document in enumerate(dokument_list, start=1):
        document.rob_sqa = answers[position - 1]
        print(f"Document {position}:")
        print("Signalling question results:", document.rob_sqa)
        print(separator)  # Separator

# Usage: Run the function on the dokument_list
# NOTE: this bare top-level `await` is not inside an `async def`, so it relies on
# the notebook kernel accepting top-level await. Executing the cell sends the
# raw_data of every loaded document through signalling_chain.
await answers_to_signalling_question(dokument_list)

In [None]:
# Template for determining the overall Risk of Bias (RoB) for a document.
# `{docs}` is the only placeholder; it marks where the earlier per-trial
# assessment text is inserted. The prompt lists six criteria (low / unclear /
# high, each "within a trial" and "across trials") for summarising the RoB.
# The string is sent to the model verbatim, so any edit to it changes the prompt.
roblevel_template = """

You have previously assessed the risk of bias for individual trials within a document. 
Here is the assessment: {docs}

Now, if relevant information was provided in the assessment, determine the overall Risk of Bias (RoB) across the entire document based on the assessments of 
individual trials. Use the following criteria to summarize the RoB:

1. **Low RoB within a trial**: Low risk of bias across all key domains
2. **Low RoB across trials**: Most information is from trials at low risk of bias

3. **Unclear RoB within a trial**: Low or unclear risk of bias for all key domains
4. **Unclear RoB across trials**: Most information is from trials at low or unclear risk of bias

5. **High RoB within a trial**: High risk of bias for one or more key domains
6. **High RoB across trials**: The proportion of information from trials at high risk of bias is 
sufficient to affect the interpretation of results.
"""

# Prompt object built from the RoB level template
roblevel_prompt = ChatPromptTemplate.from_template(roblevel_template)

# roblevel_chain: same model and parser as the signalling chain, different prompt
roblevel_chain = (
    roblevel_prompt
    | gpt4_model
    | str_output_parser
)


In [None]:
# Asynchronous helper: derive the overall Risk of Bias (RoB) for every document
async def roblevel_from_sqa(dokument_list):
    """Run ``roblevel_chain`` over all documents and store each overall RoB.

    The ``rob_sqa`` assessment of every document is collected in order and sent
    through ``roblevel_chain.abatch`` in a single awaited call. The i-th result
    is written to the i-th document's ``rob`` attribute and echoed to stdout.

    Args:
        dokument_list: Sequence of document objects exposing ``rob_sqa``.

    Returns:
        None. The documents are updated in place.
    """
    # Gather the per-document assessments, preserving document order
    assessments = []
    for document in dokument_list:
        assessments.append(document.rob_sqa)

    # One awaited batch call covering every document
    overall_levels = await roblevel_chain.abatch(assessments)

    # Attach each overall level to its document and report it
    separator = "-" * 30
    for position, document in enumerate(dokument_list, start=1):
        document.rob = overall_levels[position - 1]
        print(f"Document {position}:")
        print("Risk of bias levels:", document.rob)
        print(separator)  # Separator

# Usage: Run the function on the dokument_list
# NOTE: this bare top-level `await` is not inside an `async def`, so it relies on
# the notebook kernel accepting top-level await. It reads each document's rob_sqa,
# so the signalling-question cell must have been run first.
await roblevel_from_sqa(dokument_list)

In [None]:
# Save dokument_list with pickle
import os
import pickle

# Opening "dokument_list.pkl" with "wb" truncates it immediately, so a failure
# while pickling would destroy the previously saved documents. Dump to a
# temporary sibling file first and swap it into place only after the dump
# has completed successfully.
pickle_path = "dokument_list.pkl"
temp_path = pickle_path + ".tmp"
try:
    with open(temp_path, "wb") as file:
        pickle.dump(dokument_list, file)
    os.replace(temp_path, pickle_path)  # rename over the old file (atomic on POSIX)
except Exception:
    # Keep the existing pickle untouched and drop the partial temporary file
    if os.path.exists(temp_path):
        os.remove(temp_path)
    raise

In [None]:
# Debug: spot-check the DOI and overall risk-of-bias result of one document.
# The index is checked first so the cell does not raise IndexError when fewer
# documents than expected were loaded.
debug_index = 3
if debug_index < len(dokument_list):
    debug_doc = dokument_list[debug_index]
    print(debug_doc.DOI)
    print(debug_doc.rob)
else:
    print(f"No document at index {debug_index}; only {len(dokument_list)} loaded")