In [None]:
import os

# Provide the OpenAI key that the langchain_openai client reads from the
# environment. `setdefault` is used instead of plain assignment so that a real
# key already exported in the shell (or set by an earlier cell) is NOT
# overwritten by this placeholder. Replace the placeholder only if no key is
# configured outside the notebook; never commit a real key to the file.
os.environ.setdefault("OPENAI_API_KEY", "Your_GPT_key_here")

In [None]:
from dokument import Dokument
import pickle

# Restore the previously pickled list of documents from the working directory.
# NOTE: unpickling can execute arbitrary code, so only load a file you created
# yourself or otherwise trust.
with open("dokument_list.pkl", "rb") as pickle_handle:
    dokument_list = pickle.load(pickle_handle)

# Report how many documents were restored.
document_count = len(dokument_list)
print("Loaded", document_count, "documents")

In [None]:
# Building blocks for the extraction pipeline: prompt template, chat model and
# a parser that turns the model reply into a plain string.
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI

# Prompt text asking the model to pull per-study and synthesis-level
# statistics out of a document; `{docs}` is the single placeholder that
# receives the document text.
extract_data_template = """Consider the following text extracted from a document:
{docs}

For each individual study described in the document (if any), extract the following data points:
- Study identifier (e.g., author, year)
- Sample size
- Effect size
- Confidence interval
- P-value
- Any other relevant metrics

For synthesis results (e.g., meta-analyses), extract:
- Overall effect size
- Heterogeneity measures
- Summary statistics
- Confidence interval

"""

# Wrap the raw text in a prompt template.
extract_data_prompt = ChatPromptTemplate.from_template(
    template=extract_data_template,
)

# Chat model configured with temperature 0.
gpt4_model = ChatOpenAI(
    model_name="gpt-4-0125-preview",
    temperature=0,
)

# Parser that returns the model output as a string.
str_output_parser = StrOutputParser()

# Extraction pipeline: prompt -> model -> string output.
extract_data_chain = (
    extract_data_prompt
    | gpt4_model
    | str_output_parser
)

In [None]:
# Pick the document at index 3 (one containing a study to visualize), feed its
# raw text through the extraction chain, then show its DOI followed by the
# extracted data.
selected_dokument = dokument_list[3]
doklist_3_extracted = extract_data_chain.invoke(selected_dokument.raw_data)

print(selected_dokument.DOI)
print(doklist_3_extracted)

In [None]:
# Prompt text asking the model to write Python plotting code for previously
# extracted study data; `{docs}` receives that extracted data. The string is
# assembled from adjacent literals so the exact whitespace (including the
# trailing space after "representations,") is explicit.
visualization_template = (
    "\n"
    "Based on the following extracted data, generate Python code to create visual representations, \n"
    "such as forest plots and summary tables.\n"
    "{docs}\n"
)

# Wrap the raw text in a prompt template.
visualization_prompt = ChatPromptTemplate.from_template(
    template=visualization_template,
)

# Visualization pipeline: prompt -> model -> string output, reusing the model
# and parser that were set up for the extraction chain.
visualization_data_chain = (
    visualization_prompt
    | gpt4_model
    | str_output_parser
)

In [None]:
# Ask the model for Python code that visualizes the data extracted from the
# selected document. The generated code is only printed here, not executed.
doklist_3_visualized = visualization_data_chain.invoke(
    input=doklist_3_extracted,
)
print(doklist_3_visualized)