In [None]:
import os

# Provide the OpenAI API key. setdefault keeps a key that is already exported in
# the environment, so a real key is never overwritten by this placeholder.
# Replace the placeholder only if the key is not configured outside the notebook,
# and do not commit a real key together with the notebook.
os.environ.setdefault("OPENAI_API_KEY", "Your_GPT_key_here")

# Specify your research topic
research_topic = "How accessibility can be improved using mixed reality?"

In [None]:
import pickle

from dokument import Dokument
from synthesis import Synthesis

# Synthesis task for the chosen research topic; eligible documents are collected on it.
# Feel free to rename the variable to something that reflects your topic.
synthesis_task = Synthesis(research_topic)

# Load the pickled list of documents from disk.
# NOTE: unpickling can execute arbitrary code; only load files you created or trust.
with open("dokument_list.pkl", "rb") as pickled_documents:
    dokument_list = pickle.load(pickled_documents)

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI

# Define template for determining eligibility based on a research topic in "0" or "1"
eligibility_bool_template = """
Consider the following research topic:
{topic}
Examine the following document and decide whether or not the document is
eligible as research material for the research topic. Response with "1" if yes, "0" if no
{docs}
"""
# Fill in the research topic now and keep {docs} as the only open placeholder.
# Braces inside the topic are doubled so the prompt template reads them as
# literal text instead of treating them as additional template variables.
eligibility_bool_template_formatted = eligibility_bool_template.format(
    topic=research_topic.replace("{", "{{").replace("}", "}}"),
    docs="{docs}",
)

# Create a Prompt Template using the formatted eligibility template
eligibility_bool_prompt = ChatPromptTemplate.from_template(eligibility_bool_template_formatted)

# Define the language model with 0 temperature
gpt4_model = ChatOpenAI(temperature=0, model_name="gpt-4-0125-preview")

# Define output parser to handle output
str_output_parser = StrOutputParser()

# Final chain for determining eligibility based on a research topic in "0" or "1"
eligibility_bool_chain = eligibility_bool_prompt | gpt4_model | str_output_parser

In [None]:
# Define template for determining eligibility based on a research topic
eligibility_template = """
Consider the following research topic:
{topic}
Examine the following document and decide whether or not the document is
eligible as research material for the research topic. Answer with Yes or No, then explain your response.
{docs}
"""

# Fill in the research topic now and keep {docs} as the only open placeholder.
# Braces inside the topic are doubled so the prompt template reads them as
# literal text instead of treating them as additional template variables.
eligibility_template_formatted = eligibility_template.format(
    topic=research_topic.replace("{", "{{").replace("}", "}}"),
    docs="{docs}",
)

# Create a Prompt Template using the formatted eligibility template
eligibility_prompt = ChatPromptTemplate.from_template(eligibility_template_formatted)

# Final chain for determining eligibility based on a research topic
eligibility_chain = eligibility_prompt | gpt4_model | str_output_parser

In [None]:
# Define asynchronous function to run a batch of requests with the chain using a list of document input
async def eligibility_bool_from_raw_data(dokument_list):
    """Run both eligibility chains on every document and record the results.

    The explanation returned by ``eligibility_chain`` is stored on each
    document as ``eligibility`` and both answers are printed. Documents whose
    answer from ``eligibility_bool_chain`` is "1" are passed to
    ``synthesis_task.add_eligible_document``.

    Args:
        dokument_list: Documents exposing a ``raw_data`` attribute, which is
            used as the chain input.

    Returns:
        None. Results are written to the documents and to ``synthesis_task``.

    Raises:
        ValueError: If a chain returns a different number of answers than
            there are documents.
    """
    # Create list of raw data from the document list for input to the chain
    raw_data_list = [doc.raw_data for doc in dokument_list]
    # Run the eligibility chains on the input
    eligibility_bool_list = await eligibility_bool_chain.abatch(raw_data_list)
    eligibility_reasoning = await eligibility_chain.abatch(raw_data_list)

    # Fail loudly instead of silently pairing answers with the wrong documents
    if not (len(eligibility_bool_list) == len(eligibility_reasoning) == len(dokument_list)):
        raise ValueError(
            "Eligibility chains returned a different number of answers than documents"
        )

    for doc, bool_answer, reasoning in zip(
        dokument_list, eligibility_bool_list, eligibility_reasoning
    ):
        # Save the eligibility answers to the documents in the list
        doc.eligibility = reasoning
        print(bool_answer, "\n")
        print(reasoning, "\n")
        # The model may wrap the digit in whitespace, quotes or a trailing period
        # (the prompt itself shows "1" in quotes), so normalize before comparing.
        if bool_answer.strip(" \t\r\n\"'.") == "1":
            # Assign document to list of eligible documents for synthesis task
            synthesis_task.add_eligible_document(doc)

# Usage: run the eligibility check on every document in dokument_list.
# Top-level await works here because the notebook kernel (IPython) supports it;
# in a plain Python script, wrap the call in asyncio.run(...) instead.
# NOTE(review): every run calls synthesis_task.add_eligible_document again for each
# eligible document - confirm whether re-running this cell adds duplicates.
await eligibility_bool_from_raw_data(dokument_list)

In [None]:
# Check number of eligible documents
number_of_eligible_doc = len(synthesis_task.eligible_documents)
# An f-string avoids the doubled space that print's default separator put before "documents"
print(f"There are {number_of_eligible_doc} documents eligible for the chosen research topic")

# Example: Check eligibility of document 5 (index 4), provided the list is that long
example_index = 4
if example_index < len(dokument_list):
    print(dokument_list[example_index].eligibility)
else:
    print(f"Only {len(dokument_list)} documents loaded; there is no document {example_index + 1} to show")

In [None]:
import pickle

# Persist the synthesis task, including its eligible documents, for the synthesis step.
# Pick any file name you like, as long as the same file is loaded there.
with open("synthesis_task.pkl", "wb") as output_file:
    pickle.dump(synthesis_task, output_file)