In [None]:
import os
from getpass import getpass

# Never store a real key in the notebook source: reuse OPENAI_API_KEY when the
# environment already provides it, otherwise ask for it interactively
# (getpass reads the value without echoing it into the cell output).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
from dokument import Dokument
from synthesis import Synthesis

import pickle
# Load your pickled Synthesis object; this notebook reads its existing .topic and
# .eligible_documents attributes.
# NOTE(review): presumably the Dokument/Synthesis imports above are what allows pickle
# to rebuild the stored objects — confirm before removing them.
# SECURITY: pickle.load can execute arbitrary code contained in the file, so only open a
# synthesis_task.pkl that you created yourself or otherwise trust.
with open("synthesis_task.pkl", "rb") as file:
    synthesis_task = pickle.load(file)
print("Research topic to be synthesized: ", synthesis_task.topic)

In [None]:
# Define the template for the coding of thematic analysis, including the research topic and document text.
# {topic} is filled in once below; {docs} stays a placeholder that is filled per document at run time.
# The two few-shot examples are kept in the same unquoted format so the model sees one consistent pattern.
code_template = ("""
Consider the research topic:
{topic}

And the text input from a publication:
Text inputs:
{docs}

Please perform the following tasks for thematic analysis:

Identify and Assign Codes:
For each segment of text that is relevant to the research topic, identify and assign a code.
Provide Code Details:
For each code, output the following information:

Code Name: A concise, descriptive name for the code.
Code Definition: A brief explanation of what the code represents.
Example: An illustrative quote or excerpt that exemplifies the code.

Example Output 1:
**Code Name**: Product Quality
**Code Definition**: Describes the perceived value and performance of a product.
**Example**: Many reviews highlighted the excellent build quality and durability of the product.

Example Output 2:
**Code Name**: User Feedback
**Code Definition**: Encompasses comments and suggestions provided by users to improve the product or service.
**Example**: Users suggested adding more customization options to enhance their experience.
""")

# Format the template with the specific research topic and a placeholder for document text.
# Braces inside the topic are doubled first: the formatted string is parsed as a template again
# below, and an unescaped "{...}" in the topic would be mistaken for an extra input variable.
topic_escaped = str(synthesis_task.topic).replace("{", "{{").replace("}", "}}")
code_template_formatted = code_template.format(topic=topic_escaped, docs="{docs}")

# Create a ChatPromptTemplate instance using the formatted template
from langchain.prompts import ChatPromptTemplate
codes_prompt = ChatPromptTemplate.from_template(code_template_formatted)

# Initialize the GPT-4 model with specified parameters (e.g., temperature setting and model name)
from langchain_openai import ChatOpenAI
gpt4_model = ChatOpenAI(temperature=0, model_name="gpt-4-0125-preview")

# Set up the output parser to handle the model's responses
from langchain.schema.output_parser import StrOutputParser
str_output_parser = StrOutputParser()

# Chain the prompt, model, and output parser to create the coding chain for a thematic analysis
codes_chain = codes_prompt | gpt4_model | str_output_parser

In [None]:
# Define an asynchronous function to process and assign codes for each document
async def codes_from_raw_data(dokument_list, max_concurrency=None):
    """Run the coding chain over every document and store each result on its document.

    Args:
        dokument_list: Documents to code. Each item must expose ``raw_data``
            (sent to ``codes_chain`` as its input) and accept assignment to
            ``codes``.
        max_concurrency: Optional upper bound on the number of chain calls
            running at the same time, forwarded as the ``max_concurrency``
            runnable config. ``None`` (the default) calls ``abatch`` without
            any config, exactly as before.

    Returns:
        None. The results are written to each document's ``codes`` attribute
        and printed; nothing is returned so the notebook cell shows no ``Out``.
    """
    # Extract raw data from each document in the list
    raw_data_list = [doc.raw_data for doc in dokument_list]

    # Only pass a config when a limit was requested, so the default call is unchanged.
    batch_options = {}
    if max_concurrency is not None:
        batch_options["config"] = {"max_concurrency": max_concurrency}

    # Run the codes chain on the raw data of all documents asynchronously
    codes_list = await codes_chain.abatch(raw_data_list, **batch_options)

    # Assign the results to each document and print the codes.
    # Results are matched to documents by position; a short result list still raises IndexError.
    for position, doc in enumerate(dokument_list):
        doc.codes = codes_list[position]
        print(f"Document {position + 1}:")
        print("Codes:", doc.codes)
        print("-" * 30)  # Separator

# Usage: Run the function on the list of eligible documents.
# The coroutine is awaited directly at cell level; the call sets .codes on every
# document in synthesis_task.eligible_documents and prints the result per document.
await codes_from_raw_data(synthesis_task.eligible_documents)

In [None]:
# Spot check: show the DOI of the first eligible document (list position 0).
print(synthesis_task.eligible_documents[0].DOI)

In [None]:
# Define the template for identifying themes from coded data, including the research topic.
# {topic} is filled in once below; {docs} stays a placeholder that receives one document's codes at run time.
themes_template = ("""
Consider the research topic: {topic}

You will receive input of coded data from a research document, which contains categorized segments of text.
For the process of thematic analysis regarding the research topic,
identify overarching themes that encapsulate common patterns present across the coded segments. 
Only include themes that are relevant to the research topic. 

Coded data input:
{docs}

Example output 1:
**Theme Name**: Product Quality Perception

**Description**: Overall assessment of how customers perceive the quality of the product, 
encompassing both high quality and poor quality.

Example output 2:
**Theme Name**: Customer Experience

**Description**: Includes all aspects of the customer’s interaction with the product and service, 
such as satisfaction and dissatisfaction.
""")

# Format the template with the specific research topic and a placeholder for coded data.
# Braces inside the topic are doubled first: the formatted string is parsed as a template again
# below, and an unescaped "{...}" in the topic would be mistaken for an extra input variable.
topic_escaped = str(synthesis_task.topic).replace("{", "{{").replace("}", "}}")
themes_template_formatted = themes_template.format(topic=topic_escaped, docs="{docs}")

# Create a ChatPromptTemplate instance using the formatted template
themes_prompt = ChatPromptTemplate.from_template(themes_template_formatted)

# Chain the prompt, model, and output parser to create the thematic analysis chain
themes_chain = themes_prompt | gpt4_model | str_output_parser

In [None]:
# Define an asynchronous function to process and assign themes based on coded data for each document
async def themes_from_codes(dokument_list, max_concurrency=None):
    """Run the themes chain over every document's codes and store each result on its document.

    Args:
        dokument_list: Documents to process. Each item must expose ``codes``
            (sent to ``themes_chain`` as its input) and accept assignment to
            ``themes``.
        max_concurrency: Optional upper bound on the number of chain calls
            running at the same time, forwarded as the ``max_concurrency``
            runnable config. ``None`` (the default) calls ``abatch`` without
            any config, exactly as before.

    Returns:
        None. The results are written to each document's ``themes`` attribute
        and printed; nothing is returned so the notebook cell shows no ``Out``.
    """
    # Extract codes from each document in the list
    codes_list = [doc.codes for doc in dokument_list]

    # Only pass a config when a limit was requested, so the default call is unchanged.
    batch_options = {}
    if max_concurrency is not None:
        batch_options["config"] = {"max_concurrency": max_concurrency}

    # Run the themes chain on the coded data of all documents asynchronously
    themes_list = await themes_chain.abatch(codes_list, **batch_options)

    # Assign the results to each document and print the themes.
    # Results are matched to documents by position; a short result list still raises IndexError.
    for position, doc in enumerate(dokument_list):
        doc.themes = themes_list[position]
        print(f"Document {position + 1}:")
        print("Themes:", doc.themes)
        print("-" * 30)  # Separator

# Usage: Run the function on the list of eligible documents.
# The coroutine is awaited directly at cell level; it reads each document's .codes,
# so the coding step must have been run on these documents first.
await themes_from_codes(synthesis_task.eligible_documents)

In [None]:
# Build one text block per eligible document (1-based heading, its themes,
# a 30-dash separator line) and concatenate all blocks into a single string.
themes_output = "".join(
    f"Document {doc_number}:\n" + f"Themes: {doc.themes}\n" + "-" * 30 + "\n"
    for doc_number, doc in enumerate(synthesis_task.eligible_documents, start=1)
)

# Ask the model wrapper how many tokens the combined themes text occupies
num_tokens = gpt4_model.get_num_tokens(themes_output)

# Show the combined text only while it stays within 128000 tokens; otherwise report the overflow
if num_tokens <= 128000:
    print(themes_output)
else:
    print("The output is too long for this model.")

In [None]:
# Define the template for synthesizing overarching themes based on identified themes.
# {topic} is filled in once below; {themes} stays a placeholder that receives the accumulated themes text.
synthesis_template = ("""
Consider the research topic: {topic}

For the process of thematic analysis regarding the research topic,
You will receive the input: A list of themes derived from coded segments from various 
related research documents.
Review the list of identified themes and synthesize a final list of overarching themes that 
address the research topic. 
Keep track of the context, document your rationale, and 
include references to the document numbers where each theme is found.

List of themes input:
{themes}

""")

# Format the template with the specific research topic and a placeholder for themes.
# Braces inside the topic are doubled first: the formatted string is parsed as a template again
# below, and an unescaped "{...}" in the topic would be mistaken for an extra input variable.
topic_escaped = str(synthesis_task.topic).replace("{", "{{").replace("}", "}}")
synthesis_template_formatted = synthesis_template.format(topic=topic_escaped, themes="{themes}")

# Create a ChatPromptTemplate instance using the formatted synthesis template
synthesis_prompt = ChatPromptTemplate.from_template(synthesis_template_formatted)

# Chain the prompt, GPT-4 model, and output parser to create the synthesis chain
synthesis_chain = synthesis_prompt | gpt4_model | str_output_parser

In [None]:
# Invoke the synthesis chain to generate a synthesis result based on the themes output.
# The accumulated themes_output string is passed as the chain's only input.
# NOTE(review): the token-length check in the earlier cell only prints a message; it does not
# stop this call, so an over-long themes_output is still sent to the model here.
synthesis_result = synthesis_chain.invoke(themes_output)
print(synthesis_result)

In [None]:
# Save the results into a new file with added "_finished" to file name.
# The synthesis text is attached to the task object first, so the single pickle holds the
# task together with its documents (including the .codes/.themes set earlier) and the synthesis.
synthesis_task.synthesis_result = synthesis_result
with open("synthesis_task_finished.pkl", "wb") as file:
    pickle.dump(synthesis_task, file)

Optional: Once the cells above have been run and the results saved, the synthesis result, together with the codes and themes of the individual documents, can be viewed by running the notebook from this point onward, without re-running it from the beginning.

In [1]:
import pickle
from dokument import Dokument
from synthesis import Synthesis

# Load the finished Synthesis object written by the save step. Besides .topic and
# .eligible_documents, the cells below read its .synthesis_result and each document's
# .codes, .themes and .DOI attributes.
# SECURITY: pickle.load can execute arbitrary code contained in the file, so only open a
# synthesis_task_finished.pkl that you created yourself or otherwise trust.
with open("synthesis_task_finished.pkl", "rb") as file:
    synthesis_task_result = pickle.load(file)
print("Research topic is:", synthesis_task_result.topic)

Research topic is: how accessibility can be improved using mixed reality?


In [2]:
# index of document to view codes from.
# NOTE: `index` is the 0-based position in eligible_documents, whereas the processing cells
# label documents starting from 1 ("Document {i+1}"). The document shown here as
# "Document <index>" is therefore "Document <index + 1>" in the themes text given to the synthesis.
index = 1
print("Codes of Document", index, "with DOI", synthesis_task_result.eligible_documents[index].DOI, ":")
print(synthesis_task_result.eligible_documents[index].codes)

Codes of Document 1 with DOI 10.1145/2893182 :
**Code Name**: Mixed Reality Gaming Experience
**Code Definition**: Describes the overall experience and engagement of users with mixed reality games, specifically focusing on adult powered chair users.
**Example**: "Findings from two exploratory field studies and a post hoc observer survey show that adult powered chair users found the game to be entertaining and used a variety of path strategies as they learned to play the game."

**Code Name**: Accessibility in Gaming
**Code Definition**: Refers to the design and implementation of game features that make them accessible and inclusive for users with diverse abilities, particularly those using powered mobility aids.
**Example**: "An initial set of theoretically and empirically informed guidelines for making mobile mixed reality games accessible to adult powered chair users with diverse abilities is proposed."

**Code Name**: Social Interaction Through Gaming
**Code Definition**: Highlights

In [5]:
# index of document to view themes from.
# NOTE: `index` is the 0-based position in eligible_documents, whereas the processing cells
# label documents starting from 1 ("Document {i+1}"). The document shown here as
# "Document <index>" is therefore "Document <index + 1>" in the themes text given to the synthesis.
index = 0
print("Themes of Document", index, "with DOI", synthesis_task_result.eligible_documents[index].DOI, ":")
print(synthesis_task_result.eligible_documents[index].themes)

Themes of Document 0 with DOI 10.1145/2702613.2702626 :
**Theme Name**: Enhancing Accessibility through Technological Innovation

**Description**: This theme encapsulates the development and application of Sensory Substitution Devices (SSDs) and mixed reality technologies to improve accessibility for individuals with sensory impairments. It highlights the transformative potential of these technologies in creating more inclusive virtual environments and interfaces, thereby enabling users, especially those with visual impairments, to better navigate, understand, and interact with digital spaces. This theme underscores the importance of technological advancements in bridging accessibility gaps and enhancing the user experience for individuals with disabilities.

**Theme Name**: User-Centric Design and Feedback Integration

**Description**: Central to improving accessibility with mixed reality is the emphasis on user-centered design and the integration of feedback from users with disabilit

In [4]:
# Print how many eligible documents the loaded task contains, followed by the stored synthesis text.
print("Synthesis result from processing", len(synthesis_task_result.eligible_documents), "documents:")
print(synthesis_task_result.synthesis_result)

Synthesis result from processing 9 documents:
Based on the input themes from the various documents, we can synthesize a final list of overarching themes that address the research topic of improving accessibility using mixed reality. The synthesis process involves identifying commonalities, merging similar themes, and highlighting unique contributions from each document to ensure a comprehensive understanding of the topic. Here is the synthesized list of overarching themes:

### 1. **Enhancing Accessibility through Mixed Reality Innovations**
- **Sub-themes**: Technological Innovation (Doc 1), Enhancing Accessibility Through Design (Doc 2), Enhancing Accessibility in Mixed Reality (Doc 3), Enhancing Accessibility through Sensory Adaptation (Doc 4), Enhancing VR Accessibility for Low Vision Users (Doc 5), Enhancing Navigational Accessibility through Mixed Reality (Doc 9).
- **Rationale**: This theme consolidates the various technological advancements and innovations across documents that