### Step 1

Initialize the notebook, loading the configuration and importing libraries.

In [1]:
# Import the libraries required for the notebook
import asyncio

# Instantiate the config class
import sys
# Add the parent directory to the import path before importing from `config`
sys.path.append('..')
from config.notebook_config import *
config = notebook_config()
# Load config from file
config.load_config_from_file()


### Step 2

Load the data from a Word file, and store it in a data dictionary.

Replace `mydoc.docx` in the cell below with the full path to the Word file.

In [None]:
from docx import Document

# Load the document
document = Document("mydoc.docx")

# Parsed sessions, plus the session and section currently being filled in
sessions = []
current_session = None
current_section = None

# Map each section header that can open a paragraph to its key in the session dictionary
sections = {
    "Author information:": "authorInfo",
    "Affiliations:": "affiliations",
    "Abstract:": "abstract",
    "Session notes:": "sessionNotes",
    "Transcript:": "transcript"
}

# Walk the paragraphs in document order: a "Heading2" paragraph opens a new session,
# a paragraph starting with a known header opens a section of that session, and any
# other paragraph is a continuation of the section opened last.
for paragraph in document.paragraphs:
    text = paragraph.text

    if paragraph.style.style_id == "Heading2":
        # Close the previous session (if any) before starting the next one
        if current_session is not None:
            sessions.append(current_session)

        # Every section starts empty; the keys follow the order of `sections`
        current_session = {"title": text, **{key: "" for key in sections.values()}}
        current_section = None
        continue

    # Text that appears before the first session heading has no session to belong to
    if current_session is None:
        continue

    matched_header = next((header for header in sections if text.startswith(header)), None)
    if matched_header is not None:
        current_section = sections[matched_header]
        # Drop only the leading header; the same words may legitimately appear later in the text
        current_session[current_section] = text[len(matched_header):].strip()
        continue

    if current_section is not None:
        existing = current_session[current_section]
        # Text taken from the header line has no trailing newline, so add one before
        # appending; otherwise the first two lines of a section would run together.
        if existing and not existing.endswith("\n"):
            existing += "\n"
        current_session[current_section] = existing + text + "\n"

# Append the last session if it exists
if current_session is not None:
    sessions.append(current_session)

# Print every parsed session field by field so the extraction can be checked by eye
print(f"Found {len(sessions)} sessions")

# (label shown in the printout, key in the session dictionary)
field_labels = (
    ("Author Info", "authorInfo"),
    ("Affiliations", "affiliations"),
    ("Abstract", "abstract"),
    ("Session Notes", "sessionNotes"),
    ("Transcript", "transcript"),
)

for i, session in enumerate(sessions):
    print(f"{i}: {session['title']}")
    for label, key in field_labels:
        print(f"{label}: {session[key]}")
    print("-" * 40)


### Step 3

Initialize the Semantic Kernel and connect it to the Azure OpenAI chat model.

In [None]:
# Load Semantic Kernel
import semantic_kernel
import semantic_kernel as sk
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion

# Create the kernel first; the chat service is attached to it below
kernel = sk.Kernel()
print("Kernel loaded.")

# Connection settings come from the notebook configuration loaded in Step 1
deployment, endpoint, api_key = config.model, config.endpoint, config.azure_api_key

# Register the Azure chat completion service on the kernel as "dv"
chat_service = AzureChatCompletion(deployment, endpoint, api_key)
kernel.add_chat_service("dv", chat_service)
print(f"Fire-up the kernel with {deployment}")

### Step 4

Load plug-ins required to curate the congress notes.

In [None]:
# Folder that holds the plug-ins, relative to this notebook
plugins_directory = "../plugins"

# Load the semantic functions of the "CreateReport" plug-in from that folder
my_functions = kernel.import_semantic_skill_from_directory(
    plugins_directory,
    "CreateReport",
)

print("Plugins loaded.")

### Step 5

Execute the "curation" process:
* First it will replace abbreviations and acronyms with their full meaning
* Then it will summarize the notes
* It will then extract the key concepts, in a report format
* Finally it will translate to Spanish

In [None]:
import openai
import time

def load_credentials(filename):
    """Read a JSON credentials file and return its parsed content.

    Args:
        filename: Path to the JSON file to read.

    Returns:
        The deserialized JSON document (callers in this notebook index it
        with string keys, so a JSON object is expected).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Imported locally: no cell imports json explicitly, so the function must not
    # depend on the name happening to be in the notebook namespace.
    import json

    with open(filename, encoding="utf-8") as cred_file:
        return json.load(cred_file)

openai_keys_file = "../credentials_openai.json"
openai_model_info = load_credentials(openai_keys_file)

session_id = 0
# Concatenate the necessary session details to provide full context for summarization
input_str = (
    f"Author information: {sessions[session_id]['authorInfo']}\n"
    f"Affiliations: {sessions[session_id]['affiliations']}\n"
    f"Abstract: {sessions[session_id]['abstract']}\n"
    f"Transcript: {sessions[session_id]['transcript']}\n"
)

# Point the openai client at the deployment described in the credentials file
openai.api_type = openai_model_info["api_type"]
openai.api_key = openai_model_info["api_key"]
openai.api_base = openai_model_info["endpoint"]
openai.api_version = openai_model_info["model_version"]

# System message shared by both requests
SYSTEM_PROMPT = "You are an AI expert in Rett Syndrome, specialized on generating summaries of scientific sessions."
# How many times a rate-limited request is attempted before giving up
MAX_ATTEMPTS = 3


def request_completion(user_prompt, max_attempts=MAX_ATTEMPTS):
    """Send one chat request and return the stripped reply text.

    A rate-limited request is retried after the delay announced in the
    "Retry-After" header (8 seconds when the header is absent). Any other
    error is reported and ends the request immediately.

    Args:
        user_prompt: Text sent as the user message.
        max_attempts: Maximum number of attempts when rate limited.

    Returns:
        The reply text, or None when no reply could be obtained.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            response = openai.ChatCompletion.create(
                deployment_id=openai_model_info["deployment_id"],
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                max_tokens=500,
                temperature=0.3
            )
            return response.choices[0].message["content"].strip()
        except openai.error.RateLimitError as e:
            # Sleeping only makes sense if another attempt follows
            if attempt == max_attempts:
                print(f"Rate limited. Giving up after {max_attempts} attempts.")
                break
            retry_after = int(e.headers.get("Retry-After", 8))
            print(f"Rate limited. Retrying in {retry_after} seconds.")
            time.sleep(retry_after)
        except openai.error.OpenAIError as e:
            print(f"Error generating analysis: {e}")
            break
        except Exception as e:
            print(f"Unexpected error: {e}")
            break
    return None


summary_prompt = f"""
            
            Summarize the following transcript of a session presented at the Rett Syndrome World Congress.
            The goal is to provide a comprehensive yet concise overview of the session.
            Please ensure the summary is suitable for individuals with a general knowledge of Rett Syndrome,
            focusing on aspects that would be meaningful for families.

            {input_str}
            """

summary = request_completion(summary_prompt)

# Always define the name so later cells can test it instead of hitting a NameError
key_take_aways = None

if summary is None:
    print("No summary was generated; skipping the key takeaways.")
else:
    print(summary)

    takeaways_prompt = f"""
            
            Based on the following summary of a session from the Rett Syndrome World Congress, create a list of key takeaways for families. The takeaways should be easy to understand and focus on practical insights or important points that may impact daily life, caregiving, or understanding of Rett Syndrome. Use bullet points to make the takeaways clear and actionable.

            {summary}
            """

    key_take_aways = request_completion(takeaways_prompt)
    if key_take_aways is None:
        print("No key takeaways were generated.")
    else:
        print(key_take_aways)

In [None]:
from semantic_kernel import ContextVariables

session_id = 0
# Concatenate the necessary session details to provide full context for summarization
input_str = (
    f"Author information: {sessions[session_id]['authorInfo']}\n"
    f"Affiliations: {sessions[session_id]['affiliations']}\n"
    f"Abstract: {sessions[session_id]['abstract']}\n"
    f"Transcript: {sessions[session_id]['transcript']}\n"
)

# Run the summarization task with the enriched input
step1 = asyncio.create_task(kernel.run_async(my_functions["Summarize"], input_str=input_str))
step1_result = await step1

print(step1_result.result)

In [None]:
from semantic_kernel import ContextVariables

session_id = 0
input_str = sessions[session_id]["transcript"]
print(f"Input string: {input_str}")

# Review notes from the previous step, and summarize the notes
step1 = asyncio.create_task(kernel.run_async(my_functions["NoteReview"], input_str=input_str))
step1_result = await step1

print(step1_result.result)

# If the initial notes are not good enough, terminate the process
if step1_result.result == "TERMINATE PROCESS":
    print("Process terminated")
else:
    # Replace abbreviations and acronyms from the input text
    step2 = asyncio.create_task(kernel.run_async(my_functions["AbbreviationExpansion"], input_str=step1_result.result))
    step2_result = await step2
    print("Abbreviations expansion result:")
    print(step2_result.result)

    # Create the final session report, after the previous steps
    step3 = asyncio.create_task(kernel.run_async(my_functions["Summarize"], input_str=step2_result.result))
    step3_result = await step3
    print("Report result:")
    print(step3_result.result)

    # Translate to Spanish
    context = ContextVariables()
    context["language"] = "spanish"
    context["input"] = step3_result.result
    translate = asyncio.create_task(kernel.run_async(my_functions["TranslateNotes"], input_vars=context))
    translate_result = await translate
    print("Translate result:")
    print(translate_result)

### Step 6

Repeat the previous step for the full document and save the document as output.

In [None]:
output_result = []

for session in sessions:
    input_str = session["content"]

    print(f"Processing session: {session['title']}")

    # Replace abbreviations and acronyms from the input text
    step1 = asyncio.create_task(kernel.run_async(my_functions["NoteReview"], input_str=input_str))
    step1_result = await step1

    if step1_result.result == "TERMINATE PROCESS":
        print("Process terminated")
        output_result.append({"title": session["title"], "content": "No notes available"})
    else:
        # Review notes from the previous step, and summarize the notes
        step2 = asyncio.create_task(kernel.run_async(my_functions["AbbreviationExpansion"], input_str=step1_result.result))
        step2_result = await step2

        # Create the final session report, after the previous steps
        step3 = asyncio.create_task(kernel.run_async(my_functions["Summarize"], input_str=step2_result.result))
        step3_result = await step3

        # Translate to Spanish
        context = ContextVariables()
        context["language"] = "spanish"
        context["input"] = step3_result.result
        translate = asyncio.create_task(kernel.run_async(my_functions["TranslateNotes"], input_vars=context))
        translate_result = await translate

        output_result.append({"title": session["title"], "content": translate_result.result})

    # Save the output to a Word document
    output_document = Document()

for session in output_result:
    output_document.add_heading(session["title"], level=1)
    output_document.add_paragraph(session["content"])

output_document.save("output.docx")