# Graphics Idea Auto Generator

1. Ask OpenAI to generate a graphic idea description, prompt-engineered for the text-to-diagram engine.
2. Auto-generate the diagram.

## Using OpenAI To Generate a Graphic Idea

In [43]:
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
from openai import AzureOpenAI
import openai
from docx2python import docx2python
import re
import textwrap
import fitz 

In [3]:
# Load variables from a .env file into the process environment for the os.getenv calls below.
load_dotenv()

True

In [4]:
# OpenAI API configuration
# Module-level settings, used by calls made through the `openai` module itself
# (e.g. `openai.chat.completions.create`).
openai.api_type = "azure"
openai.api_key = os.getenv("api_key_azure")
# openai>=1.0 takes the Azure endpoint from `azure_endpoint`; the pre-1.0
# `api_base` attribute is not consulted by the module-level client.
openai.azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_version = os.getenv("api_version")
# Name of the Azure deployment, passed as `model` in chat-completion requests.
openai_deployment = "sdgi-gpt-35-turbo-16k"


# Explicit Azure client built from the same environment variables as above.
client = AzureOpenAI(
    api_key=os.getenv("api_key_azure"),
    api_version=os.getenv("api_version"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)

In [5]:
# Helper for simple, single-shot OpenAI chat calls.
def callOpenAI(messages):
    """Send a chat conversation to the configured deployment and return the reply.

    Args:
        messages: List of chat message dicts (``role`` / ``content``), passed
            unchanged to the chat-completions endpoint.

    Returns:
        The ``content`` of the first choice in the response.
    """
    completion = openai.chat.completions.create(
        messages=messages,
        model=openai_deployment,
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

## Text Extraction from Module Dataset

In [6]:
# Source Word document of the module to process.
module_file_path = '../data/raw_data/SEA Module 3 Draft_v4.docx'
# Despite the name, this is the directory the extracted text is written into.
output_file = '../data/extracted_data'

In [7]:
def docx_text_cleaning(text):
    """Normalise raw document text into paragraphs wrapped at 80 columns.

    Steps, in order:
      1. drop ``----...----`` markers (footage / image indications),
      2. turn every carriage return and newline into a space,
      3. collapse runs of two or more whitespace characters into one space,
      4. re-wrap the resulting single line to a width of 80 characters.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned, re-wrapped text.
    """
    without_markers = re.sub(r'----[^-]+----', '', text)
    single_line = without_markers.replace('\r', '\n').replace('\n', ' ')
    collapsed = re.sub(r"\s\s+", ' ', single_line)
    return textwrap.fill(collapsed, width=80)

In [8]:
def extract_text_docx(file_path):
    """Read a .docx file and return its cleaned text.

    Args:
        file_path: Path of the Word document to open with ``docx2python``.

    Returns:
        The document's ``text`` after it has been passed through
        ``docx_text_cleaning``.
    """
    with docx2python(file_path) as document:
        return docx_text_cleaning(document.text)

In [9]:
# Extract the module text and save it under the extraction directory.
filename = 'SEA Module 3 Draft_v4'
content = extract_text_docx(module_file_path)
# Create the target directory if needed so opening the file for writing
# does not fail on a fresh checkout.
os.makedirs(output_file, exist_ok=True)
output_path = os.path.join(output_file, filename)
with open(output_path, "w") as f:
    f.write(content)

In [57]:
def generate_graphics_idea(text):
    """Ask the model for graphic and infographic ideas for a passage.

    Args:
        text: Passage sent as the user message.

    Returns:
        Whatever ``callOpenAI`` returns for the two-message conversation
        (system instructions followed by the passage).
    """
    # The prompt's internal indentation is part of the text sent to the model.
    system_prompt = """
            You are a UNDP module writer assistant. Read the provided passage and identify data points that would benefit from graphical representation to enhance understanding.
            Additionally, suggest ideas for infographics to improve the overall presentation of the text.
            Provide specific recommendations on how these graphics or infographics should be designed and what information they should convey.
            """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    return callOpenAI(conversation)

In [11]:
# Read the previously extracted module text back from disk; the context
# manager closes the file even if reading raises.
with open('../data/extracted_data/SEA Module 3 Draft_v4', 'r') as file:
    content = file.read()

In [None]:
# Request graphic ideas for the entire module text in a single call.
generate_graphics_idea(content)

The whole text exceeds the OpenAI model's token limit, so it is broken down into smaller sections below, using the PDF's table of contents.

In [46]:
def extract_text_by_toc(pdf_path):
    """Split a PDF into sections using its table of contents.

    Each TOC entry is mapped to the text of the pages from its own start page
    up to the page before the next entry starts; the last entry runs to the
    end of the document. Extraction is page-granular: text is taken from whole
    pages, not cut at the heading position inside a page.

    Args:
        pdf_path: Path of the PDF file to open with ``fitz``.

    Returns:
        A dict mapping each TOC title to the concatenated text of its pages.
        Entries that share a title overwrite earlier ones.
    """
    toc_text = {}

    # The context manager closes the document even if extraction raises.
    with fitz.open(pdf_path) as document:
        toc = document.get_toc()
        page_count = len(document)

        for index, toc_item in enumerate(toc):
            _, title, start_page = toc_item

            # Determine the end page (1-based, inclusive).
            if index + 1 < len(toc):
                _, _, next_start_page = toc[index + 1]
                # When the next entry starts on the same page as this one,
                # "next start - 1" falls before this entry's start page and
                # the range would be empty; always keep the start page.
                end_page = max(next_start_page - 1, start_page)
            else:
                end_page = page_count

            # TOC pages are 1-based while document indexing is 0-based.
            toc_text[title] = "".join(
                document[page_num].get_text()
                for page_num in range(start_page - 1, end_page)
            )

    return toc_text

In [49]:
# Usage example: split the module PDF into per-section text, keyed by
# table-of-contents title.
pdf_path = "../data/raw_data/SEA Module 3 Draft_v4.pdf"
toc_text = extract_text_by_toc(pdf_path)

In [64]:
# Generate graphic ideas for every TOC section and save one text file per
# section (section text followed by the generated ideas).
output_file = 'graphics_idea'
# Create the output directory up front so the first write cannot fail on a
# missing folder.
os.makedirs(output_file, exist_ok=True)
for title, text in toc_text.items():
    # Skip sections whose title mentions References.
    if 'References' in title:
        continue
    print(f"Generating Graphic Ideas of {title}")
    graphics_idea = generate_graphics_idea(text)
    # TOC titles can contain characters that are not valid in file names
    # (path separators, ':' ...) or be empty; map them to a safe name.
    safe_title = re.sub(r'[\\/:*?"<>|]+', '_', title).strip() or 'untitled'
    output_path = os.path.join(output_file, safe_title + ".txt")
    # Built without source-code indentation so none of it leaks into the file.
    content = f"{text}\n\nGraphic Ideas:\n\n{graphics_idea}\n"
    # Explicit UTF-8: the text may contain characters the platform default
    # encoding cannot represent.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)

Generating Graphic Ideas of Module 3: Energy and Sustainable Development Nexus
Generating Graphic Ideas of 1. Interlinkages between Energy and other SDGs
Generating Graphic Ideas of 1.1. SDG 7.1 Access to electricity and clean cooking
Generating Graphic Ideas of 1.1.1. Maximizing Synergies in Sustainable Energy Access
Generating Graphic Ideas of 1.1.2. Overcoming Energy Access Trade-offs
Generating Graphic Ideas of 1.2. SDG 7.2 Renewable energy
Generating Graphic Ideas of 1.2.1. Advancing Sustainable Prosperity with Renewable Energy
Generating Graphic Ideas of 1.2.2. Managing Trade-offs in Renewable Energy for Sustainable Development
Generating Graphic Ideas of 1.3. SDG 7.3 Energy efficiency
Generating Graphic Ideas of 1.3.1. Catalyzing Sustainability through Energy Efficiency
Generating Graphic Ideas of 1.3.2. Developing Informed Strategies for Energy Efficiency Trade-offs
Generating Graphic Ideas of 2. Approaches, Methods and Tools for Strategic Innovation
Generating Graphic Ideas of