In [None]:
import os
from spire.presentation.common import *
from spire.presentation import *

# Folder that receives one PNG per slide
output_directory = "Output"

# Create a Presentation object
presentation = Presentation()

# try/finally guarantees the presentation is disposed even when loading,
# rendering or saving raises part-way through.
try:
    # Load a PowerPoint presentation
    presentation.LoadFromFile("Project Delivery.pptx")

    # Create the Output directory if it doesn't exist; exist_ok replaces the
    # separate os.path.exists check and its check-then-create race.
    os.makedirs(output_directory, exist_ok=True)

    # Loop through the slides in the presentation
    for i, slide in enumerate(presentation.Slides):
        # Specify the output file name
        fileName = f"{output_directory}/ToImage_{i}.png"  # Use forward slashes
        print("Saving slide", i, "to", fileName)  # Print debug information
        # Save each slide as a PNG image, disposing the rendered image even if
        # writing it to disk fails
        image = slide.SaveAsImage()
        try:
            image.Save(fileName)
        finally:
            image.Dispose()
finally:
    presentation.Dispose()

In [None]:
import base64
# Collects the base64 text of every image found in the export folder
image_elements = []
# Resolve the "Output" folder against the current working directory rather than
# a machine-specific absolute path, so the notebook runs outside one user's PC.
output_path = os.path.abspath("Output")

# Helper that turns an image file into base64 text
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The file content, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode("utf-8")

# Encode every image in the export folder.
# os.listdir() returns entries in arbitrary order, so the names are sorted
# (lexicographically) to make the resulting list reproducible between runs.
# The extension is compared case-insensitively so names such as "SLIDE.PNG"
# are not silently skipped.
for image_file in sorted(os.listdir(output_path)):
    if image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_path = os.path.join(output_path, image_file)
        encoded_image = encode_image(image_path)
        image_elements.append(encoded_image)
print(len(image_elements))

In [None]:
from langchain_openai import ChatOpenAI
from langchain_community.llms import openai
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.schema.messages import HumanMessage, AIMessage
from dotenv import load_dotenv

# Load environment variables (python-dotenv), then hand the API keys to the clients
load_dotenv()
# NOTE(review): `openai` is the module imported from langchain_community.llms,
# not the OpenAI SDK — confirm that setting `api_key` on it has any effect.
openai.api_key = os.environ.get('OPENAI_API_KEY')
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# OpenAI chat model and Gemini vision chat model, each limited to 1024 output tokens
chain_gpt = ChatOpenAI(
    model="gpt-3.5-turbo",
    max_tokens=1024,
)
chain_gemini_vision = ChatGoogleGenerativeAI(
    model="gemini-pro-vision",
    max_output_tokens=1024,
)

# Function for image content
def content_image(encoded_image):
    """Ask the Gemini vision model to extract everything shown in an image.

    Args:
        encoded_image: Base64-encoded image bytes as a ``str``.

    Returns:
        The ``content`` attribute of the model's response.
    """
    # Label the payload with its real type instead of always claiming JPEG:
    # base64 text beginning with "iVBORw0KGg" decodes to the PNG file signature.
    # Anything else keeps the previous "image/jpeg" label.
    mime_type = "image/png" if encoded_image.startswith("iVBORw0KGg") else "image/jpeg"
    prompt = HumanMessage(
        content=[
            {"type": "text", "text": "Extract full information in the image.If there is table you need to read it as a table."},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{encoded_image}"
                },
            },
        ]
    )
    response = chain_gemini_vision.invoke([prompt])
    return response.content

# Function for image summaries
def summarize_image(encoded_image):
    """Ask the Gemini vision model for a description of an image.

    Args:
        encoded_image: Base64-encoded image bytes as a ``str``.

    Returns:
        The ``content`` attribute of the model's response.
    """
    # Label the payload with its real type instead of always claiming JPEG:
    # base64 text beginning with "iVBORw0KGg" decodes to the PNG file signature.
    # Anything else keeps the previous "image/jpeg" label.
    mime_type = "image/png" if encoded_image.startswith("iVBORw0KGg") else "image/jpeg"
    prompt = HumanMessage(
        content=[
            {"type": "text", "text": "Describe the contents of this image."},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{encoded_image}"
                },
            },
        ]
    )
    response = chain_gemini_vision.invoke([prompt])
    return response.content

In [None]:
# Run both vision prompts over every encoded image, printing each result as it arrives
image_summaries, image_content = [], []
for position, encoded in enumerate(image_elements, start=1):
    summary = summarize_image(encoded)
    image_summaries.append(summary)
    contents = content_image(encoded)
    image_content.append(contents)

    # Progress feedback: the same banner precedes the summary and then the extracted content
    for result in (summary, contents):
        print(f"{position}th element of images processed.")
        print(result)
    

In [None]:
import uuid

from langchain_openai import OpenAIEmbeddings
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.schema.document import Document
from langchain.storage import InMemoryStore
from langchain_community.vectorstores import Chroma



# Metadata key under which each summary records the id of its original content
id_key = "doc_id"

# Storage layer: an in-memory docstore plus a Chroma collection named
# "summaries" that embeds text with OpenAIEmbeddings
store = InMemoryStore()
vectorstore = Chroma(
    collection_name="summaries",
    embedding_function=OpenAIEmbeddings(),
)

# Retriever wired to the vector store and the docstore through id_key
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    docstore=store,
    id_key=id_key,
)

# Function to add documents to the retriever
def add_documents_to_retriever(summaries, original_contents):
    """Index summaries in the vector store and file their originals in the docstore.

    Each summary/original pair shares a freshly generated UUID: the summary is
    wrapped in a ``Document`` whose metadata stores that id under ``id_key``,
    and the original content is written to the docstore under the same id.

    Args:
        summaries: Iterable of summary strings to add to the vector store.
        original_contents: Iterable of the corresponding original contents,
            in the same order as ``summaries``.

    Raises:
        ValueError: If the two inputs do not have the same number of items.
    """
    summaries = list(summaries)
    original_contents = list(original_contents)

    # zip() would silently drop the unmatched tail and leave summaries whose id
    # has no docstore entry, so a length mismatch is reported instead.
    if len(summaries) != len(original_contents):
        raise ValueError(
            f"summaries ({len(summaries)}) and original_contents "
            f"({len(original_contents)}) must have the same length"
        )
    # Nothing to index; avoid handing empty batches to the stores.
    if not summaries:
        return

    doc_ids = [str(uuid.uuid4()) for _ in summaries]
    summary_docs = [
        Document(page_content=summary, metadata={id_key: doc_id})
        for doc_id, summary in zip(doc_ids, summaries)
    ]
    retriever.vectorstore.add_documents(summary_docs)
    retriever.docstore.mset(list(zip(doc_ids, original_contents)))

In [None]:
# Index the image summaries; each one is paired with that image's extracted content.
# (Author's note: hopefully real images soon.)
add_documents_to_retriever(
    summaries=image_summaries,
    original_contents=image_content,
)

In [None]:
# We can retrieve this table.
# `invoke` is the Runnable entry point that replaces the deprecated
# `get_relevant_documents` for the same query.
retriever.invoke(
    "what is the global market size in asia in 2020?"
)

In [None]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

# Chat model that writes the final answer (temperature 0)
model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Prompt restricting the answer to the supplied context
template = """Answer the question based only on the following context, which can include text, images and tables:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Pipeline: the input goes to the retriever (filling {context}) and is passed
# through unchanged as {question}; the filled prompt goes to the model and the
# reply is parsed to a plain string.
chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | model | StrOutputParser()

In [None]:
# Run the RAG chain on a sample question.
# The input is a single string: the question followed by pasted slide text.
# `chain` feeds its whole input to both the retriever and the {question} slot,
# so the entire string below (pasted text included) is used for both.
# NOTE(review): the pasted list looks like previously extracted image content —
# confirm whether it still needs to be embedded in the question by hand.
chain.invoke(
     """ what is the global market size in asia in 2020?
     use the following information to answer the question:
[' # Global Market Size\nThe image shows the global market size from 2015 to 2023. The market size is expected to grow from 8 billion USD in 2015 to 15 billion in 2023. The Americas region is expected to remain the largest market, followed by the Asia region.\n\n## Market Size Global ( in Billion USD)\n| Year | Americas | EMEA | Asia | Far East | Total |\n|---|---|---|---|---|---|\n| 2015 | 4.0 | 1.8 | 0.8 | 1.4 | 8.0 |\n| 2016 | 4.8 | 2.1 | 1.7 | 1.2 | 9.5 |\n| 2017 | 5.3 | 1.9 | 2.3 | 1.1 | 10.5 |\n| 2018 | 6.0 | 2.2 | 2.6 | 1.2 | 12.0 |\n| 2019 | 7.0 | 2.5 | 3.1 | 1.4 | 14.0 |\n| 2020 | 6.5 | 2.3 | 2.9 | 1.3 | 13.0 |\n| 2021 | 6.0 | 2.2 | 2.6 | 1.2 | 12.0 |\n| 2022 | 6.8 | 2.2 | 3.0 | 1.4 | 13.5 |\n| 2023 | 7.5 | 2.3 | 3.3 | 1.5 | 15.0 |',
 ' # Annual Revenue\n\n| Year | Americas | EMEA | Asia | Far East | Total |\n|---|---|---|---|---|---|\n| 2015 | 1.29 | 0.52 | 0.52 | 0.26 | 2.58 |\n| 2016 | 1.54 | 0.61 | 0.61 | 0.31 | 3.07 |\n| 2017 | 1.79 | 0.72 | 0.72 | 0.36 | 3.58 |\n| 2018 | 1.98 | 0.79 | 0.79 | 0.40 | 3.97 |\n| 2019 | 2.18 | 0.87 | 0.87 | 0.44 | 4.35 |\n| 2020 | 2.27 | 0.91 | 0.91 | 0.45 | 4.54 |\n| 2021 | 2.09 | 0.84 | 0.84 | 0.42 | 4.18 |\n| 2022 | 2.35 | 0.94 | 0.94 | 0.47 | 4.69 |\n| 2023 | 2.56 | 1.02 | 1.02 | 0.51 | 5.11 |',
 ' Project Delivery Team\n1. Program Director/Delivery Manager\n2. Program Manager\n3. Enterprise Architect/Solution Architects\n4. Team Leads\n5. Developers/Testers',
 ' SDLC Waterfall Framework\n\n1. Project Kick off\n2. Requirements Analysis\n3. Design\n4. HLD\n5. LLD\n6. Development & Testing\n7. Deployment\n8. Maintenance']   """
)