In [20]:
from pptx import Presentation
from PIL import Image
import os
import pandas as pd
import matplotlib.pyplot as plt

# Function to extract text from a slide
def extract_text(slide):
    text = []
    for shape in slide.shapes:
        if hasattr(shape, 'text') and shape.text:
            text.append(shape.text.strip())
    return text

# Function to extract images and charts from a slide
def extract_images_and_charts(slide, output_folder):
    image_paths = []
    chart_paths = []
    for idx, shape in enumerate(slide.shapes):
        if shape.shape_type == 13:  # 13 refers to picture (image)
            image = shape.image
            image_bytes = image.blob
            img_path = os.path.join(output_folder, f'image_{idx}.png')
            with open(img_path, 'wb') as f:
                f.write(image_bytes)
            image_paths.append(img_path)
        elif shape.has_chart:
            chart = shape.chart
            chart_data = extract_chart_data(chart)
            img_path = os.path.join(output_folder, f'chart_{idx}.png')
            recreate_chart(chart_data, img_path)
            chart_paths.append(img_path)
    return image_paths, chart_paths

# Function to extract tables from a slide
def extract_tables(slide):
    tables = []
    for shape in slide.shapes:
        if shape.has_table:
            table = shape.table
            data = []
            for row in table.rows:
                row_data = [cell.text.strip() for cell in row.cells]
                data.append(row_data)
            df = pd.DataFrame(data[1:], columns=data[0])  # Assuming the first row contains column headers
            tables.append(df)
    return tables

# Function to extract chart data from a chart shape
def extract_chart_data(chart):
    chart_data = {}
    for idx, series in enumerate(chart.series):
        categories = [str(category) for category in chart.plots[0].categories]
        values = [value for value in series.values]
        chart_data[f'Series {idx + 1}'] = dict(zip(categories, values))
    return chart_data


# Function to recreate chart using matplotlib
def recreate_chart(chart_data, output_path):
    df = pd.DataFrame(chart_data)
    ax = df.plot(kind='bar')  # You can change the chart type as needed
    plt.xlabel('X Axis')
    plt.ylabel('Y Axis')
    plt.title('Chart Title')
    plt.legend(title='Legend')
    plt.savefig(output_path)
    plt.close()

# Load the PowerPoint presentation
pr_path = 'Project Delivery.pptx'  # Replace with your PPT, PPTX, or PPTM file path
pr = Presentation(pr_path)

# Extracting all elements from each slide
output_folder = 'output'
os.makedirs(output_folder, exist_ok=True)

all_text = []
all_images = []
all_tables = []
all_charts = []

for slide in pr.slides:
    slide_text = extract_text(slide)
    all_text.extend(slide_text)
    
    slide_images, slide_charts = extract_images_and_charts(slide, output_folder)
    all_images.extend(slide_images)
    all_charts.extend(slide_charts)
    
    all_tables.extend(extract_tables(slide))

# Perform OCR on images to extract text (if needed)
# You can use OCR libraries like pytesseract for this purpose

# Display or process the extracted elements as needed
print("Extracted Text:")
print(all_text)
print("\nExtracted Tables:")
for idx, table_df in enumerate(all_tables):
    print(f"Table {idx + 1}:")
    print(table_df)
print("\nImage paths:")
print(all_images)
print("\nChart paths:")
print(all_charts)


Extracted Text:
['Your Logo', 'Project Delivery Team', 'Program Manager', 'SDLC Waterfall Framework', 'HLD\n\nLLD', 'Requirements\nAnalysis', 'Design', 'Development\n& Testing', 'Deployment', 'Maintenance', 'Project Kick off', 'SDLC Agile Framework', 'Annual Revenues', 'Global Market Size']

Extracted Tables:
Table 1:
    Year  Revenues                       Billion US$
0         Americas   EME  Asia  Far East       Total
1                                                   
2   2015      1.29  0.52  0.52      0.26        2.58
3   2016      1.54  0.61  0.61      0.31        3.07
4   2017      1.79  0.72  0.72      0.36        3.58
5   2018      1.98  0.79  0.79      0.40        3.97
6   2019      2.18  0.87  0.87      0.44        4.35
7   2020      2.27  0.91  0.91      0.45        4.54
8   2021      2.09  0.84  0.84      0.42        4.18
9   2022      2.35  0.94  0.94      0.47        4.69
10  2023      2.56  1.02  1.02      0.51        5.11

Image paths:
['output\\image_0.png', 'outpu

In [35]:
import base64
image_elements = []
output_path = "C:\\Users\\DELL\\PDF_Chat_MM\\output"

# Function to encode images
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    return encoded_image

for image_file in os.listdir(output_path):
    if image_file.endswith(('.png', '.jpg', '.jpeg')):
        image_path = os.path.join(output_path, image_file)
        encoded_image = encode_image(image_path)
        image_elements.append(encoded_image)
print(len(image_elements))

3


In [36]:
from langchain_openai import ChatOpenAI
from langchain_community.llms import openai
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.schema.messages import HumanMessage, AIMessage
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

chain_gpt = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=1024)
chain_gemini_vision = ChatGoogleGenerativeAI(model="gemini-pro-vision",max_output_tokens=1024)

# Function for text summaries
def summarize_text(text_element):
    prompt = f"Summarize the following text:\n\n{text_element}\n\nSummary:"
    response = chain_gpt.invoke([HumanMessage(content=prompt)])
    return response.content

# Function for table summaries
def summarize_table(table_element):
    prompt = f"Summarize the following table:\n\n{table_element}\n\nSummary:"
    response = chain_gpt.invoke([HumanMessage(content=prompt)])
    return response.content

# Function for image summaries
def summarize_image(encoded_image):
    prompt = HumanMessage(
        content=[
            {"type": "text", "text": "Describe the contents of this image."},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{encoded_image}"
                },
            },
        ]
    )
    response = chain_gemini_vision.invoke([prompt])
    return response.content

In [37]:
# Processing text elements with feedback and sleep
text_summaries = []
for i, te in enumerate(all_text):
    summary = summarize_text(te)
    text_summaries.append(summary)
    print(f"{i + 1}th element of texts processed.")
    print(summary)

1th element of texts processed.
Creating a logo for your business is an important step in establishing your brand identity. A well-designed logo can help customers easily recognize and remember your business. It should be simple, memorable, and convey the essence of your brand. Additionally, it should be versatile and work well across different mediums and sizes. Your logo is an essential part of your overall branding strategy.
2th element of texts processed.
The text likely discusses the role and responsibilities of a project delivery team. This team is typically responsible for overseeing the successful completion of a project, ensuring it is delivered on time, within budget, and meets the specified requirements. Members of the team may include project managers, engineers, designers, and other professionals who work together to achieve project goals. The text may also highlight the importance of effective communication and collaboration within the team to ensure project success.
3th 

In [39]:
# Processing table elements with feedback and sleep
table_summaries = []
for i, te in enumerate(all_tables):
    summary = summarize_table(te)
    table_summaries.append(summary)
    print(f"{i + 1}th element of tables processed.")
    print(summary)

1th element of tables processed.
The table shows the revenues in billion US dollars for different regions (Americas, EME, Asia, Far East) from 2015 to 2023, as well as the total revenues for each year. Revenues have been increasing steadily over the years, with the total revenues reaching 5.11 billion US dollars in 2023.


In [40]:
# Processing image elements with feedback and sleep
image_summaries = []
for i, ie in enumerate(image_elements):
    summary = summarize_image(ie)
    image_summaries.append(summary)
    print(f"{i + 1}th element of images processed.")
    print(summary)

1th element of images processed.
 This is a slide from a PowerPoint presentation. The slide contains a purple and gray puzzle piece in the center. The background is white. There is a notes section on the right side of the slide. The slide is titled "Slide 11 of 11" and has the subtitle "Office Theme."
2th element of images processed.
 The image contains a چرخه توسعه نرم افزار چابک (Agile software development cycle). The cycle is a continuous process that involves six steps:
1. Plan: In this step, the project team gathers requirements from the customer and creates a project plan.
2. Design: In this step, the team designs the software architecture and creates a detailed design document.
3. Develop: In this step, the team develops the software code.
4. Test: In this step, the team tests the software to ensure that it meets the requirements.
5. Evaluate: In this step, the team evaluates the software and makes any necessary changes.
6. Meet: In this step, the team meets with the customer to

In [41]:
import uuid

from langchain_openai import OpenAIEmbeddings
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.schema.document import Document
from langchain.storage import InMemoryStore
from langchain_community.vectorstores import Chroma



# Initialize the vector store and storage layer
vectorstore = Chroma(collection_name="summaries", embedding_function=OpenAIEmbeddings())
store = InMemoryStore()
id_key = "doc_id"

# Initialize the retriever
retriever = MultiVectorRetriever(vectorstore=vectorstore, docstore=store, id_key=id_key)

# Function to add documents to the retriever
def add_documents_to_retriever(summaries, original_contents):
    doc_ids = [str(uuid.uuid4()) for _ in summaries]
    summary_docs = [
        Document(page_content=s, metadata={id_key: doc_ids[i]})
        for i, s in enumerate(summaries)
    ]
    retriever.vectorstore.add_documents(summary_docs)
    retriever.docstore.mset(list(zip(doc_ids, original_contents)))

In [42]:
# Add text summaries
add_documents_to_retriever(text_summaries, all_text)

# Add table summaries
add_documents_to_retriever(table_summaries, all_tables)

# Add image summaries
add_documents_to_retriever(image_summaries, image_summaries) # hopefully real images soon

In [43]:
# We can retrieve this table
retriever.get_relevant_documents(
    " what is the anual revenue of asia in 2020?"
)

[    Year  Revenues                       Billion US$
 0         Americas   EME  Asia  Far East       Total
 1                                                   
 2   2015      1.29  0.52  0.52      0.26        2.58
 3   2016      1.54  0.61  0.61      0.31        3.07
 4   2017      1.79  0.72  0.72      0.36        3.58
 5   2018      1.98  0.79  0.79      0.40        3.97
 6   2019      2.18  0.87  0.87      0.44        4.35
 7   2020      2.27  0.91  0.91      0.45        4.54
 8   2021      2.09  0.84  0.84      0.42        4.18
 9   2022      2.35  0.94  0.94      0.47        4.69
 10  2023      2.56  1.02  1.02      0.51        5.11,
 'Annual Revenues']

In [46]:
# we can retrieve this diagram text
retriever.get_relevant_documents(
    " what are the steps in agile framework?"
)

['SDLC Agile Framework',
 ' The image contains a چرخه توسعه نرم افزار چابک (Agile software development cycle). The cycle is a continuous process that involves six steps:\n1. Plan: In this step, the project team gathers requirements from the customer and creates a project plan.\n2. Design: In this step, the team designs the software architecture and creates a detailed design document.\n3. Develop: In this step, the team develops the software code.\n4. Test: In this step, the team tests the software to ensure that it meets the requirements.\n5. Evaluate: In this step, the team evaluates the software and makes any necessary changes.\n6. Meet: In this step, the team meets with the customer to review the software and get feedback.']

In [56]:
# we can retrieve this tediagram text
retriever.get_relevant_documents(
    "what are things contained by the images and what they explaining?"
)

[' This is a screenshot of a PowerPoint slide. The slide contains a diagram of two puzzle pieces, one blue and one green. The pieces are interlocked.']

In [60]:
# we can retrieve this text
retriever.get_relevant_documents(
    "what are roles involved in project delivery team?"
)

['Project Delivery Team', 'Project Kick off']

In [32]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

template = """Answer the question based only on the following context, which can include text, images and tables:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [49]:
chain.invoke(
     """what is the anual revenue of america in 2020?
     use the following information to answer the question:
[    Year  Revenues                       Billion US$
 0         Americas   EME  Asia  Far East       Total
 1                                                   
 2   2015      1.29  0.52  0.52      0.26        2.58
 3   2016      1.54  0.61  0.61      0.31        3.07
 4   2017      1.79  0.72  0.72      0.36        3.58
 5   2018      1.98  0.79  0.79      0.40        3.97
 6   2019      2.18  0.87  0.87      0.44        4.35
 7   2020      2.27  0.91  0.91      0.45        4.54
 8   2021      2.09  0.84  0.84      0.42        4.18
 9   2022      2.35  0.94  0.94      0.47        4.69
 10  2023      2.56  1.02  1.02      0.51        5.11,
 'Annual Revenues'] """
)

'The annual revenue of Americas in 2020 was 2.27 Billion US$.'

In [58]:
chain.invoke(
     """what are things contained by the images and what they explaining?
     use the following information to answer the question:
['SDLC Agile Framework',
 ' The image contains a چرخه توسعه نرم افزار چابک (Agile software development cycle). The cycle is a continuous process that involves six steps:\n1. Plan: In this step, the project team gathers requirements from the customer and creates a project plan.\n2. Design: In this step, the team designs the software architecture and creates a detailed design document.\n3. Develop: In this step, the team develops the software code.\n4. Test: In this step, the team tests the software to ensure that it meets the requirements.\n5. Evaluate: In this step, the team evaluates the software and makes any necessary changes.\n6. Meet: In this step, the team meets with the customer to review the software and get feedback.'] """
)

'The image contains a چرخه توسعه نرم افزار چابک (Agile Software Development Cycle) which explains the six steps involved in the cycle: Plan, Design, Develop, Test, Evaluate, and Meet. Each step is described in detail, including the activities involved in each phase such as gathering requirements, creating a project plan, designing software architecture, developing software code, testing the software, evaluating it, and meeting with the customer for feedback. The cycle is a continuous process designed to be flexible and adaptable to changes in requirements or technology.'

In [59]:
chain.invoke(
     """what are things contained by the images and what they explaining?
     use the following information to answer the question:
[' This is a screenshot of a PowerPoint slide. The slide contains a diagram of two puzzle pieces, one blue and one green. The pieces are interlocked.'] """
)

'The images contain diagrams of puzzle pieces, one blue and one green, that are interlocked. They are explaining the concept of teamwork, collaboration, or problem-solving in a business context.'

In [None]:
chain.invoke(
     """what are roles involved in project delivery team?
     use the following information to answer the question:
[' This is a screenshot of a PowerPoint slide. The slide contains a diagram of two puzzle pieces, one blue and one green. The pieces are interlocked.'] """
)