In [15]:
from dotenv import load_dotenv

# Load settings from a local .env file before any client is created.
# NOTE(review): presumably this supplies the API key that OpenAI() picks up
# later in the notebook -- confirm against the .env file.
load_dotenv()

True

In [16]:
# Instructions sent as the system message together with the transcript and one
# slide image. The reply is parsed with json.loads and its "matching_excerpt"
# is located in the transcript with an exact substring search, so the prompt
# must demand raw JSON and a verbatim excerpt (no ellipses, no paraphrasing).
system_prompt = """You are given:
1. **One slide image** (containing text or visuals).  
2. **One transcript** of a lecture or presentation.  

Your goal:
Identify the **exact position (even mid-sentence)** in the transcript where the content of the slide **first begins to be discussed**.  
You must capture **the latest possible starting phrase** that still represents the **first appearance** of that slide’s topic.  
This ensures you don’t include extra unrelated or leading sentences.

---

### Task Description
- Examine the **slide** to understand its **theme, title, and visuals** (keywords, diagrams, or main ideas).  
- Read the **transcript sequentially from the start**.  
- Detect the **first appearance** of the slide’s concept in the transcript.  
- Within that appearance, select the **last possible word, phrase, or sub-sentence** that still correctly marks the true start of that slide’s topic.  
  - Example behavior: if the topic begins being hinted at in a sentence but only becomes explicit at the end, use the **explicit phrase position**, not the start of the sentence.  

---

### Matching Rules
- Use **semantic understanding** — match meaning, not just exact words.  
- The match point may occur:
  - At the start of a sentence, or  
  - **Mid-sentence or at the last clause** when the slide’s theme becomes explicit.  
- Do **not** include earlier context sentences or unrelated setup lines before the concept is directly introduced.  
- Always favor **precision** (the last point of transition) over **completeness** (whole sentence inclusion).  
- Ignore incidental or vague mentions that are not clearly the beginning of the slide’s subject.

---

### Output Requirements
Return a **strict JSON object** with:
1. The **slide title**, extracted from the slide.  
2. The **character index** (0-based) where the slide topic begins (even mid-sentence).  
3. The **short matching excerpt**, containing only the **smallest portion** of text that marks this exact start.

The excerpt is located in the transcript with an exact substring search, so it must be copied **verbatim** from the transcript: same characters, spacing, punctuation and capitalization, with no paraphrasing and no added ellipses or quotation marks.

---

### Output Format
```json
{
  "slide_title": "Extracted or provided title of the slide",
  "detected_start_position": {
    "char_index": 15724
  },
  "matching_excerpt": "divided into yin force and yang force"
}
```

Respond with the raw JSON object only: no Markdown code fence around it and no text before or after it.
"""

In [17]:
from pypdf import PdfReader

# Forward slash instead of "\L": a backslash followed by a letter that is not a
# recognised escape is an invalid escape sequence (a warning on current
# Pythons), and the backslash form only resolved on Windows. Forward slashes
# are accepted on every platform.
reader = PdfReader("test_dir/Lesson  1_Wealth Qi_transcript.docx.pdf")

# One cleaned string per PDF page: line breaks are removed and double spaces
# are collapsed to a single space.
transcripts = [
    page.extract_text().replace("\n", "").replace("  ", " ")
    for page in reader.pages
]

# Pages are concatenated without a separator into one searchable string.
transcript = "".join(transcripts)
transcript

"Lesson 1_Wealth Qi Lesson 1_Wealth Qi: [00:00:00] Hey everyone, this is Joey Yap . Welcome to the seven Figure Feng Shui Playbook Training Program. Welcome. Now, in this training we're gonna talk about. Certain wealth related formations in Feng Shui. So I'm gonna take you on a journey to understanding how you could apply certain Feng Shui setups to facilitate your goal in enhancing your career, your wealth opportunities, all from the perspective of. \u200aFeng Shui applications. Okay. So of course the results will vary according to your Bazi and your efforts and your mindset and your, but let's try our best to improve our conditions no matter what. So when we say [00:01:00] seven figures. Basically we are referring to enhancing the capacity to improve your income. Now, to achieve that, we obviously will involve four specific aspects to work together in unison force to achieve this outcome, not just \u200aFeng Shui . Okay, so these four aspects include Heaven, earth, man, and spirit. H

In [18]:
import os
import base64
from openai import OpenAI
import json

# Split the transcript into one segment per slide.
#
# For every slide after the first, the model is asked where that slide starts
# being discussed. Everything before that point is attributed to the previous
# slide, and the search continues in the remaining text. The text left over at
# the end belongs to the last slide.
client = OpenAI()
result = []

# Forward slash instead of "\p": the backslash form is an invalid escape
# sequence (a warning on current Pythons) and only resolved on Windows.
directory_path = "image_only_test/pdf_chunks_images"  # Replace with your actual path

# os.listdir() returns entries in arbitrary order, but the loop relies on entry
# i-1 being the slide shown right before entry i. Keep regular files only and
# sort them so the pairing is deterministic.
directory_list = sorted(
  entry for entry in os.listdir(directory_path)
  if os.path.isfile(os.path.join(directory_path, entry))
)

# Consume a copy, so `transcript` stays intact and re-running this cell yields
# the same result.
remaining_transcript = transcript

for index, filename in enumerate(directory_list):
  print(f"Processing file: {filename}")
  if index < 1:
    # The first slide needs no lookup: it owns the text up to the start of
    # the second slide, which is found in the next iteration.
    continue

  file_path = os.path.join(directory_path, filename)
  previous_slide = directory_list[index - 1]

  try:
    with open(file_path, 'rb') as file:
      base64_image = base64.b64encode(file.read()).decode("utf-8")

    user_message = [
      {
        "type": "input_text",
        "text": f"Here is the transcripts of the lecture\n{remaining_transcript}",
      },
      {
        "type": "input_text",
        "text": "Here is the slide",
      },
      {
        "type": "input_image",
        "image_url": f"data:image/png;base64,{base64_image}",
      },
    ]

    response = client.responses.parse(
      model="gpt-5",
      input=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message}
      ],
      reasoning={
        "effort": "minimal",
        "summary": None,
      },
    )

    # output_text gathers the text of the message output, instead of assuming
    # the message is always the second output item.
    response_json = json.loads(response.output_text)
    excerpt = response_json["matching_excerpt"]

    text_index = remaining_transcript.find(excerpt)
    if text_index == -1:
      # The prompt's example excerpt is wrapped in ellipses and the model may
      # imitate that; retry without surrounding ellipses, dots and spaces.
      trimmed_excerpt = excerpt.strip(" .\u2026\n")
      if trimmed_excerpt:
        text_index = remaining_transcript.find(trimmed_excerpt)
  except Exception as e:
    # Deliberately best-effort: one failed slide (file, API or JSON error)
    # must not abort the run. It is handled like "excerpt not found".
    print(f"Error processing {filename}: {e}")
    text_index = -1
  else:
    print(previous_slide)
    if text_index == -1:
      print(f"Excerpt not found in transcript: {text_index}")

  if text_index != -1:
    result.append({
      "slide": previous_slide,
      "match": remaining_transcript[:text_index].strip()
    })
    remaining_transcript = remaining_transcript[text_index:].strip()
  else:
    # Keep one entry per slide even when no boundary was found; the
    # unconsumed text stays available for the following slide.
    result.append({
      "slide": previous_slide,
      "match": ""
    })

# Whatever is left belongs to the last slide. Doing this after the loop also
# covers a folder with a single slide and a failure on the final slide.
if directory_list:
  result.append({
    "slide": directory_list[-1],
    "match": remaining_transcript
  })

Processing file: image_0000_1.png
Processing file: image_0001_1.png
image_0000_1.png
Processing file: image_0002_1.png
image_0001_1.png
Processing file: image_0003_1.png
image_0002_1.png
Processing file: image_0004_1.png
image_0003_1.png
Processing file: image_0005_1.png
image_0004_1.png
Processing file: image_0006_1.png
image_0005_1.png
Processing file: image_0007_1.png
image_0006_1.png
Processing file: image_0008_1.png
image_0007_1.png
Processing file: image_0009_1.png
image_0008_1.png
Processing file: image_0010_1.png
image_0009_1.png
Processing file: image_0011_1.png
image_0010_1.png
Processing file: image_0012_1.png
image_0011_1.png
Excerpt not found in transcript: -1
Processing file: image_0013_1.png
image_0012_1.png
Processing file: image_0014_1.png
image_0013_1.png
Processing file: image_0015_1.png
image_0014_1.png
Processing file: image_0016_1.png
image_0015_1.png
Processing file: image_0017_1.png
image_0016_1.png
Processing file: image_0018_1.png
image_0017_1.png
Processing f

In [19]:
# Dump every slide name followed by the transcript segment attributed to it,
# with a dashed rule between entries.
for entry in result:
    print(
        entry["slide"],
        "\n",
        entry["match"],
        "\n------------------------------\n",
        sep="\n",
    )

image_0000_1.png


Lesson 1_Wealth Qi Lesson 1_Wealth Qi: [00:00:00] Hey everyone, this is Joey Yap . Welcome to the seven Figure Feng Shui Playbook Training Program. Welcome. Now, in this training we're gonna talk about. Certain wealth related formations in Feng Shui. So I'm gonna take you on a journey to understanding how you could apply certain Feng Shui setups to facilitate your goal in enhancing your career, your wealth opportunities, all from the perspective of.  Feng Shui applications. Okay. So of course the results will vary according to your Bazi and your efforts and your mindset and your, but let's try our best to improve our conditions no matter what. So when we say [00:01:00] seven figures. Basically we are referring to enhancing the capacity to improve your income. Now, to achieve that, we obviously will involve four specific aspects to work together in unison force to achieve this outcome, not just  Feng Shui . Okay, so these four aspects include Heaven, earth, man, and s