In [1]:
from langchain_unstructured import UnstructuredLoader
loader_local = UnstructuredLoader(
    file_path="scr.pdf",
    strategy="hi_res",
)
docs_local = []
for doc in loader_local.lazy_load():
    docs_local.append(doc)

  from .autonotebook import tqdm as notebook_tqdm
INFO: pikepdf C++ to Python logger bridge initialized
INFO: Reading PDF for file: scr.pdf ...


In [10]:
len(docs_local)

137

In [8]:
print(docs_local)

[Document(metadata={'source': 'scr.pdf', 'detection_class_prob': 0.5372682809829712, 'coordinates': {'points': ((np.float64(475.09375), np.float64(385.0956232)), (np.float64(475.09375), np.float64(638.4495731999998)), (np.float64(1771.6287841796875), np.float64(638.4495731999998)), (np.float64(1771.6287841796875), np.float64(385.0956232))), 'system': 'PixelSpace', 'layout_width': 2200, 'layout_height': 1700}, 'last_modified': '2025-06-20T20:05:25', 'filetype': 'application/pdf', 'languages': ['eng'], 'page_number': 1, 'filename': 'scr.pdf', 'category': 'Title', 'element_id': '869444c6ec8cc84d1a13114a1379c3d3'}, page_content='Social Responsibilities of Business'), Document(metadata={'source': 'scr.pdf', 'detection_class_prob': 0.5192918181419373, 'coordinates': {'points': ((np.float64(191.4513397216797), np.float64(132.14817810058594)), (np.float64(191.4513397216797), np.float64(338.28213416000006)), (np.float64(2040.4267578125), np.float64(338.28213416000006)), (np.float64(2040.4267578

In [7]:
first_page_docs = [doc for doc in docs_local if doc.metadata.get("page_number") == 3]

for doc in first_page_docs:
    print(doc.page_content)

•According to Keith Davis, the term social responsibility refers to two types of business obligations:
•The socio-economic obligation: . Business should be carried out in such a manner that it becomes a profitable venture for everyone from the employees, investors, consumers, to the government and the general public.
•The socio-human obligation:
•The socio-human obligation of every business is to nurture and develop human values (such as morale, cooperation, motivation and self-realization in work
•The businessman should, therefore, consider the impact of his actions on all to which he is related
•His task is to mediate among these interests, to ensure that each gets a square deal and that nobody‟s interests are unduly sacrificed to those of others.


In [11]:
section_data =[]
section =""
for docs in docs_local:
    if docs.metadata.get("category") == "Title":
        
        section_data.append(section)
        section =""
        section+= docs.page_content + "\n"
        
    else:
        section += docs.page_content + "\n"

       
   

In [None]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer

# Initialize
model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)

# Suppose section_data is a list of section texts
all_chunks = []
raw_chunks =[]

for i, section in enumerate(section_data):
    if len(section) > 0:
        chunks = text_splitter.split_text(section)
        print(f"Section {i} has {len(chunks)} chunks")
        for j, chunk in enumerate(chunks):
            # Optional: Add metadata like section number
            all_chunks.append(Document(
                page_content=chunk,
                metadata={"section_id": i, "chunk_id": j}
            ))
            raw_chunks.append(chunk)

# Create the vector index
embeddings = model.embed_documents(raw_chunks)





INFO: Use pytorch device_name: cpu
INFO: Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Section 1 has 4 chunks
Section 2 has 2 chunks
Section 3 has 1 chunks
Section 4 has 3 chunks
Section 5 has 13 chunks
Section 6 has 1 chunks
Section 7 has 1 chunks
Section 8 has 4 chunks
Section 9 has 1 chunks
Section 10 has 1 chunks
Section 11 has 2 chunks
Section 12 has 4 chunks
Section 13 has 3 chunks
Section 14 has 2 chunks
Section 15 has 1 chunks
Section 16 has 1 chunks


' vectorindex = FAISS.from_documents(all_chunks, embeddings) '

In [None]:
#cluster embeddings
from sklearn.cluster import KMeans
num_topics = 10
kmeans = KMeans(n_clusters=num_topics, random_state=42)
labels = kmeans.fit_predict(embeddings)


In [14]:
import os
import dotenv
dotenv.load_dotenv()
from langchain_groq import ChatGroq
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    max_tokens=700,
    timeout=None,
    max_retries=2,
) 

In [15]:
import re
cluster_topic_titles = {}
for cluster_id in set(labels):
    rep_idx = list(labels).index(cluster_id)
    rep_chunk = raw_chunks[rep_idx]

    # Ask LLM to name this topic
    # Updated prompt
    prompt = (
        f"Give a very short and clear title for the following topic content.\n"
        f"Just return the title. No explanations, no quotes, no alternatives, no extra text.\n\n"
        f"{rep_chunk}"
    )
    raw_title = llm.invoke(prompt).content.strip()
    clean_title = re.sub(r'^["“”‘’\'*]*|["“”‘’\'*.:]*$', '', raw_title)  # trim quotes, punctuation
    clean_title = re.sub(r'^(Topic Title|Title)\s*[:\-]\s*', '', clean_title, flags=re.IGNORECASE)
    clean_title = clean_title.split("\n")[0].strip()
    cluster_topic_titles[cluster_id] = clean_title
    
labeled_chunks = []
for i, chunk_text in enumerate(raw_chunks):
    chunk_meta = {
        "section_id": all_chunks[i].metadata["section_id"],
        "chunk_id": all_chunks[i].metadata["chunk_id"],
        "cluster_id": int(labels[i]),
        "topic": cluster_topic_titles[labels[i]]
    }
    labeled_chunks.append({
        "text": chunk_text,
        "embedding": embeddings[i],
        "metadata": chunk_meta
    })

INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [16]:
vectorstore = FAISS.from_texts(
    texts=[chunk["text"] for chunk in labeled_chunks],
    embedding=model,
    metadatas=[chunk["metadata"] for chunk in labeled_chunks]
)

# === Done! You can now use vectorstore.as_retriever() ===
retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))

INFO: Loading faiss with AVX2 support.
INFO: Successfully loaded faiss with AVX2 support.
INFO: Failed to load GPU Faiss: name 'GpuIndexIVFFlat' is not defined. Will not load constructor refs for GPU indexes. This is only an error if you're trying to use GPU Faiss.


In [17]:
# Get all stored documents from FAISS
all_docs = vectorstore.similarity_search("placeholder", k=len(vectorstore.docstore._dict))

# Extract and print all unique topics
topics = set()
for doc in all_docs:
    topic = doc.metadata.get("topic")
    if topic:
        topics.add(topic)


print("Unique Topics:")
for topic in sorted(topics):
    print("-", topic)


Unique Topics:
- Business Ethics
- Continuous Improvement
- Effective Labour Management
- Interested Groups
- Internal Audit
- Protecting Shareholder Interests
- Social Compliance Audits
- Social Responsibilities of Business
- Socio-Economic and Socio-Human Obligations of Business
- What is Provided


In [19]:
def get_chunks_by_topic(vectorstore, topic_query):
    all_docs = vectorstore.similarity_search("placeholder", k=len(vectorstore.docstore._dict))
    
    topic_chunks = []
    for doc in all_docs:
        if doc.metadata.get("topic", "").lower() == topic_query.lower():
            topic_chunks.append(doc.page_content)
    
    return topic_chunks


In [20]:
topic = "Internal Audit"  # or input("Enter topic: ")
chunks = get_chunks_by_topic(vectorstore, topic)

print(f"\nFound {len(chunks)} chunks for topic '{topic}':\n")
for i, chunk in enumerate(chunks, 1):
    print(f"Chunk {i}:\n{chunk}\n")



Found 6 chunks for topic 'Internal Audit':

Chunk 1:
annually. - External auditors
might not fully understand the

Chunk 2:
Internal Audit

Chunk 3:
errors that internal audits might
company’s internal processes. -

Chunk 4:
always detect major frauds or external risks.

Chunk 5:
External Audit - Provides independent and unbiased assessment. - Enhances stakeholder confidence, especially for investors and regulatory bodies. - Helps detect major frauds and internal audits.
More expensive compared to
Limited
frequency, usually conducted

Chunk 6:
monitoring of financial and operational processes. - Helps identify risks and inefficiencies before external scrutiny. - More cost-effective compared to external audits. - Ensures compliance with internal policies and regulations. - Improves overall management control and decision-making. - May lack objectivity since auditors are part of the organization. - Findings might be influenced by internal pressures or conflicts of interest. - Limited cr

In [21]:
from langchain.prompts import PromptTemplate

template = """
You are an expert educational content designer.

Your task is to help retrieve **realistic educational visuals** from the web for the topic **"{topic}"**.

Instructions:
- You will be given text chunks related to the topic.
- Analyze all chunks holistically.
- Identify 1 to 3 key visualizable concepts.
- Based on the concepts, suggest **1 to 3 visual descriptors** that are suitable for web image retrieval.
- These images will be fetched from sources like **DuckDuckGo**
- Later, a separate model (like BLIP2) will describe the retrieved image and generate audio captions — so your job is just to suggest the most **searchable visual ideas**.
-- If only 1 or 2 are needed, output fewer.

Important Notes:
- Your descriptors must be **web-search friendly**, realistic, and likely to return good visuals.
- Do NOT suggest fictional or AI-specific styles like “a digital painting” or “ultra-detailed 4K illustration”.
- You **can suggest things like graphs, real-world scenes, physical experiments**, etc., if they are commonly found online.
- If you mention a physics diagram or formula chart, clarify that it's **just a reference to what's expected to be found** on the web.

Return format strictly as:
{{
  "image1": "<descriptor 1>",
  "image2": "<descriptor 2>",
  "image3": "<descriptor 3>"
}}

Rules:
- If one image is enough, return only "image1".
- Do not include any narration or explanation — only the descriptors.
- Do not use JSON formatting or code — just follow the shown format.

Content Chunks:
{chunks}
"""

prompt = PromptTemplate.from_template(template)


In [22]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate

# Example descriptor generation shots
examples = [
  {
    "topic": "Photosynthesis",
    "chunks": "Photosynthesis is the process by which green plants use sunlight to make food from carbon dioxide and water. Oxygen is released as a byproduct.",
    "descriptors": [
      "Diagram of photosynthesis in plants",
      "Chloroplast structure and function",
      "Photosynthesis chemical reaction chart"
    ]
  },
  {
    "topic": "Newton's Laws of Motion",
    "chunks": "Newton's three laws describe how objects move and interact with forces. The first law is about inertia, second about force and acceleration, and third about action and reaction.",
    "descriptors": [
      "Illustration of Newton's 3 laws with examples",
      "Force and acceleration graph",
      "Action-reaction force diagram"
    ]
  },
  {
    "topic": "Acids and Bases",
    "chunks": "Acids release H+ ions while bases release OH- ions. They are measured on the pH scale. Neutralization reactions occur when acids and bases combine.",
    "descriptors": [
      "pH scale with common substances",
      "Acid-base titration curve",
      "Neutralization reaction diagram"
    ]
  },
  {
    "topic": "Mitosis",
    "chunks": "Mitosis is the process of cell division in which a single cell divides into two identical daughter cells. It includes stages like prophase, metaphase, anaphase, and telophase.",
    "descriptors": [
      "Mitosis stages under microscope",
      "Cell cycle diagram with mitosis",
      "Mitosis vs meiosis comparison chart"
    ]
  },
  {
    "topic": "Ohm's Law",
    "chunks": "Ohm's Law states that the current through a conductor is directly proportional to voltage and inversely proportional to resistance.",
    "descriptors": [
      "Ohm's law triangle diagram",
      "Current-voltage-resistance graph",
      "Simple circuit showing Ohm's Law"
    ]
  },
  {
    "topic": "Periodic Table",
    "chunks": "The periodic table organizes elements based on atomic number and properties. Groups and periods reveal patterns in reactivity and structure.",
    "descriptors": [
      "Modern periodic table labeled",
      "Group trends in periodic table",
      "Periodic table block diagram"
    ]
  },
  {
    "topic": "DNA Structure",
    "chunks": "DNA is composed of nucleotides forming a double helix. It carries genetic instructions using base pairs A-T and G-C.",
    "descriptors": [
      "DNA double helix 3D model",
      "Base pairing in DNA strands",
      "Nucleotide structure diagram"
    ]
  },
  {
    "topic": "Chemical Bonding",
    "chunks": "Atoms bond to achieve stable electron configurations. Common types include ionic, covalent, and metallic bonding.",
    "descriptors": [
      "Ionic vs covalent bonding diagram",
      "Lewis structure examples",
      "Molecular structure of water"
    ]
  },
  {
    "topic": "Thermodynamics",
    "chunks": "Thermodynamics studies energy transfer. Laws of thermodynamics describe conservation of energy and entropy changes.",
    "descriptors": [
      "Laws of thermodynamics flowchart",
      "Heat engine efficiency diagram",
      "Entropy change vs temperature graph"
    ]
  },
  {
    "topic": "Human Digestive System",
    "chunks": "The digestive system breaks down food into nutrients. Key organs include mouth, stomach, intestines, liver, and pancreas.",
    "descriptors": [
      "Human digestive system labeled diagram",
      "Process of digestion infographic",
      "Enzyme function in digestion chart"
    ]
  }
]


In [38]:

topic = "Social Responsibilities of Business"  # or input("Enter topic: ")
chunks = get_chunks_by_topic(vectorstore, topic)
# Create individual prompt template for each example
example_prompt = PromptTemplate.from_template(
    "Topic: {topic}\nChunks: {chunks}\nDescriptors: {descriptors}"
)


descriptor_prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    
    suffix=template,
    input_variables=["topic", "chunks"]
)


In [39]:
final_prompt =descriptor_prompt.format(topic=topic, chunks=chunks)
response = llm.predict(final_prompt)

INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [40]:
print(response)

{
  "image1": "Businesses adhering to international labor standards and sustainability goals",
  "image2": "Infographic illustrating the concept of social responsibility in business",
  "image3": "Real-world example of a company's positive contributions to human betterment"
}


In [42]:
import json
data = json.loads(response)
for key, value in data.items():
    print(f"{value}")

Businesses adhering to international labor standards and sustainability goals
Infographic illustrating the concept of social responsibility in business
Real-world example of a company's positive contributions to human betterment


In [43]:
from duckduckgo_search import DDGS
import requests
import os
from PIL import Image
from io import BytesIO
import time


os.makedirs("retrieved_images", exist_ok=True)

def try_download(image_url, filepath):
    try:
        res = requests.get(image_url, timeout=5)
        if res.status_code == 200 and 'image' in res.headers.get('Content-Type', ''):
            with open(filepath, 'wb') as f:
                f.write(res.content)
            return True
    except:
        pass
    return False
def is_valid_image(image_bytes, min_width=400, min_height=300, min_size_kb=30):
    try:
        img = Image.open(BytesIO(image_bytes))
        width, height = img.size
        file_size_kb = len(image_bytes) / 1024
        return width >= min_width and height >= min_height and file_size_kb >= min_size_kb
    except:
        return False
ddgs = DDGS()
for key, query in data.items():
    time.sleep(10)  # Be kind to the API and avoid rate limiting
    results = ddgs.images(
        keywords=query,
        region="wt-wt",
        safesearch="off",
        size='Large',
        color="Monochrome",
        type_image=None,
        layout=None,
        license_image=None,
        max_results=3,
    )

    found = False
    for r in results:
        if try_download(r['image'], f"retrieved_images/{key}.jpg") and is_valid_image(requests.get(r['image']).content):
            print(f"✅ Downloaded {key}")
            found = True
            break
    if not found:
        print(f"❌ Failed to download any valid image for {key}")


INFO: response: https://duckduckgo.com/?q=Businesses+adhering+to+international+labor+standards+and+sustainability+goals 200
INFO: response: https://duckduckgo.com/i.js?o=json&q=Businesses+adhering+to+international+labor+standards+and+sustainability+goals&l=wt-wt&vqd=4-121164760877349208835404872790448805105&p=-1&f=%2Csize%3ALarge%2Ccolor%3AMonochrome%2C%2C%2C 200


✅ Downloaded image1


INFO: response: https://duckduckgo.com/?q=Infographic+illustrating+the+concept+of+social+responsibility+in+business 200
INFO: response: https://duckduckgo.com/i.js?o=json&q=Infographic+illustrating+the+concept+of+social+responsibility+in+business&l=wt-wt&vqd=4-190511171879239057419244076364617071959&p=-1&f=%2Csize%3ALarge%2Ccolor%3AMonochrome%2C%2C%2C 200


✅ Downloaded image2


INFO: response: https://duckduckgo.com/?q=Real-world+example+of+a+company%27s+positive+contributions+to+human+betterment 200
INFO: response: https://duckduckgo.com/i.js?o=json&q=Real-world+example+of+a+company%27s+positive+contributions+to+human+betterment&l=wt-wt&vqd=4-253437527878616308116602332211121310677&p=-1&f=%2Csize%3ALarge%2Ccolor%3AMonochrome%2C%2C%2C 200


✅ Downloaded image3


In [28]:
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base",use_fast=True)
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


In [29]:
speech_prompt_template = """
You are an expert educator.

You are shown an image related to the topic: **"{topic}"**.

You are provided with:
- A **caption**: a textual description generated by a vision-language model (may be vague or incorrect).
- **Extracted text**: raw OCR results from the image (may be messy or incomplete).
- **Chunks**: trusted educational content related to the topic.

Your task is to write a short, **educational audio narration (max 3 sentences-100 words)** that clearly explains the image for a learner. 

Guidelines:
- Use the **caption** only if it seems valid and relevant.
- It can interpret the graph plots or flowcharts as **persons body or object** then ignore the caption.
- Use the **OCR text** to understand any visible formulas, labels, or structure — but ignore gibberish.
- Use the **chunks** to ground your explanation in actual academic content.
- Only use the relevant parts of the chunks that relate to the image.
- Do **not** assume anything beyond what can be inferred from the image and the chunks.
- Explain what's happening **visually**, like describing a process flow, a graph trend, or what a diagram shows.
- Avoid technical fluff. Be clear, concise, and engaging.
- Start with phrases like **"In this image..."**, **"You can see..."**, or **"The diagram illustrates..."**



Return only the narration .Do not include any labels,prefixes,or formatting

---


rules:
- Do not use JSON formatting or code or heading or sub-headings — just follow the shown format
- just give the speech (i.e if it starts from "in this image..." the output should start with "In this image...")

**Caption**:
{caption}

**Extracted Text**:
{ocr_text}

**Content Chunks**:
{chunks}

---

Please write a narrated speech for this visual (up to 3 sentences):
"""




In [31]:
from langchain.prompts import PromptTemplate
speech_prompt = PromptTemplate.from_template(speech_prompt_template)


In [44]:
topic = "Social Responsibilities of Business"  # or input("Enter topic: ")
chunks = get_chunks_by_topic(vectorstore, topic)
print(",".join(chunks).strip())

•Related international standards and best practice, such as those from the International Labour Organization
•A business’s own stated sustainability goals,•“By social responsibility, we mean the intelligent and objective concern for the welfare of society that restrains individual and corporate behaviour from ultimately destructive activities, no matter how immediately profitable and leads in the direction of positive contributions to human betterment, variously as the latter may be defined.”,Definition of Social Responsibility
•“Social responsibilities refer to the businessman‟s decisions and actions taken to reasons at least partially beyond the firm‟s direct economic or technical interest.”,•These would be determined and would vary from case to case according to the customs, religions, traditions, level of industrialization and a host of other norms and standards about which there is a public consensus at any given time in a given society.
•According to Keith Davis, the term social 

In [45]:
all_results ={}
import os
import json
import cv2
import pytesseract
from PIL import Image
import torch
for file in os.listdir("retrieved_images"):
    if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue  
    img_path = os.path.join("retrieved_images", file)
    img_cv = cv2.imread(img_path)
    gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)  #
    _,thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)

    img_processeed = Image.fromarray(thresh)
    extracted_text   = pytesseract.image_to_string(img_processeed)
    print(extracted_text) 
    image = Image.open(img_path).convert("RGB")
    inputs = processor(image,"The image is a ", return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=50)
        caption = processor.decode(generated_ids[0], skip_special_tokens=True)
    final_prompt = speech_prompt.format(
    topic=topic,
    chunks=",".join(chunks).strip(),
    ocr_text=extracted_text.strip(),
    caption=caption.strip(),
    )
    response = llm.invoke(final_prompt)
    print(response.content)
    all_results[file] = {
        "caption": caption.strip(),
        "extracted_text": extracted_text.strip(),
        "speech": response.content.strip()
    }
with open("results.json", "w",encoding="utf-8") as f:
    json.dump(all_results, f, ensure_ascii=False, indent=2)
print("Results saved to results.json")

    
        

    # Path to your image file   

    

¢ Globalizaiton of

® Do labor standar

sus
Wie

« Social Dumping

trade

believe sO, bul ne prot

e between developed and ¢

i

nN

elk DIN 1



INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In this image, you can see a discussion about whether businesses should care about international labor standards. The diagram illustrates the concept of social dumping, where businesses in developed countries take advantage of cheaper labor in other countries, potentially harming workers and local economies. This highlights the importance of considering social responsibility in business decisions, as discussed in the definition by Adolph Berle, where managers must be responsive to public consensus.



INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In this image, you can see a pyramid with the words at the top and the words below it, illustrating the different levels of social responsibility that a business may have. The top level represents the business's own stated sustainability goals, while the lower levels show the various international standards and best practices that guide a business's social responsibility, such as those from the International Labour Organization. This pyramid structure highlights the importance of balancing a business's economic interests with its social obligations to society.



INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In this image, you can see a diagram illustrating the concept of social responsibility in business. The diagram shows how a business's decisions and actions are influenced by various factors, including international standards, best practices, and its own sustainability goals. This aligns with the definition of social responsibility, which refers to a business's responsiveness to public consensus and its decisions and actions taken to contribute positively to human betterment.
Results saved to results.json


In [46]:
import json
import pyttsx3
import os
os.makedirs("audio_files", exist_ok=True)

engine = pyttsx3.init()
engine.setProperty('rate', 170)  # Set speech rate
voices = engine.getProperty('voices')

# Select male voice (usually index 0 or try looping to find one)
for voice in voices:
    if 'male' in voice.name.lower():
        engine.setProperty('voice', voice.id)
        break


with open("results.json", "r", encoding="utf-8") as f:
    results = json.load(f)
for file, data in results.items():
    speech_text = data["speech"]
    if speech_text.strip():
        engine.save_to_file(speech_text, f"audio_files/{file}.mp3")
        print(f"Audio saved for {file}")
    else:   
        print(f"No speech generated for {file}, skipping audio generation.")
engine.runAndWait() 
    
   
    
    
    

Audio saved for image1.jpg
Audio saved for image2.jpg
Audio saved for image3.jpg


In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

prompt_template = """You are a helpful assistant. 
Use ONLY the following context to answer the question. 
Do NOT use any prior knowledge. 
If the answer is not in the context, respond with "The answer is not available in the provided context."

Context:
{context}

Question:
{question}

Answer:"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True
)



#agent 

query = "what is used to  capture the frequency of individul words in a document?"
result = qa_chain.invoke({"query": query})






INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [47]:
import os
import json
from datetime import timedelta


# === STEP 1: Utilities for Subtitle Generation ===

def estimate_timings(speech_text, wpm):
    sentences = [s.strip() for s in speech_text.split('.') if s.strip()]
    subtitles = []
    start = 0.0
    for i, sentence in enumerate(sentences):
        word_count = len(sentence.split())
        duration = word_count / (wpm / 60.0)  # in seconds
        end = start + duration
        subtitles.append((i+1, start, end, sentence + '.'))
        start = end
    return subtitles

def format_time(seconds):
    td = str(timedelta(seconds=seconds)).split(".")[0]
    return td + ",000"

def write_srt(subtitles, filepath):
    with open(filepath, "w", encoding="utf-8") as f:
        for idx, start, end, sentence in subtitles:
            f.write(f"{idx}\n")
            f.write(f"{format_time(start)} --> {format_time(end)}\n")
            f.write(f"{sentence}\n\n")




In [50]:
#captions
import json
from moviepy import ImageClip, AudioFileClip, TextClip, CompositeVideoClip, concatenate_videoclips
import os

# Ensure output directory exists
os.makedirs("final_video", exist_ok=True)

# Load results
with open("results.json", "r") as f:
    results = json.load(f)

all_video_clips = []
caption_width = 1000

for key, value in results.items():
    image_path = f"retrieved_images/{key}"
    audio_path = f"audio_files/{key}.mp3"
    speech = value["speech"]
    
    
    # Estimate subtitle timings
    subtitles = estimate_timings(speech,150)  # Should return list of (idx, start, end, sentence)

    # Load image and audio
    image_clip = ImageClip(image_path).resized((1280, 720))  # Resize to 1280x720
    audio_clip = AudioFileClip(audio_path)
    duration = audio_clip.duration
    image_clip = image_clip.with_duration(duration)
    image_clip.audio = audio_clip.subclipped(0, duration)

    # Generate subtitle text cli
    text_clips = []
    for idx, start, end, sentence in subtitles:
        txt = TextClip(
            text=sentence,
            font_size=33,
            size=(caption_width, 100),
            method="caption",
            color='black',
           
        )
        txt = txt.with_start(start).with_duration(end - start).with_position('bottom')
        text_clips.append(txt)

    # Combine image and subtitles
    composite = CompositeVideoClip([image_clip] + text_clips)
    all_video_clips.append(composite)

# Concatenate all clips into a single video
final_video = concatenate_videoclips(all_video_clips)
final_video.write_videofile("final_video/final_combined_video.mp4", fps=24)


MoviePy - Building video final_video/final_combined_video.mp4.
MoviePy - Writing audio in final_combined_videoTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing video final_video/final_combined_video.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready final_video/final_combined_video.mp4




In [None]:
#without captions
import json
from moviepy import ImageClip, AudioFileClip, CompositeVideoClip, concatenate_videoclips
import os

# Ensure output directory exists
os.makedirs("final_video", exist_ok=True)

# Load results
with open("results.json", "r") as f:
    results = json.load(f)

all_video_clips = []

for key, value in results.items():
    image_path = f"retrieved_images/{key}"
    audio_path = f"audio_files/{key}.mp3"

    # Load and resize image, load audio
    image_clip = ImageClip(image_path).resized((1280, 720))
    audio_clip = AudioFileClip(audio_path)

    # Set duration and audio
    duration = audio_clip.duration
    image_clip = image_clip.with_duration(duration)
    image_clip.audio = audio_clip.subclipped(0, duration)

    # Append to the video clips list
    all_video_clips.append(image_clip)

# Concatenate all clips into a single video
final_video = concatenate_videoclips(all_video_clips)
final_video.write_videofile("final_video/final_combined_video_without_caption.mp4", fps=24)


MoviePy - Building video final_video/final_combined_video.mp4.
MoviePy - Writing audio in final_combined_videoTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing video final_video/final_combined_video.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready final_video/final_combined_video.mp4
