In [6]:
from langchain_unstructured import UnstructuredLoader
loader_local = UnstructuredLoader(
    file_path="nlp.pdf",
    strategy="hi_res",#use fast for faster loading
    mode="elements",  # use "elements" to get structured data
)
print(loader_local)
print("Loading documents from local file...")
docs_local = []    
for doc in loader_local.load():
    docs_local.append(doc)

<langchain_unstructured.document_loaders.UnstructuredLoader object at 0x0000024A69F14D40>
Loading documents from local file...


INFO: Reading PDF for file: nlp.pdf ...


In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

all_chunks = []
for doc in docs_local:
    chunks = text_splitter.split_text(doc.page_content)
    all_chunks.extend(chunks)

print("Total chunks:", len(all_chunks))


Total chunks: 91


In [23]:
all_chunks[0]

'1. What is Natural Language Processing (NLP)? \nAnswer: Natural Language Processing (NLP) is a field of Artificial Intelligence (AI) \nconcerned with the interactions between computers and human (natural) languages. It \nfocuses on enabling computers to understand, interpret, and generate human language in \na way that is both meaningful and useful. \n2. Mention any two real-world applications of NLP. \nAnswer: \n• \nSentiment Analysis: Determining the emotional tone or attitude expressed in text,'

In [101]:
first_page_docs = [doc for doc in docs_local if doc.metadata.get("page_number") == 3]

for doc in first_page_docs:
    print(doc.page_content)

The Coordination Committee formed by GR No. Abhyas - 2116/(Pra.Kra.43/16) SD - 4    Dated 25.4.2016 has given approval to prescribe this textbook in its meeting held on 3.3.2017  GENERAL  SCIENCE  STANDARD  SEVEN  Maharashtra  State  Bureau  of  Textbook  Production  and   Curriculum  Research,  Pune  -  411  004. 
The Coordination Committee formed by GR No. Abhyas - 2116/(Pra.Kra.43/16) SD - 4 Dated 25.4.2016 has given approval to prescribe this textbook in its meeting held on 3.3.2017
GENERAL SCIENCE
STANDARD SEVEN
Maharashtra State Bureau of Textbook Production and
Curriculum Research, Pune - 411 004.
tn 5YEKix |
The digital textbook can be obtained through DIKSHA APP on a smartphone by using the Q. R. Code given on title page of the textbook and useful audio-visual teaching-learning material of the relevant lesson will be available through the Q. R. Code given in each lesson of this textbook.
A


In [102]:
section_data =[]
section =""
for docs in docs_local:
    if docs.metadata.get("category") == "Title":
        
        section_data.append(section)
        section =""
        section+= docs.page_content + "\n"
        
    else:
        section += docs.page_content + "\n"

       
   

In [103]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer

# Initialize
model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)

# Suppose section_data is a list of section texts
all_chunks = []
raw_chunks =[]

for i, section in enumerate(section_data):
    if len(section) > 0:
        chunks = text_splitter.split_text(section)
        print(f"Section {i} has {len(chunks)} chunks")
        for j, chunk in enumerate(chunks):
            # Optional: Add metadata like section number
            all_chunks.append(Document(
                page_content=chunk,
                metadata={"section_id": i, "chunk_id": j}
            ))
            raw_chunks.append(chunk)

# Create the vector index
embeddings = model.embed_documents(raw_chunks)





INFO: Use pytorch device_name: cpu
INFO: Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Section 0 has 1 chunks
Section 1 has 1 chunks
Section 2 has 3 chunks
Section 3 has 1 chunks
Section 4 has 1 chunks
Section 5 has 1 chunks
Section 6 has 2 chunks
Section 7 has 1 chunks
Section 8 has 2 chunks
Section 9 has 1 chunks
Section 10 has 1 chunks
Section 11 has 1 chunks
Section 12 has 1 chunks
Section 13 has 1 chunks
Section 14 has 1 chunks
Section 15 has 1 chunks
Section 16 has 1 chunks
Section 17 has 1 chunks
Section 18 has 1 chunks
Section 19 has 1 chunks
Section 20 has 1 chunks
Section 21 has 4 chunks
Section 22 has 1 chunks
Section 23 has 1 chunks
Section 24 has 1 chunks
Section 25 has 1 chunks
Section 26 has 1 chunks
Section 27 has 1 chunks
Section 28 has 1 chunks
Section 29 has 1 chunks
Section 30 has 1 chunks
Section 31 has 1 chunks
Section 32 has 1 chunks
Section 33 has 1 chunks
Section 34 has 1 chunks
Section 35 has 3 chunks
Section 36 has 4 chunks
Section 37 has 1 chunks
Section 38 has 1 chunks
Section 39 has 6 chunks
Section 40 has 1 chunks
Section 41 has 1 chunks
Se

In [104]:
#cluster embeddings
from sklearn.cluster import KMeans
num_topics = 10
kmeans = KMeans(n_clusters=num_topics, random_state=42)
labels = kmeans.fit_predict(embeddings)


In [105]:
import os
import dotenv
dotenv.load_dotenv()
from langchain_groq import ChatGroq
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    max_tokens=700,
    timeout=None,
    max_retries=2,
) 

In [106]:
import re
cluster_topic_titles = {}
for cluster_id in set(labels):
    rep_idx = list(labels).index(cluster_id)
    rep_chunk = raw_chunks[rep_idx]

    # Ask LLM to name this topic
    # Updated prompt
    prompt = (
        f"Give a very short and clear title for the following topic content.\n"
        f"Just return the title. No explanations, no quotes, no alternatives, no extra text.\n\n"
        f"{rep_chunk}"
    )
    raw_title = llm.invoke(prompt).content.strip()
    clean_title = re.sub(r'^["“”‘’\'*]*|["“”‘’\'*.:]*$', '', raw_title)  # trim quotes, punctuation
    clean_title = re.sub(r'^(Topic Title|Title)\s*[:\-]\s*', '', clean_title, flags=re.IGNORECASE)
    clean_title = clean_title.split("\n")[0].strip()
    cluster_topic_titles[cluster_id] = clean_title
    
labeled_chunks = []
for i, chunk_text in enumerate(raw_chunks):
    chunk_meta = {
        "section_id": all_chunks[i].metadata["section_id"],
        "chunk_id": all_chunks[i].metadata["chunk_id"],
        "cluster_id": int(labels[i]),
        "topic": cluster_topic_titles[labels[i]]
    }
    labeled_chunks.append({
        "text": chunk_text,
        "embedding": embeddings[i],
        "metadata": chunk_meta
    })

INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [107]:
vectorstore = FAISS.from_texts(
    texts=[chunk["text"] for chunk in labeled_chunks],
    embedding=model,
    metadatas=[chunk["metadata"] for chunk in labeled_chunks]
)

# === Done! You can now use vectorstore.as_retriever() ===
retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))

In [108]:
# Get all stored documents from FAISS
all_docs = vectorstore.similarity_search("placeholder", k=len(vectorstore.docstore._dict))

# Extract and print all unique topics
topics = set()
for doc in all_docs:
    topic = doc.metadata.get("topic")
    if topic:
        topics.add(topic)


print("Unique Topics:")
for topic in sorted(topics):
    print("-", topic)


Unique Topics:
- Birds
- Differentiating Properties and Functions
- Electromagnets and Everyday Phenomena
- Elements, Compounds and Mixtures
- General Science
- Germination of Seeds
- Maharashtra State Bureau of Textbook Production and Curriculum Research
- Pune
- Science Experiments and Models
- Standard Seven


In [109]:
def get_chunks_by_topic(vectorstore, topic_query):
    all_docs = vectorstore.similarity_search("placeholder", k=len(vectorstore.docstore._dict))
    
    topic_chunks = []
    for doc in all_docs:
        if doc.metadata.get("topic", "").lower() == topic_query.lower():
            topic_chunks.append(doc.page_content)
    
    return topic_chunks


In [110]:
topic = "General Science"  # or input("Enter topic: ")
chunks = get_chunks_by_topic(vectorstore, topic)

print(f"\nFound {len(chunks)} chunks for topic '{topic}':\n")
for i, chunk in enumerate(chunks, 1):
    print(f"Chunk {i}:\n{chunk}\n")



Found 138 chunks for topic 'General Science':

Chunk 1:
5.3 Our favourites
On a day out with our friends or family we enjoy mouth-watering dishes like pani-puri, shev- puri, pav-bhaji, vada-pav, pizzas, burgers. But, have you ever given a thought to how or where they were made ? Were they displayed and served hygienically ? What was the source of the water used for preparing them ? Discuss all these matters with your science teacher.

Observe. 
Which fruits in the picture appear to be good to eat ? Why ?

Chunk 2:
juice, food becomes

Chunk 3:
of Food

Chunk 4:
While buying things at grocery shops, the vegetable market, remember to look out for the following and tell your guardians to do so, too.
1. Does the balance carry the stamp of standardisation by the department of weights and measures ?
2. Is the balance stable ? Is the pointer of the balance upright ?

Chunk 5:
• Making pictures and slogans against tobacco consumption, smoking, drinking alcohol, etc. and displaying them in the

In [112]:
from langchain.prompts import PromptTemplate

template = """
You are an expert educational content designer.

Your task is to help retrieve **realistic educational visuals** from the web for the topic **"{topic}"**.

Instructions:
- You will be given text chunks related to the topic.
- Analyze all chunks holistically.
- Identify 1 to 3 key visualizable concepts.
- Based on the concepts, suggest **1 to 3 visual descriptors** that are suitable for web image retrieval.
- These images will be fetched from sources like **DuckDuckGo**
- Later, a separate model (like BLIP2) will describe the retrieved image and generate audio captions — so your job is just to suggest the most **searchable visual ideas**.
-- If only 1 or 2 are needed, output fewer.
-**Give as simple and efficient as possible which can be retrieved from the web**

Important Notes:
- Your descriptors must be **web-search friendly**, realistic, and likely to return good visuals.
- Do NOT suggest fictional or AI-specific styles like “a digital painting” or “ultra-detailed 4K illustration”.
- You **can suggest things like graphs, real-world scenes, physical experiments**, etc., if they are commonly found online.
- If you mention a physics diagram or formula chart, clarify that it's **just a reference to what's expected to be found** on the web.

Return format strictly as:
{{
  "image1": "<descriptor 1>",
  "image2": "<descriptor 2>",
  "image3": "<descriptor 3>"
}}

Rules:
- If one image is enough, return only "image1".
- Do not include any narration or explanation — only the descriptors.
- Do not use JSON formatting or code — just follow the shown format.

Content Chunks:
{chunks}
"""

prompt = PromptTemplate.from_template(template)


In [113]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate

# Example descriptor generation shots
examples = [
  {
    "topic": "Photosynthesis",
    "chunks": "Photosynthesis is the process by which green plants use sunlight to make food from carbon dioxide and water. Oxygen is released as a byproduct.",
    "descriptors": [
      "Diagram of photosynthesis in plants",
      "Chloroplast structure and function",
      "Photosynthesis chemical reaction chart"
    ]
  },
  {
    "topic": "Newton's Laws of Motion",
    "chunks": "Newton's three laws describe how objects move and interact with forces. The first law is about inertia, second about force and acceleration, and third about action and reaction.",
    "descriptors": [
      "Illustration of Newton's 3 laws with examples",
      "Force and acceleration graph",
      "Action-reaction force diagram"
    ]
  },
  {
    "topic": "Acids and Bases",
    "chunks": "Acids release H+ ions while bases release OH- ions. They are measured on the pH scale. Neutralization reactions occur when acids and bases combine.",
    "descriptors": [
      "pH scale with common substances",
      "Acid-base titration curve",
      "Neutralization reaction diagram"
    ]
  },
  {
    "topic": "Mitosis",
    "chunks": "Mitosis is the process of cell division in which a single cell divides into two identical daughter cells. It includes stages like prophase, metaphase, anaphase, and telophase.",
    "descriptors": [
      "Mitosis stages under microscope",
      "Cell cycle diagram with mitosis",
      "Mitosis vs meiosis comparison chart"
    ]
  },
  {
    "topic": "Ohm's Law",
    "chunks": "Ohm's Law states that the current through a conductor is directly proportional to voltage and inversely proportional to resistance.",
    "descriptors": [
      "Ohm's law triangle diagram",
      "Current-voltage-resistance graph",
      "Simple circuit showing Ohm's Law"
    ]
  },
  {
    "topic": "Periodic Table",
    "chunks": "The periodic table organizes elements based on atomic number and properties. Groups and periods reveal patterns in reactivity and structure.",
    "descriptors": [
      "Modern periodic table labeled",
      "Group trends in periodic table",
      "Periodic table block diagram"
    ]
  },
  {
    "topic": "DNA Structure",
    "chunks": "DNA is composed of nucleotides forming a double helix. It carries genetic instructions using base pairs A-T and G-C.",
    "descriptors": [
      "DNA double helix 3D model",
      "Base pairing in DNA strands",
      "Nucleotide structure diagram"
    ]
  },
  {
    "topic": "Chemical Bonding",
    "chunks": "Atoms bond to achieve stable electron configurations. Common types include ionic, covalent, and metallic bonding.",
    "descriptors": [
      "Ionic vs covalent bonding diagram",
      "Lewis structure examples",
      "Molecular structure of water"
    ]
  },
  {
    "topic": "Thermodynamics",
    "chunks": "Thermodynamics studies energy transfer. Laws of thermodynamics describe conservation of energy and entropy changes.",
    "descriptors": [
      "Laws of thermodynamics flowchart",
      "Heat engine efficiency diagram",
      "Entropy change vs temperature graph"
    ]
  },
  {
    "topic": "Human Digestive System",
    "chunks": "The digestive system breaks down food into nutrients. Key organs include mouth, stomach, intestines, liver, and pancreas.",
    "descriptors": [
      "Human digestive system labeled diagram",
      "Process of digestion infographic",
      "Enzyme function in digestion chart"
    ]
  }
]


In [117]:

topic = "General Science"  # or input("Enter topic: ")
chunks = get_chunks_by_topic(vectorstore, topic)
if len(chunks)>20:

    chunks = chunks[:20]
print(len(chunks))
# Create individual prompt template for each example
example_prompt = PromptTemplate.from_template(
    "Topic: {topic}\nChunks: {chunks}\nDescriptors: {descriptors}"
)


descriptor_prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    
    suffix=template,
    input_variables=["topic", "chunks"]
)


20


In [118]:
final_prompt =descriptor_prompt.format(topic=topic, chunks=chunks)
response = llm.predict(final_prompt)

INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [None]:
print(response[-1])

{
  "image1": "Food preservation methods diagram",
  "image2": "Food adulteration chart with examples",
  "image3": "Food safety guidelines infographic"
}


In [120]:
import json
data = json.loads(response)
for key, value in data.items():
    print(f"{value}")

Food preservation methods diagram
Food adulteration chart with examples
Food safety guidelines infographic


In [None]:
from duckduckgo_search import DDGS
import requests
import os
from PIL import Image
from io import BytesIO
import time


os.makedirs("retrieved_images", exist_ok=True)

def try_download(image_url, filepath):
    try:
        res = requests.get(image_url, timeout=5)
        if res.status_code == 200 and 'image' in res.headers.get('Content-Type', ''):
            with open(filepath, 'wb') as f:
                f.write(res.content)
            return True
    except:
        pass
    return False
def is_valid_image(image_bytes, min_width=400, min_height=300, min_size_kb=30):
    try:
        img = Image.open(BytesIO(image_bytes))
        width, height = img.size
        file_size_kb = len(image_bytes) / 1024
        return width >= min_width and height >= min_height and file_size_kb >= min_size_kb
    except:
        return False
ddgs = DDGS()
for key, query in data.items():
    time.sleep(10)  # Be kind to the API and avoid rate limiting
    results = ddgs.images(
        keywords=query,
        region="wt-wt",
        safesearch="off",
        size='Large',
        color=color,
        type_image=None,
        layout=None,
        license_image=None,
        max_results=3,
    )

    found = False
    for r in results:
        time.sleep(1)
        if try_download(r['image'], f"retrieved_images/{key}.jpg") and is_valid_image(requests.get(r['image']).content):
            print(f"✅ Downloaded {key}")
            found = True
            break
    if not found:
        print(f"❌ Failed to download any valid image for {key}")


INFO: response: https://duckduckgo.com/?q=Food+preservation+methods+diagram 200
INFO: response: https://duckduckgo.com/i.js?o=json&q=Food+preservation+methods+diagram&l=wt-wt&vqd=4-184677305326552688631518669670501513165&p=-1&f=%2C%2C%2C%2C%2C 200


✅ Downloaded image1


INFO: response: https://duckduckgo.com/?q=Food+adulteration+chart+with+examples 200
INFO: response: https://duckduckgo.com/i.js?o=json&q=Food+adulteration+chart+with+examples&l=wt-wt&vqd=4-145849719338446256167383621718486649478&p=-1&f=%2C%2C%2C%2C%2C 200


✅ Downloaded image2


INFO: response: https://duckduckgo.com/?q=Food+safety+guidelines+infographic 200
INFO: response: https://duckduckgo.com/i.js?o=json&q=Food+safety+guidelines+infographic&l=wt-wt&vqd=4-159269571066823541667823745645362592085&p=-1&f=%2C%2C%2C%2C%2C 200


✅ Downloaded image3


In [126]:
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base",use_fast=True)
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


In [127]:
speech_prompt_template = """
You are an expert educator.

You are shown an image related to the topic: **"{topic}"**.

You are provided with:
- A **caption**: a textual description generated by a vision-language model (may be vague or incorrect).
- **Extracted text**: raw OCR results from the image (may be messy or incomplete).
- **Chunks**: trusted educational content related to the topic.

Your task is to write a short, **educational audio narration (max 3 sentences-100 words)** that clearly explains the image for a learner. 

Guidelines:
- Use the **caption** only if it seems valid and relevant.
- It can interpret the graph plots or flowcharts as **persons body or object** then ignore the caption.
- Use the **OCR text** to understand any visible formulas, labels, or structure — but ignore gibberish.
- Use the **chunks** to ground your explanation in actual academic content.
- Only use the relevant parts of the chunks that relate to the image.
- Do **not** assume anything beyond what can be inferred from the image and the chunks.
- Explain what's happening **visually**, like describing a process flow, a graph trend, or what a diagram shows.
- Avoid technical fluff. Be clear, concise, and engaging.
- Start with phrases like **"In this image..."**, **"You can see..."**, or **"The diagram illustrates..."**



Return only the narration .Do not include any labels,prefixes,or formatting

---


rules:
- Do not use JSON formatting or code or heading or sub-headings — just follow the shown format
- just give the speech (i.e if it starts from "in this image..." the output should start with "In this image...")

**Caption**:
{caption}

**Extracted Text**:
{ocr_text}

**Content Chunks**:
{chunks}

---

Please write a narrated speech for this visual (up to 3 sentences):
"""




In [128]:
from langchain.prompts import PromptTemplate
speech_prompt = PromptTemplate.from_template(speech_prompt_template)


In [130]:
topic = "General Science"  # or input("Enter topic: ")
chunks = get_chunks_by_topic(vectorstore, topic)
if len(chunks)>20:
    chunks = chunks[:20]
print(",".join(chunks).strip())

5.3 Our favourites
On a day out with our friends or family we enjoy mouth-watering dishes like pani-puri, shev- puri, pav-bhaji, vada-pav, pizzas, burgers. But, have you ever given a thought to how or where they were made ? Were they displayed and served hygienically ? What was the source of the water used for preparing them ? Discuss all these matters with your science teacher.

Observe. 
Which fruits in the picture appear to be good to eat ? Why ?,juice, food becomes,of Food,While buying things at grocery shops, the vegetable market, remember to look out for the following and tell your guardians to do so, too.
1. Does the balance carry the stamp of standardisation by the department of weights and measures ?
2. Is the balance stable ? Is the pointer of the balance upright ?,• Making pictures and slogans against tobacco consumption, smoking, drinking alcohol, etc. and displaying them in the classroom and the neighbourhood. Keeping a watch on whether one’s surroundings are tobacco-free.

In [136]:
all_results ={}
import os
import json
import cv2
import pytesseract
from PIL import Image
import torch
for file in os.listdir("retrieved_images"):
    if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue  
    img_path = os.path.join("retrieved_images", file)
    img_cv = cv2.imread(img_path)
    gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)  #
   

    img_processeed = Image.fromarray(gray)
    extracted_text   = pytesseract.image_to_string(img_processeed)
    print(extracted_text) 
    image = Image.open(img_path).convert("RGB")
    inputs = processor(image,"The image is a ", return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=50)
        caption = processor.decode(generated_ids[0], skip_special_tokens=True)
    final_prompt = speech_prompt.format(
    topic=topic,
    chunks=",".join(chunks).strip(),
    ocr_text=extracted_text.strip(),
    caption=caption.strip(),
    )
    response = llm.invoke(final_prompt)
    print(response.content)
    all_results[file] = {
        "caption": caption.strip(),
        "extracted_text": extracted_text.strip(),
        "speech": response.content.strip()
    }
with open("results.json", "w",encoding="utf-8") as f:
    json.dump(all_results, f, ensure_ascii=False, indent=2)
print("Results saved to results.json")

    
        

    # Path to your image file   

    

os




INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In this image, you can see a vendor serving pani puri with dirty hands, which is a common practice that can lead to food contamination and food poisoning. This is a serious issue because it can cause harm to people who consume such food, and it's essential to follow proper hygiene practices when handling and serving food. As we learned earlier, food safety is crucial, and we should always ensure that food is handled and served in a clean and hygienic manner.
Adulteration in food stuffs

Cereal
Pulses

Bengal gram Flour
Ghee

Milk
Tea

Pepper

Clove

Dhaneya

Red Chelli Powder

Honey
Turmeric

Soil, pieces of stone infested cereal
Khesari dal

Starch powder, maize flour
Vegetable ghee Animal fat,sweet
potato

Water

Used tea leavels

Papaya seeds

Clove after extraction

Saw dust,horse dung

Saw dust,Powdered Red Brick

Sugar, Water
Yellow Soil




INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In this image, you can see a list of various food items and substances that are commonly used in food preparation, but some of them are actually adulterants that can be harmful to our health. The presence of sawdust, horse dung, and other contaminants in the list highlights the importance of food safety and the need to check the quality of food before consuming it. This is in line with the Prevention of Food Adulteration Act, which aims to protect consumers from harmful food substances.
Cancer
Research

American
' Institute for

FOOD) SAFETY GUIDE

Follow these 4 simple steps to help reduce foodborne illnesses

aye
ats
of. seafood and eggs separated
gis When preparing from other food in your shopping
4 storing and cutting fF cart and refrigerator
food, separate raw Z
meat, poultry,
seafood and eges
from fruits and Use separate cutting boards, plates
vegetables and knives when preparing meat —"
| FCOOK |F
ae SSE
nermomet 145%F
ef 165°F
<=
eee 160°F
yy ase
Refrigerate perishable
food wit

INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In this image, you can see a poster that seems to be about food safety, but it's actually a jumbled mix of unrelated information. The text mentions foodborne illnesses, separating raw meat from other foods, and refrigerating perishable items, but it's unclear what the image is supposed to represent.
Results saved to results.json


In [137]:
import json
import pyttsx3
import os
os.makedirs("audio_files", exist_ok=True)

engine = pyttsx3.init()
engine.setProperty('rate', 173)  # Set speech rate
voices = engine.getProperty('voices')

# Select male voice (usually index 0 or try looping to find one)
for voice in voices:
    if 'male' in voice.name.lower():
        engine.setProperty('voice', voice.id)
        break


with open("results.json", "r", encoding="utf-8") as f:
    results = json.load(f)
for file, data in results.items():
    speech_text = data["speech"]
    if speech_text.strip():
        engine.save_to_file(speech_text, f"audio_files/{file}.mp3")
        print(f"Audio saved for {file}")
    else:   
        print(f"No speech generated for {file}, skipping audio generation.")
engine.runAndWait() 
    
   
    
    
    

Audio saved for image1.jpg
Audio saved for image2.jpg
Audio saved for image3.jpg


In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

prompt_template = """You are a helpful assistant. 
Use ONLY the following context to answer the question. 
Do NOT use any prior knowledge. 
If the answer is not in the context, respond with "The answer is not available in the provided context."

Context:
{context}

Question:
{question}

Answer:"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True
)



#agent 

query = "what is used to  capture the frequency of individul words in a document?"
result = qa_chain.invoke({"query": query})






INFO: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [133]:
import os
import json
from datetime import timedelta

# === STEP 1: Utilities for Subtitle Generation ===

def split_into_chunks(text, start, end, chunk_size=5):
    words = text.split()
    total_words = len(words)
    total_chunks = (total_words + chunk_size - 1) // chunk_size
    duration_per_chunk = (end - start) / total_chunks

    chunks = []
    for i in range(total_chunks):
        chunk_text = ' '.join(words[i*chunk_size : (i+1)*chunk_size])
        chunk_start = start + i * duration_per_chunk
        chunk_end = chunk_start + duration_per_chunk
        chunks.append((None, chunk_start, chunk_end, chunk_text))
    return chunks

def estimate_timings(speech_text, wpm):
    sentences = [s.strip() for s in speech_text.split('.') if s.strip()]
    all_chunks = []
    start = 0.0
    idx = 1
    for sentence in sentences:
        word_count = len(sentence.split())
        duration = word_count / (wpm / 60.0)
        end = start + duration
        chunks = split_into_chunks(sentence, start, end, chunk_size=5)
        for chunk in chunks:
            all_chunks.append((idx, chunk[1], chunk[2], chunk[3]))
            idx += 1
        start = end
    return all_chunks

def format_time(seconds):
    td = str(timedelta(seconds=seconds)).split(".")[0]
    return td + ",000"

def write_srt(subtitles, filepath):
    with open(filepath, "w", encoding="utf-8") as f:
        for idx, start, end, sentence in subtitles:
            f.write(f"{idx}\n")
            f.write(f"{format_time(start)} --> {format_time(end)}\n")
            f.write(f"{sentence}\n\n")


In [138]:
#captions
import json
from moviepy import ImageClip, AudioFileClip, TextClip, CompositeVideoClip, concatenate_videoclips
import os

# Ensure output directory exists
os.makedirs("final_video", exist_ok=True)

# Load results
with open("results.json", "r") as f:
    results = json.load(f)

all_video_clips = []
caption_width = 1000

for key, value in results.items():
    image_path = f"retrieved_images/{key}"
    audio_path = f"audio_files/{key}.mp3"
    speech = value["speech"]
    
    
    # Estimate subtitle timings
    subtitles = estimate_timings(speech,160)  # Should return list of (idx, start, end, sentence)

    # Load image and audio
    image_clip = ImageClip(image_path).resized((1280, 720))  # Resize to 1280x720
    audio_clip = AudioFileClip(audio_path)
    duration = audio_clip.duration
    image_clip = image_clip.with_duration(duration)
    image_clip.audio = audio_clip.subclipped(0, duration)

    # Generate subtitle text cli
    text_clips = []
    for idx, start, end, sentence in subtitles:
        txt = TextClip(
            text=sentence,
            font_size=33,
            size=(caption_width, 100),
            method="caption",
            color='black',
           
        )
        txt = txt.with_start(start).with_duration(end - start).with_position('bottom')
        text_clips.append(txt)

    # Combine image and subtitles
    composite = CompositeVideoClip([image_clip] + text_clips)
    all_video_clips.append(composite)

# Concatenate all clips into a single video
final_video = concatenate_videoclips(all_video_clips)
final_video.write_videofile("final_video/final_combined_video.mp4", fps=24)


MoviePy - Building video final_video/final_combined_video.mp4.
MoviePy - Writing audio in final_combined_videoTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing video final_video/final_combined_video.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready final_video/final_combined_video.mp4


In [83]:
#without captions
import json
from moviepy import ImageClip, AudioFileClip, CompositeVideoClip, concatenate_videoclips
import os

# Ensure output directory exists
os.makedirs("final_video", exist_ok=True)

# Load results
with open("results.json", "r") as f:
    results = json.load(f)

all_video_clips = []

for key, value in results.items():
    image_path = f"retrieved_images/{key}"
    audio_path = f"audio_files/{key}.mp3"

    # Load and resize image, load audio
    image_clip = ImageClip(image_path).resized((1280, 720))
    audio_clip = AudioFileClip(audio_path)

    # Set duration and audio
    duration = audio_clip.duration
    image_clip = image_clip.with_duration(duration)
    image_clip.audio = audio_clip.subclipped(0, duration)

    # Append to the video clips list
    all_video_clips.append(image_clip)

# Concatenate all clips into a single video
final_video = concatenate_videoclips(all_video_clips)
final_video.write_videofile("final_video/final_combined_video_without_caption.mp4", fps=24)


MoviePy - Building video final_video/final_combined_video_without_caption.mp4.
MoviePy - Writing audio in final_combined_video_without_captionTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing video final_video/final_combined_video_without_caption.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready final_video/final_combined_video_without_caption.mp4


In [140]:
image = Image.open("retrieved_images/image1.jpg").convert("RGB")
inputs = processor(image,"The image is a ", return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=50)
        caption = processor.decode(generated_ids[0], skip_special_tokens=True)

In [141]:
print(caption)

the image is a diagram of the food and the food that is eaten


In [None]:
import cv2
from PIL import Image
import pytesseract
Img_cv = cv2.imread("retrieved_images/image4.jpg")
gray = cv2.cvtColor(Img_cv, cv2.COLOR_BGR2RGB) 
cv2.imshow("Gray Image", gray)  # Display the image in a window
 #
   

img_processeed = Image.fromarray(gray)
extracted_text   = pytesseract.image_to_string(Img_cv)
print(extracted_text) 

Cancer
Research

American
' Institute for

FOOD) SAFETY, GUIDE

Follow these 4 simple steps to help reduce foodborne illnesses

aye
ats
of, seafood and eggs separated
ie When preparing from other food in your shopping
_f storing and cutting oF cart and refrigerator
food, separate raw Z
meat, poultry,
seafood and eges
from fruits and Use separate cutting boards, plates
vegetables and knives when preparing meat —"
| FCOOK |F
ae nla
ermomet 145%F
ef 165°F
=
eee 160°F
yy ase
Refrigerate perishable
food within 2 hours
_ 40°F Do not defrost and thaw
© Set your refrigerator Defrost and food on the counter.

at 40°F or lower

thaw frozen Bacteria can grow rapidly
food inthe when food is kept at room,
refrigerator temperature =

‘American Institute for Cancer Research | PO Box 97167, Washington, DC 20090-7167 | 800-843-8114 | aicr.org



In [None]:
from google_images_search import GoogleImagesSearch

# you can provide API key and CX using arguments,
# or you can set environment variables: GCS_DEVELOPER_KEY, GCS_CX
gis = GoogleImagesSearch('AIzaSyAIWIjHCq9WHhaNqEIy2OEuYbqDGc1blHI', 'f7d5cb9a750a1453e')

# define search params
# option for commonly used search param are shown below for easy reference.
# For param marked with '##':
#   - Multiselect is currently not feasible. Choose ONE option only
#   - This param can also be omitted from _search_params if you do not wish to define any value
_search_params = {
    'q': '...',
    'num': 1,
    'fileType': 'jpg|gif|png',
    'rights': 'cc_publicdomain|cc_attribute|cc_sharealike|cc_noncommercial|cc_nonderived',
    'safe': 'active|high|medium|off|safeUndefined', ##
    'imgType': 'clipart|face|lineart|stock|photo|animated|imgTypeUndefined', ##
    'imgSize': 'huge|icon|large|medium|small|xlarge|xxlarge|imgSizeUndefined', ##
    'imgDominantColor': 'black|blue|brown|gray|green|orange|pink|purple|red|teal|white|yellow|imgDominantColorUndefined', ##
    'imgColorType': 'color|gray|mono|trans|imgColorTypeUndefined' ##
}

# this will only search for images:
""" gis.search(search_params=_search_params) """

# this will search and download:
gis.search(search_params=_search_params, path_to_dir='retrieved_images/')

""" # this will search, download and resize:
gis.search(search_params=_search_params, path_to_dir='/path/', width=500, height=500) """

""" # search first, then download and resize afterwards:
gis.search(search_params=_search_params)
for image in gis.results():
    image.url  # image direct url
    image.referrer_url  # image referrer url (source) 
    
    image.download('/path/')  # download image
    image.resize(500, 500)  # resize downloaded image

    image.path  # downloaded local file path """

ModuleNotFoundError: No module named '_curses'

In [5]:
import requests

API_KEY = 'AIzaSyAIWIjHCq9WHhaNqEIy2OEuYbqDGc1blHI'
CX = 'f7d5cb9a750a1453e'
query = 'puppies'
search_type = 'image'

url = f'https://www.googleapis.com/customsearch/v1?q={query}&cx={CX}&key={API_KEY}&searchType={search_type}'

response = requests.get(url)
results = response.json()

for item in results.get('items', []):
    print(item['link'])  # Direct image URL


ConnectionError: HTTPSConnectionPool(host='www.googleapis.com', port=443): Max retries exceeded with url: /customsearch/v1?q=puppies&cx=f7d5cb9a750a1453e&key=AIzaSyAIWIjHCq9WHhaNqEIy2OEuYbqDGc1blHI&searchType=image (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000001902DECE630>: Failed to resolve 'www.googleapis.com' ([Errno 11001] getaddrinfo failed)"))

In [6]:
import curses

ModuleNotFoundError: No module named '_curses'

In [10]:
import os
import requests
import json
LIGHTHOUSE_API_KEY = os.getenv("LIGHTHOUSE_API_KEY")
print(LIGHTHOUSE_API_KEY)

def upload_to_lighthouse(video_path):
    url = "https://node.lighthouse.storage/api/v0/add"
    headers = {
        "Authorization": f"Bearer 4616e2eb.a19087940c234f68be253ffacf47d3e9",
    }
    with open(video_path, "rb") as f:
        response = requests.post(url, headers=headers, files={"file": f})
        response.raise_for_status()
        print(response.status_code)
        print(response.text)  # <– this will often give a clue
        response.raise_for_status()
        return response.json()["Hash"]

None


In [9]:
upload_to_lighthouse = upload_to_lighthouse("retrieved_images/image1.jpg")

200
{"Name":"image1.jpg","Hash":"bafkreiferkqrxztl4b3d65zf2za6fykilzfzm6gbj2ocoqcqyrqagtduzu","Size":"68069"}
