In [None]:
# =========================================================
# Multimodal Data Processing & LLM Q&A (One-Cell Demo)
# FIXES:
# 1. Corrected types.Part creation from .from_text() to Part(text=...).
# 2. Added try...finally block for explicit client cleanup to avoid AttributeError.
# =========================================================
# 1️⃣ Install libraries
!pip install --quiet pdfplumber python-docx python-pptx pillow pytesseract sentence-transformers google-genai fpdf yt-dlp

import os
from docx import Document
from pptx import Presentation
from fpdf import FPDF
from PIL import Image, ImageDraw
import pdfplumber
import pytesseract
from google import genai
from google.genai import types # Import types for configuration
import getpass

print("‚úÖ Libraries installed and ready.")

# 2️⃣ Create demo folder & files
# Each sample below is written into demo_data/ so that the parsing step has
# one file of every supported type to work on.
os.makedirs("demo_data", exist_ok=True)

# TXT: plain-text sample containing facts the Q&A step can be asked about.
with open("demo_data/demo_text.txt", "w") as f:
    f.write("This is a sample TXT file. Sruthi has 5 years of experience in data science. She worked at two companies.")

# DOCX: a heading plus a single paragraph.
doc = Document()
doc.add_heading("Demo Document", 0)
doc.add_paragraph("This is a sample DOCX document. John is a software engineer with 3 years of experience.")
doc.save("demo_data/demo_doc.docx")

# PDF: a single page with one line of text.
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=12)
pdf.cell(200, 10, txt="This is a sample PDF file. The project budget is $50,000.", ln=True)
pdf.output("demo_data/demo_pdf.pdf")

# PPTX: one slide whose only text is its title.
# NOTE(review): slide_layouts[5] is presumably the "Title Only" layout of the
# default python-pptx template - confirm.
ppt = Presentation()
slide = ppt.slides.add_slide(ppt.slide_layouts[5])
slide.shapes.title.text = "Demo PPT Slide"
ppt.save("demo_data/demo_ppt.pptx")

# IMAGE: solid-colour 300x100 RGB canvas with a short caption drawn on it,
# saved as JPEG so the OCR parser has something to read.
img = Image.new('RGB', (300, 100), color=(73, 109, 137))
d = ImageDraw.Draw(img)
d.text((10,40), "Demo Image Text", fill=(255,255,0))
img.save("demo_data/demo_img.jpg")

print("‚úÖ Demo files created in demo_data/")

# 3️⃣ Optional: Upload your own files
# NOTE: google.colab is only importable inside Colab. Anywhere else the import
# raises ImportError and the upload step is skipped.
try:
    from google.colab import files
    uploaded = files.upload()
    # The keys of the upload result are used as the names of files in the
    # working directory; relocate each one next to the demo files.
    for filename in uploaded:
        destination = os.path.join("demo_data", filename)
        os.rename(filename, destination)
    if uploaded:
        print("‚úÖ Uploaded files moved to demo_data/")
except ImportError:
    print("‚ö†Ô∏è Skipping file upload. Not running in Google Colab environment.")


# 4️⃣ Parse all files
# Maps each parsed file name in demo_data/ to the text extracted from it.
file_contents = {}

def parse_pdf(path):
    """Return the text of every page of the PDF at *path*, newline-joined.

    A page for which ``extract_text()`` yields a falsy value contributes an
    empty string, so the number of joined segments equals the page count.
    """
    with pdfplumber.open(path) as document:
        page_texts = [page.extract_text() or "" for page in document.pages]
    return "\n".join(page_texts)

def parse_docx(path):
    """Return the non-blank paragraphs of the DOCX at *path*, newline-joined.

    Paragraphs whose text is empty or whitespace-only are dropped.
    """
    paragraphs = Document(path).paragraphs
    return "\n".join(para.text for para in paragraphs if para.text.strip())

def parse_pptx(path):
    """Return the text of every text-bearing shape in the PPTX at *path*.

    Slides are visited in order; within a slide, every shape exposing a
    ``text`` attribute contributes one newline-joined segment.
    """
    deck = Presentation(path)
    return "\n".join(
        shape.text
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    )

def parse_txt(path):
    """Return the full contents of the text file at *path*.

    The file is decoded as UTF-8 regardless of the platform locale, and
    undecodable bytes are replaced with U+FFFD so that one badly encoded
    upload cannot abort the whole parsing step. The handle is closed
    deterministically by the ``with`` block.
    """
    with open(path, encoding="utf-8", errors="replace") as handle:
        return handle.read()

def parse_image(path):
    """Return the text that Tesseract OCR recognises in the image at *path*.

    The image is opened in a ``with`` block so the underlying file handle is
    released as soon as OCR finishes instead of lingering until garbage
    collection.
    """
    with Image.open(path) as image:
        return pytesseract.image_to_string(image)

# Route every file in demo_data/ to the parser matching its extension.
# Matching is done on the lower-cased name so uploads such as "REPORT.PDF" or
# "scan.JPG" are not silently ignored; files with other extensions are skipped.
for f in os.listdir("demo_data"):
    path = os.path.join("demo_data", f)
    name_lower = f.lower()
    if name_lower.endswith(".pdf"):
        parser = parse_pdf
    elif name_lower.endswith(".docx"):
        parser = parse_docx
    elif name_lower.endswith(".pptx"):
        parser = parse_pptx
    elif name_lower.endswith(".txt"):
        parser = parse_txt
    elif name_lower.endswith((".jpg", ".jpeg", ".png")):
        parser = parse_image
    else:
        continue

    try:
        file_contents[f] = parser(path)
    except Exception as exc:
        # Top-level boundary of the demo: report the unreadable file and keep
        # going so one corrupt upload does not discard every other document.
        print(f"WARNING: could not parse {f}: {exc}")

print("üìÅ Files processed:", list(file_contents.keys()))

# 5️⃣ LLM Setup (GEMINI API)

# Get API key securely: prompt without echoing, and if nothing is entered fall
# back to the GEMINI_API_KEY environment variable that the error message below
# tells the user to set. `client` ends up as None when no key is available.
try:
    # Strip stray whitespace/newlines that commonly come along with a paste.
    API_KEY = getpass.getpass("Enter your Gemini API Key: ").strip()
    if not API_KEY:
        API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
    if not API_KEY:
        raise ValueError("API Key cannot be empty.")
    client = genai.Client(api_key=API_KEY)
except (ImportError, ValueError) as e:
    print(f"‚ùå Error setting up client: {e}. Please ensure you are running in an environment that supports getpass or set the GEMINI_API_KEY environment variable.")
    client = None

if client:
    print("‚úÖ LLM ready for interactive queries.")

# 6️⃣ Interactive LLM Q&A
def ask_llm(question, context_texts):
    """Ask Gemini *question*, grounded only in the parsed documents.

    Args:
        question: The user's question as plain text.
        context_texts: Mapping of file name -> extracted text; every entry is
            embedded in the prompt under a ``--- <file name> ---`` header.

    Returns:
        The model's answer as a string. When the response carries no text
        (``response.text`` is ``None``), a bracketed diagnostic string naming
        the finish reason is returned instead of ``None``.

    Raises:
        Whatever ``client.models.generate_content`` raises; callers are
        expected to handle API errors.
    """
    context_combined = "\n\n".join(
        f"--- {fname} ---\n{txt}" for fname, txt in context_texts.items()
    )

    # The instructions, the documents and the question travel together in a
    # single user turn.
    prompt_text = f"""
You are an intelligent assistant. Answer the user's question based ONLY on the following documents.
If the information is not present, state that clearly.

--- DOCUMENTS START ---
{context_combined}
--- DOCUMENTS END ---

Question: {question}
Answer in short, precise, and natural language.
"""
    prompt_parts = [
        types.Content(role="user", parts=[types.Part(text=prompt_text)])
    ]

    # Configuration for the generation.
    # gemini-2.5-flash spends "thinking" tokens out of max_output_tokens; with
    # a 500-token cap the visible answer can be crowded out entirely, leaving
    # response.text as None. A thinking budget of 0 turns thinking off so the
    # whole cap is available for the answer.
    config = types.GenerateContentConfig(
        temperature=0.3,
        max_output_tokens=500,
        thinking_config=types.ThinkingConfig(thinking_budget=0),
    )

    # Gemini API call
    response = client.models.generate_content(
        model="gemini-2.5-flash",  # A great, fast model for chat and RAG
        contents=prompt_parts,
        config=config,
    )

    answer = response.text
    if answer:
        return answer

    # No text part came back (e.g. token cap hit or content blocked): surface
    # the reason instead of letting the caller print a bare "None".
    candidates = getattr(response, "candidates", None) or []
    finish_reason = getattr(candidates[0], "finish_reason", None) if candidates else None
    return f"[No answer text was returned by the model (finish reason: {finish_reason}).]"

# Main loop execution with robust cleanup.
# Reads questions until the user types "exit" (any case, surrounding
# whitespace ignored) or interrupts the prompt; the client is always closed.
if client:
    try:
        while True:
            try:
                q = input("\nüí¨ Ask a question (e.g., 'How much experience does Sruthi have?' or type 'exit'): ").strip()
            except (EOFError, KeyboardInterrupt):
                # Ctrl-C / Ctrl-D / kernel interrupt at the prompt is treated
                # as a request to leave rather than an error.
                print("üëã Exiting demo.")
                break

            if not q:
                # Nothing was asked; do not spend an API call on a blank line.
                continue
            if q.lower() == "exit":
                print("üëã Exiting demo.")
                break

            try:
                answer = ask_llm(q, file_contents)
                print("\nü§ñ LLM Answer:\n", answer)
            except Exception as e:
                # Boundary of the interactive loop: report the failed call and
                # keep the session alive for the next question.
                print(f"\n‚ùå An error occurred during API call: {e}")

    finally:
        # Explicitly close the client to avoid the 'AttributeError' cleanup warning
        client.close()
        print("\n‚úÖ Gemini Client explicitly closed and resources released.")
else:
    print("\n‚ö†Ô∏è Cannot run Q&A. Client failed to initialize.")

‚úÖ Libraries installed and ready.
‚úÖ Demo files created in demo_data/


Saving V_S_Sruthi_Resume_ATS_v2 (1).pdf to V_S_Sruthi_Resume_ATS_v2 (1).pdf
‚úÖ Uploaded files moved to demo_data/
üìÅ Files processed: ['demo_img.jpg', 'demo_pdf.pdf', 'V_S_Sruthi_Resume_ATS_v2 (1).pdf', 'demo_text.txt', 'demo_ppt.pptx', 'demo_doc.docx']
‚úÖ LLM ready for interactive queries.

ü§ñ LLM Answer:
 Sruthi has over 9 years of experience.

ü§ñ LLM Answer:
 None

ü§ñ LLM Answer:
 None

ü§ñ LLM Answer:
 Sruthi passed out with a B.Tech in 2015.

ü§ñ LLM Answer:
 None

ü§ñ LLM Answer:
 V. S. Sruthi's resume mentions the following technical skills:
*   **Programming:** ITIS Basics, Linux Administration, OOP Concepts, HTML
*   **Operating Systems:** Windows 2000/XP/7/10
*   **Database:** SQL
*   **Frameworks:** ITIL (Incident, Problem & Change Management)
*   **Tools:** ServiceNow, PuTTY, JIRA, Google Internal Tools

ü§ñ LLM Answer:
 I state "None" when the requested information is not present in the documents I am given.

ü§ñ LLM Answer:
 Yes, information about Sruthi's 