In [2]:
import fitz  # PyMuPDF
import re

def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text

def extract_email(text):
    match = re.search(r'[\w\.-]+@[\w\.-]+\.\w+', text)
    return match.group(0) if match else None

def extract_phone(text):
    match = re.search(r'\b\d{10}\b', text)
    return match.group(0) if match else None

def extract_name(text):
    lines = text.strip().split('\n')
    for line in lines:
        # Heuristic: name is likely to be in the first few lines
        if re.match(r'^[A-Z][a-z]+\s[A-Z][a-z]+(?:\s[A-Z][a-z]+)?$', line.strip()):
            return line.strip()
    return None

def extract_section(text, header):
    pattern = re.compile(rf"{header}\n(.*?)\n(?:\w|\Z)", re.DOTALL | re.IGNORECASE)
    match = pattern.search(text)
    return match.group(1).strip() if match else None

def extract_education(text):
    if "Education" in text:
        edu_section = text.split("Education")[1]
        if "Technical Skills" in edu_section:
            edu_section = edu_section.split("Technical Skills")[0]
        return edu_section.strip()
    return None

def extract_experience(text):
    if "Experience" in text:
        exp_section = text.split("Experience")[1]
        # Truncate before next section
        for end_marker in ["Community Outreach", "Awards", "Projects", "Publications", "Workshops"]:
            if end_marker in exp_section:
                exp_section = exp_section.split(end_marker)[0]
                break
        return exp_section.strip()
    return None

# -------------------------------
# Example Usage
# -------------------------------
pdf_path = "../test-resume/Siddharth_Reddy_Resume.pdf"  # Replace with your file path
text = extract_text_from_pdf(pdf_path)

info = {
    "Name": extract_name(text),
    "Email": extract_email(text),
    "Phone Number": extract_phone(text),
    "Education": extract_education(text),
    "Work Experience": extract_experience(text)
}

for key, value in info.items():
    print(f"\n=== {key} ===")
    print(value)



=== Name ===
Anthireddy Siddharth Reddy

=== Email ===
siddharthreddy2812@gmail.com

=== Phone Number ===
None

=== Education ===
B.Tech in Electrical and Computer Engineering
2021 – 2025
Amrita Vishwa Vidhyapeetham
CGPA: 9/10
Telangana State Board of Intermediate

=== Work Experience ===
AI Full Stack Developer at SymboSystems
Oct 2024 - Present
• Led the development of an advanced AI system leveraging large language models (LLMs) and
multimodal interactions, integrating cutting-edge AI technologies to deliver seamless user
experiences by rendering UI like forms, cards, carousels etc.
• Engineered distributed training and inference pipelines, incorporating Claude and GPT models
with a custom Retrieval-Augmented Generation (RAG) architecture utilizing FAISS for efficient
vector storage and retrieval.
• Designed and implemented a real-time audio-visual processing pipeline by integrating Deepgram
for speech-to-text and LiveKit for low-latency streaming, enabling responsive, multimodal u