In [83]:
import re, json
import pdfplumber
import docx


In [85]:
def extract_text(file_path: str) -> str:
    """Return the text content of a .pdf, .docx or .txt file.

    The format is chosen from the (case-insensitive) file extension.

    * ``.pdf``  -- text of every page, joined with newlines.
    * ``.docx`` -- body paragraphs, then table-cell paragraphs, then
      header and footer paragraphs of every section, joined with newlines.
    * ``.txt``  -- file content decoded as UTF-8; undecodable bytes are
      dropped.

    Raises:
        ValueError: if the extension is not one of the three above.
    """
    fp = file_path.lower()
    if fp.endswith(".pdf"):
        with pdfplumber.open(file_path) as pdf:
            # `or ""` keeps the join safe when a page yields no text.
            return "\n".join(p.extract_text() or "" for p in pdf.pages)

    if fp.endswith(".docx"):
        d = docx.Document(file_path)

        # Body paragraphs first.
        parts = [para.text or "" for para in d.paragraphs]

        # Then every paragraph of every table cell.
        for tbl in d.tables:
            for row in tbl.rows:
                for cell in row.cells:
                    parts.extend(para.text or "" for para in cell.paragraphs)

        # Headers/footers are best-effort: a failure here must not discard
        # the body text that was already collected, so the error is
        # deliberately swallowed and whatever was gathered so far is kept.
        try:
            for sec in d.sections:
                for para in sec.header.paragraphs:
                    parts.append(para.text or "")
                for para in sec.footer.paragraphs:
                    parts.append(para.text or "")
        except Exception:
            pass

        return "\n".join(parts)

    if fp.endswith(".txt"):
        # Context manager so the handle is closed deterministically instead
        # of whenever the garbage collector gets to it.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as fh:
            return fh.read()

    raise ValueError("Unsupported file format: Only .pdf, .docx, .txt are supported")



In [87]:
def extract_projects_improved(text):
    """Extract project descriptions from the "Projects" section of *text*.

    The section starts on the line after the Projects heading and ends at
    the first line that is one of the known section headings (or at the end
    of the text).  Inside the section, a line longer than three characters
    made only of capital letters and whitespace is treated as a project
    title and starts a new project; following non-blank lines are appended
    to it.

    Returns a list with one space-joined string per project.  If no line
    mentions "project"/"projects", a notice is printed and ``[]`` is
    returned.
    """
    lines = text.splitlines()

    # Prefer a line that is *only* the heading (optionally wrapped in
    # punctuation such as "Projects:" or a bullet).  Fall back to the first
    # line that merely mentions the word, so an incidental mention such as
    # "Developed project using ..." earlier in the document no longer
    # hijacks the section start when a real heading exists.
    start = None
    for pattern in (r"^\W*projects?\W*$", r"\bprojects?\b"):
        for i, line in enumerate(lines):
            if re.search(pattern, line, re.IGNORECASE):
                start = i + 1
                break
        if start is not None:
            break
    if start is None:
        print("Projects section not found.")
        return []

    stop_sections = {"STRENGTHS", "DECLARATION", "CERTIFICATIONS", "EDUCATION", "SKILLS", "EXPERIENCE", "ACHIEVEMENTS"}
    section = []
    for line in lines[start:]:
        stripped = line.strip()
        # Tolerate a trailing colon on the next heading ("Skills:").
        if stripped.upper().rstrip(" :") in stop_sections:
            break
        if stripped:
            section.append(stripped)

    projects = []
    current_project = []
    for line in section:
        # An all-caps line marks the title of the next project.
        if re.match(r"^[A-Z\s]+$", line) and len(line) > 3:
            if current_project:
                projects.append(" ".join(current_project).strip())
                current_project = []
        current_project.append(line)
    if current_project:
        projects.append(" ".join(current_project).strip())
    return projects


In [89]:
def extract_certifications(text):
    """Return the non-blank lines of the "Certifications" section of *text*.

    The section starts on the line after the Certifications heading and
    ends at the first line that is one of the known section headings (or at
    the end of the text).  Lines are returned stripped of surrounding
    whitespace.  If no line mentions "certification"/"certifications", a
    notice is printed and ``[]`` is returned.
    """
    lines = text.splitlines()

    # Prefer a line that is *only* the heading (optionally wrapped in
    # punctuation such as "Certifications:"); fall back to the first line
    # that merely mentions the word.  This keeps an incidental mention in
    # earlier prose from being mistaken for the section heading.
    start = None
    for pattern in (r"^\W*certifications?\W*$", r"\bcertifications?\b"):
        for i, line in enumerate(lines):
            if re.search(pattern, line, re.IGNORECASE):
                start = i + 1
                break
        if start is not None:
            break
    if start is None:
        print("Certifications section not found.")
        return []

    stop_sections = {"STRENGTHS", "DECLARATION", "PROJECTS", "EDUCATION", "SKILLS", "EXPERIENCE", "ACHIEVEMENTS"}
    entries = []
    for line in lines[start:]:
        stripped = line.strip()
        # Tolerate a trailing colon on the next heading ("Education:").
        if stripped.upper().rstrip(" :") in stop_sections:
            break
        if stripped:
            entries.append(stripped)
    return entries

In [91]:
def extract_achievements(text):
    """Return the non-blank lines of the "Achievements" section of *text*.

    The section starts on the line after the Achievements heading and ends
    at the first line that is one of the known section headings (or at the
    end of the text).  Lines are returned stripped of surrounding
    whitespace.  If no line mentions "achievement"/"achievements", a notice
    is printed and ``[]`` is returned.
    """
    lines = text.splitlines()

    # Prefer a line that is *only* the heading (optionally wrapped in
    # punctuation such as "Achievements:"); fall back to the first line
    # that merely mentions the word.  This keeps an incidental mention in
    # earlier prose from being mistaken for the section heading.
    start = None
    for pattern in (r"^\W*achievements?\W*$", r"\bachievements?\b"):
        for i, line in enumerate(lines):
            if re.search(pattern, line, re.IGNORECASE):
                start = i + 1
                break
        if start is not None:
            break
    if start is None:
        print("Achievements section not found.")
        return []

    stop_sections = {"STRENGTHS", "DECLARATION", "PROJECTS", "EDUCATION", "SKILLS", "EXPERIENCE", "CERTIFICATIONS"}
    entries = []
    for line in lines[start:]:
        stripped = line.strip()
        # Tolerate a trailing colon on the next heading ("Projects:").
        if stripped.upper().rstrip(" :") in stop_sections:
            break
        if stripped:
            entries.append(stripped)
    return entries


In [93]:
if __name__ == "__main__":
    # Demo run against a hard-coded local resume file.
    file_path = r"C:\Users\akhil\OneDrive\Documents\Akhila 2 resume (2).docx"
    raw_text = extract_text(file_path)

    # Each section is extracted immediately before its banner is printed,
    # so any "... section not found." notice appears above that banner.
    projects = extract_projects_improved(raw_text)
    print("==== Projects ====")
    for project in projects:
        # Separator and project text, each followed by a newline.
        print("\n---\n", project, sep="\n")

    certifications = extract_certifications(raw_text)
    print("\n==== Certifications ====")
    for entry in certifications:
        print(entry)

    achievements = extract_achievements(raw_text)
    print("\n==== Achievements ====")
    for entry in achievements:
        print(entry)

==== Projects ====

---

CROSS PLATFORM REPUTATION GENERATION SYSTEM BASED ON ASPECT BAESD SENTIMENT ANALYSIS Developed a cross-platform reputation system capable of collecting and standardizing opinions from diverse platforms (Facebook, Amazon, Twitter, Trip Advisor), ensuring comprehensive and accurate reputation analysis. Implemented advanced spam filtering using behavioral features to detect and eliminate spam, ensuring the authenticity of opinions used in the reputation calculation. Tools & Algorithms used: Python, Extra tree classifier, SVM algorithm, and logistic regression.

---

ACTION ASSIST Built a voice assistant which can translate and communicate in regional language (Telugu) Developed project using Libraries SpeechRecognition, google-cloud-speech, pyaudio(Converts speech to text, enabling voice input). Designed and Developed NLP with Django Framework. Translates text between languages (e.g., English to Telugu) by using Translation libraries( googletrans, google-cloud-tra