In [29]:
import pdfplumber
import re
import pandas as pd

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Path (or other object accepted by ``pdfplumber.open``)
            of the PDF to read.

    Returns:
        str: The text of each page followed by a newline, in page order.
        A page with no extractable text contributes only its newline.
    """
    page_chunks = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # Some pdfplumber releases return None (rather than '') for a
            # page without extractable text, e.g. a scanned image; fall back
            # to '' so the concatenation cannot raise TypeError.
            page_chunks.append((page.extract_text() or '') + '\n')
    # Join once instead of growing a string with += on every page.
    return ''.join(page_chunks)

In [30]:
import spacy

# Load the English spaCy model (used for NER).
# Kept at module level: extract_resume_data reads this `nlp` object to find
# PERSON entities in the resume text.
nlp = spacy.load("en_core_web_sm")


def extract_resume_data(text, skills_keywords=None):
    """Extract name, email, phone, skills and experience lines from resume text.

    Args:
        text: Plain text of a resume. ``None`` is treated as an empty string.
        skills_keywords: Optional iterable of skill names to look for. When
            omitted, a built-in list of common technical skills is used.

    Returns:
        dict: String values under the keys ``'Name'``, ``'Email'``,
        ``'Phone'``, ``'Skills'`` (comma-separated) and ``'Experience'``
        (newline-separated lines). A field that could not be found holds the
        string ``'None'``.
    """
    text = text or ''

    # The domain must end in a dotted label, so sentence punctuation right
    # after an address ("me@x.com.") is not captured; '+' is allowed in the
    # local part so "me+tag@x.com" is not truncated.
    email_pattern = r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+'
    phone_pattern = r'(\+?\d[\d\s-]{8,15}\d)'

    if skills_keywords is None:
        skills_keywords = [
            'Python', 'Java', 'C++', 'SQL', 'TensorFlow', 'Keras',
            'Pandas', 'NumPy', 'Machine Learning', 'AI',
            'Deep Learning', 'HTML', 'CSS', 'JavaScript'
        ]

    def _skill_regex(skill):
        # Treat the keyword as literal text: unescaped, 'C++' is a regex
        # quantifier, not the language name.
        parts = skill.split()
        # Allow any whitespace between words so a skill wrapped across two
        # lines of extracted text is still found.
        body = r'\s+'.join(re.escape(part) for part in parts)
        # Require a non-word neighbour only on the sides where the keyword
        # itself starts/ends with a word character, so 'Java' does not match
        # inside 'JavaScript' and 'AI' does not match inside 'Email', while
        # 'C++' still matches in 'C++11'.
        left = r'(?<!\w)' if re.match(r'\w', parts[0]) else ''
        right = r'(?!\w)' if re.search(r'\w$', parts[-1]) else ''
        return left + body + right

    # Find email and phone
    email = re.findall(email_pattern, text)
    phone = re.findall(phone_pattern, text)

    # Use spaCy to find the first person name (PERSON entity)
    doc = nlp(text)
    name = None
    for ent in doc.ents:
        if ent.label_ == "PERSON":
            # Keep only the first line of the entity text.
            name = ent.text.strip().split('\n')[0].strip()
            break

    # Skills (blank keywords are skipped; they would match everything)
    found_skills = [
        skill for skill in skills_keywords
        if skill.strip() and re.search(_skill_regex(skill), text, re.IGNORECASE)
    ]

    # Experience: keep every line mentioning an experience-related keyword
    experience_lines = []
    for line in text.split('\n'):
        if re.search(r'experience|worked|intern|ประสบการณ์|บริษัท', line, re.IGNORECASE):
            experience_lines.append(line.strip())

    extracted_data = {
        # Same 'None' placeholder as the other fields (was misspelled 'NOne').
        'Name': name if name else 'None',
        'Email': email[0] if email else 'None',
        'Phone': phone[0] if phone else 'None',
        'Skills': ', '.join(found_skills) if found_skills else 'None',
        'Experience': '\n'.join(experience_lines) if experience_lines else 'None'
    }
    return extracted_data


In [31]:
# Run the extraction pipeline on the sample resume:
# PDF file -> raw text -> dictionary of extracted fields.
pdf_path = 'data/sample_resume.pdf'  # relative path to the sample PDF
text = extract_text_from_pdf(pdf_path)
resume_data = extract_resume_data(text)

# Also display the result as a dictionary
resume_data


{'Name': 'Supawan Kongsapcharoen',
 'Email': 'supawankongsapcharoen@gmail.com',
 'Phone': '+66 95-581-0440',
 'Skills': 'Python, Java, C++, SQL, Pandas, NumPy, Machine Learning, AI, JavaScript',
 'Experience': 'Machine Learning. Skilled in SQL, Python, and data visualization, with experience as a Teaching Assistant\nmodeling. Seeking an internship opportunity to apply analytical thinking and problem-solving skills in real-\nPROJECT EXPERIENCE\nEXPERIENCE'}