# In [None]:
import os
import PyPDF2
import re
import pandas as pd

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path* as one string.

    Pages are joined with a newline so that the last word of one page and
    the first word of the next are not fused into a single token, which
    would break word-boundary based matching on the result.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined by newlines. A page that yields no text
        contributes an empty string; a PDF with no pages yields ''.
    """
    with open(pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # Extract while the file handle is still open. `or ''` keeps the
        # join safe should a page yield None instead of a string.
        page_texts = [page.extract_text() or '' for page in pdf_reader.pages]
    return '\n'.join(page_texts)

def _first_text_line(text):
    """Return the first run of non-digit text found on a single line, or None."""
    for line in text.splitlines():
        match = re.search(r'\D+', line)
        if match:
            candidate = match.group(0).strip()
            if candidate:
                return candidate
    return None


def extract_information(text):
    """Extract a candidate name, email address and phone number from *text*.

    All three values are best-effort heuristics based on regular
    expressions.

    Args:
        text: The plain text to search.

    Returns:
        A ``(name, email, phone_number)`` tuple. Each element is a string,
        or None when nothing matching was found.
    """
    # The name heuristic is "the first non-digit text in the document".
    # It is limited to one line so that the result is not a multi-line blob
    # running all the way to the first digit in the text.
    name = _first_text_line(text)

    # The top-level domain class is [A-Za-z]; a '|' inside a character class
    # is a literal pipe, not alternation, and would be accepted as part of
    # the address.
    email_match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
    email = email_match.group(0) if email_match else None

    # Ten digits in 3-3-4 grouping, with optional parentheses around the
    # first group and optional '-', '.' or whitespace separators.
    phone_number_match = re.search(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', text)
    phone_number = phone_number_match.group(0) if phone_number_match else None

    return name, email, phone_number

def extract_skills(text, required_skills):
    """Return the skills from *required_skills* that occur in *text*.

    Matching is case-insensitive and whole-token: a skill only counts when
    it is not directly preceded or followed by a word character, so "Java"
    does not match inside "JavaScript".

    Args:
        text: The text to search.
        required_skills: Iterable of skill strings to look for. They are
            treated as literal text, not as regular expressions.

    Returns:
        A list of the skills that were found, in the order given. Empty
        skill strings are never reported as found.
    """
    found_skills = []
    for skill in required_skills:
        if not skill:
            # An empty pattern would match any text; treat it as absent.
            continue
        # re.escape keeps skills such as "C++" from being parsed as regex
        # syntax. Lookarounds replace \b because \b cannot match next to a
        # non-word character, which would make "C#" or "C++" unfindable.
        pattern = r'(?<!\w)' + re.escape(skill) + r'(?!\w)'
        if re.search(pattern, text, re.IGNORECASE):
            found_skills.append(skill)
    return found_skills

def _parse_skills(skills_input):
    """Split a comma-separated string into unique, non-empty skills, keeping order."""
    stripped = (skill.strip() for skill in skills_input.split(','))
    # dict.fromkeys removes exact duplicates while preserving first-seen order.
    return list(dict.fromkeys(skill for skill in stripped if skill))


def main():
    """Interactively scan a folder of PDFs and print a skills summary table.

    Prompts for a folder path and a comma-separated list of skills, then
    builds one row per PDF file containing the extracted name, email and
    phone number, a 1/0 flag per requested skill, and the skill totals.
    The resulting DataFrame is printed to standard output.
    """
    # Prompt the user to enter the folder where the PDFs are located
    folder_path = input("Enter the folder path where the PDFs are located: ")

    # Prompt the user to enter the skills they are looking for
    skills_input = input("Enter the skills you are looking for (comma-separated): ")
    # Blank entries (empty input, stray commas) and repeated skills are
    # dropped so they neither create an empty column nor inflate the totals.
    required_skills = _parse_skills(skills_input)

    # Find the total number of skills entered by the user
    total_skills = len(required_skills)

    results = []

    # Process all PDFs in the specified folder. The listing is sorted so the
    # row order is reproducible, and the extension check ignores case so
    # files named "*.PDF" are not skipped.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(".pdf"):
            continue

        pdf_path = os.path.join(folder_path, filename)
        extracted_text = extract_text_from_pdf(pdf_path)
        name, email, phone_number = extract_information(extracted_text)
        found_skills = extract_skills(extracted_text, required_skills)

        # Create a dictionary with 1 or 0 for each skill
        found_lookup = set(found_skills)
        skill_dict = {skill: 1 if skill in found_lookup else 0 for skill in required_skills}

        results.append({
            'PDF Name': os.path.splitext(filename)[0],
            'Name': name,
            'Email': email,
            'Phone Number': phone_number,
            **skill_dict,
            'Total Skills': len(found_skills),
            'Total Skills (User Defined)': total_skills,
        })

    # Create a DataFrame from the results
    df = pd.DataFrame(results)

    # Display the DataFrame
    print("\nDataFrame:")
    print(df)

# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
