In [1]:
import os
import re
import csv
from PyPDF2 import PdfReader

In [2]:
def extract_text_from_first_page(pdf_path):
    """Return the text of the first page of a PDF, or '' if there is none.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: The text extracted from page 0. An empty string is returned when
        the document has no pages or when extraction yields nothing.
    """
    with open(pdf_path, 'rb') as file:
        reader = PdfReader(file)
        # A zero-page document would otherwise raise IndexError on pages[0].
        if len(reader.pages) == 0:
            return ''
        # Normalise a falsy result (None / '') to '' so callers can always run
        # regex searches and slicing on the returned value.
        first_page_text = reader.pages[0].extract_text() or ''
    return first_page_text

def extract_metadata(text):
    """Pull contact details out of free-form resume text.

    Args:
        text: Text to scan. ``None`` is treated as an empty string.

    Returns:
        dict: Keys ``'email'``, ``'phone'`` and ``'linkedin'``. Each value is
        the first matching substring found in ``text``, or ``None`` when
        nothing matched.
    """
    # Guard against missing text so re.search does not raise TypeError.
    text = text or ''

    # The domain must contain at least one dot-separated label and every label
    # needs one or more characters, so a sentence-ending period
    # ("...@example.com.") is not captured. '+' is accepted in the local part
    # so addresses such as "name+tag@host.tld" are not truncated.
    email = re.search(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+', text)
    phone = re.search(r'(\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}', text)
    # Match the host case-insensitively so "LinkedIn.com/in/..." is found too.
    linkedin = re.search(r'linkedin\.com/in/[\w-]+', text, re.IGNORECASE)

    return {
        'email': email.group() if email else None,
        'phone': phone.group() if phone else None,
        'linkedin': linkedin.group(0) if linkedin else None
    }

def summarize_text(text, max_length=200):
    """Return a short preview of ``text``.

    Args:
        text: Text to shorten. ``None`` is treated as an empty string.
        max_length: Maximum number of characters kept before the ellipsis.
            Defaults to 200.

    Returns:
        str: ``text`` unchanged when it has at most ``max_length`` characters,
        otherwise its first ``max_length`` characters followed by ``'...'``.
    """
    text = text or ''
    if len(text) > max_length:
        return text[:max_length] + '...'
    return text

def process_resumes(folder_path):
    """Extract contact metadata and a summary from every PDF in a folder.

    Only regular files directly inside ``folder_path`` whose name ends in
    ``.pdf`` (any letter case) are processed; sub-folders are not searched.

    Args:
        folder_path: Directory containing the resume PDFs.

    Returns:
        list[dict]: One dict per PDF, in file-name order, with the keys
        ``'file_name'``, ``'email'``, ``'phone'``, ``'linkedin'`` and
        ``'summary'``.
    """
    results = []
    # os.listdir returns entries in arbitrary order; sort so the rows come out
    # in the same order on every run.
    for filename in sorted(os.listdir(folder_path)):
        pdf_path = os.path.join(folder_path, filename)
        # Accept '.PDF' as well as '.pdf', and skip directories whose name
        # merely ends in '.pdf' (opening them as files would fail).
        if not filename.lower().endswith('.pdf') or not os.path.isfile(pdf_path):
            continue
        text = extract_text_from_first_page(pdf_path)
        results.append({
            'file_name': filename,
            **extract_metadata(text),
            'summary': summarize_text(text),
        })

    return results

In [3]:
def save_to_csv(results, output_file):
    """Write the extracted resume rows to a CSV file without overwriting.

    If ``output_file`` already exists, a numeric suffix is inserted before the
    extension (``name_2.csv``, ``name_3.csv``, ...) until an unused name is
    found. The absolute path of the file written is printed.

    Args:
        results: Iterable of dicts with the keys ``'file_name'``, ``'email'``,
            ``'phone'``, ``'linkedin'`` and ``'summary'``.
        output_file: Preferred path of the CSV file.
    """
    headers = ['file_name', 'email', 'phone', 'linkedin', 'summary']

    # splitext only separates the final extension, so dots in directory names
    # ('out.v1/summary.csv', './summary.csv') or in the file stem stay intact.
    root, ext = os.path.splitext(output_file)
    # Keep producing '.csv' names when the requested path has no extension.
    ext = ext or '.csv'

    file_count = 1
    new_output_file = output_file
    while os.path.exists(new_output_file):
        file_count += 1
        new_output_file = f"{root}_{file_count}{ext}"

    # newline='' lets the csv module control line endings itself.
    with open(new_output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=headers)
        writer.writeheader()
        writer.writerows(results)

    print(f"CSV file saved successfully: {os.path.abspath(new_output_file)}")


In [5]:
# Driver cell: scan one folder of resume PDFs and write the extracted rows to CSV.
if __name__ == '__main__':
    # NOTE(review): this looks like a placeholder — point it at the real
    # directory of PDFs before running.
    folder_path = '/path/to/folder'
    # Relative path, so the file is created in the current working directory.
    output_file = 'resume_summary.csv'

    # One dict per PDF found in folder_path.
    results = process_resumes(folder_path)
    # Prints the absolute path of the CSV file that was written.
    save_to_csv(results, output_file)

CSV file saved successfully: /Users/diksha_bisht/All Jupiter Codes/resume_summary.csv
