In [11]:
import pdfplumber
import os
import pandas as pd

In [12]:
def extract_text_from_pdf(pdf_path):
    """Extract text from a single PDF file."""
    try:
        with pdfplumber.open(pdf_path) as pdf:
            full_text = ''
            for page in pdf.pages:
                full_text += page.extract_text() or ''  # Extract text from all pages
            return full_text.strip()  # Clean up whitespace
    except Exception as e:
        print(f"Error reading {pdf_path}: {e}")
        return ''

In [13]:
# Load the existing CSV file
csv_file_path = '/Users/macbookair/Desktop/project/Resume_Analyser/resume-job-matching/data/UpdatedResumeDataSet.csv'
resume_df = pd.read_csv(csv_file_path)

# Ensure columns are present
if 'Category' not in resume_df.columns or 'Resume' not in resume_df.columns:
    raise ValueError("The CSV must have 'Category' and 'Resume' columns.")

# Path to the folder containing category-named subfolders
pdf_directory = '/Users/macbookair/Desktop/project/Resume_Analyser/resume-job-matching/data/Raw'


In [14]:
# Initialize a dictionary to collect resume data
new_data = []

# Iterate through category folders
for category in os.listdir(pdf_directory):
    category_path = os.path.join(pdf_directory, category)
    
    if os.path.isdir(category_path):  # Check if it's a directory
        for pdf_file in os.listdir(category_path):
            if pdf_file.endswith('.pdf'):  # Only process PDF files
                pdf_path = os.path.join(category_path, pdf_file)
                extracted_text = extract_text_from_pdf(pdf_path)
                
                # Add category and text to the new data list
                new_data.append({'Category': category, 'Resume': extracted_text})

# Create a DataFrame from the new data
new_resume_df = pd.DataFrame(new_data)

# Concatenate with the existing DataFrame (if required)
resume_df = pd.concat([resume_df, new_resume_df], ignore_index=True)

# Save the updated DataFrame to a new CSV
updated_csv_path = '/Users/macbookair/Desktop/project/Resume_Analyser/resume-job-matching/data/UpdatedResumeDataSet_with_extracted_text.csv'
resume_df.to_csv(updated_csv_path, index=False)
print("Resume text extraction and CSV update completed!")

Resume text extraction and CSV update completed!


In [15]:
update = pd.read_csv('/Users/macbookair/Desktop/project/Resume_Analyser/resume-job-matching/data/UpdatedResumeDataSet_with_extracted_text.csv')

In [16]:
pd.set_option('max_colwidth', 800)

In [17]:
update.shape

(3446, 2)

In [18]:
update.tail()

Unnamed: 0,Category,Resume
3441,AUTOMOBILE,"GENERAL LIABILITY CLAIM REPRESENTATIVE\nSummary\nClaims Representative who is highly motivated, success driven with several years experience in the insurance industry.Â\nSkills\nClaims file management processes\nLitigation resolution\nStrong interpersonal and communication skills\nSelf-directed\nTeam player\nCritical thinker\nExperience\n01/2008 to 02/2013\nGeneral Liability Claim Representative Company Name ï¼​ City , State\nRecognized for continued outstanding customer service - nominated for In-Synch Award Successfully determine coverage and communicate\nwith insured based on application of policy information, facts and allegations of each case.\nAggressively investigate, evaluate, reserve, and negotiate assigned claims in accordance with Best Practices.\nOrganize workflow to mainta..."
3442,AUTOMOBILE,"AUTOMOBILE TRANSPORTER\nProfessional Summary\nDedicated, responsible Class A driver with a clean driver's license. Self-motivated and customer-focused.Exceptional leader talented at leadership,\ncommunication, and management skills. Excellent team player with over 20 years of experience in the military and trucking industry.Talented\nMechanist Tech with more than 8 years of experience developing and executing maintenance programs. Excellent troubleshooting skills. Areas of\nexpertise include mechanics, technology, and management.\nKey Skills\nQuality control Solid communication skills\nOSHA regulatory compliance Strong organizational skills\nDependable independent worker Strong work ethic\nClean D.O.T. Team player\nGood at following instructions Timely project completion\nWork Experien..."
3443,AUTOMOBILE,"Highlights\nProg. Languages: C (5+ yrs), Python (3+ yrs), Java (3+ yrs), MATLAB (Simulink) (5+ yrs), R (2 yrs), Processing (2yrs), SQL(4+ yrs),\nPLC(2 yrs)\nDoc. Editing: Word/PPT/Excel, Pages/Numbers/Keynote, LATEX\nMechanical Design: AutoCAD (6 yrs), Solidworks (5+ yrs)\nMechanical Skills: MakerBot 3D print, Laser cut, Mill, Drill, Lathe Machine.\nStatistics Softwares: STATA, SPSS\nDatabase Softwares: SQL Server (4 yrs), Navicat (2 yrs)\nOperating Systems: Windows 7/10, OS X\nExperience\nCompany Name June 2016 to Current R&D Product Development Engineer\nCity\nDesign and build a tail-sitter VTOL(vertical take off and landing) UAV(unmanned aerial vehicle) which.\ntakes off and lands vertically and travels horizontally.\nMain duties include but not limit to aerodynamics.\nmodeling, UAV..."
3444,AUTOMOBILE,"CUSTOMER RELATIONS SPECIALIST\nSummary\nTo obtain a position with a company that offers the chance for me to utilize my call center and customer service skills to the best of my ability. As\nwell as an opportunity for advancement and a competitive salary. I am a highly personable Call Center Representative with experience in Customer\nService, Collections, and Call Center Operations.\nAccomplishments\nReached monthly sales goals on several occasions.\nExperience\nCustomer Relations Specialist\nJanuary 2015 to Current Company Name ï¼​ City , State\nProvide Honda clients a resource to register concerns, complaints, and request for assistance as outlined in the owner's manual; ensure best\npossible outcome.\nRespond to Client questions and concerns and provide solutions whenever possible ..."
3445,AUTOMOBILE,"SENIOR SPECIAL INVESTIGATOR\nSummary\nTo work within a successful Special Investigation Unit and work effectively with others. I worked as a Sworn Police Officer for 10 years and have\nbeen employed as a Sr. Special Investigator or Manager within the insurance industry for 30 years. When working with the insurance companies, I\nhave been in the Special Investigation Unit combating fraud and investigating questionable claims. When a claim is determined to be fraudulent or\nhave elements of fraud, I have filed numerous cases with the State's DOI Fraud Bureau and the appropriate counties District Attorney's Office. I\nhave also conducted numerous training sessions with the Insurance industry on how to recognize the 'red flags' of insurance fraud.\nHighlights\nInsurance fraud expertise\nDa..."
