# CVInsight Minimal Example

A minimal example of using CVInsight for resume parsing.

In [1]:
# Basic imports
import os
import sys
import traceback
from getpass import getpass

import pandas as pd

# Make the local CVInsight checkout importable, then pull in the notebook helpers.
# The checkout location can be overridden with the CVINSIGHT_PATH environment
# variable; the default is the path this notebook was originally written against.
cvinsight_path = os.environ.get("CVINSIGHT_PATH", "/Users/samcelarek/Documents/CVInsight")
# Keep the checkout first on sys.path without stacking a duplicate entry every
# time this cell is re-run.
if sys.path[:1] != [cvinsight_path]:
    sys.path.insert(0, cvinsight_path)
from cvinsight.notebook_utils import initialize_client, parse_single_resume, parse_many_resumes, find_resumes

# Resolve the API key: use the OPEN_AI_API_KEY environment variable when it is
# set and non-empty, otherwise prompt for it.
api_key = os.environ.get("OPEN_AI_API_KEY")
if not api_key:
    # getpass hides the typed key; input() would echo it into the saved cell output.
    # strip() drops whitespace that tends to sneak in when the key is pasted.
    api_key = getpass("Enter your API key: ").strip()

# Create the client object that the parsing cells pass to the CVInsight helpers
client = initialize_client(api_key=api_key)
print("Client initialized")

Client initialized


In [2]:
# Inputs shared by the parsing cells further down
date_of_resume_submission = "2025-05-21"
job_description = "Data Analyst with Python and SQL skills"

# Collect the resume files found under the resume directory and report how many there are
resume_dir = "../Resumes"
resume_paths = find_resumes(resume_dir)
print(f"Found {len(resume_paths)} resumes")

Found 21 resumes


In [3]:
# Parse a single resume and print a short summary of what was extracted.
# Every field is read defensively because the parsed result may omit keys
# or carry empty values.
if resume_paths:
    try:
        result = parse_single_resume(
            client=client,
            resume_path=resume_paths[0],
            date_of_resume_submission=date_of_resume_submission,
            job_description=job_description
        )

        # Contact details
        print(f"Name: {result.get('name', 'Not available')}")
        print(f"Email: {result.get('email', 'Not available')}")

        # Skills: show the first five; `or []` also covers a key that is present but None
        skills = result.get('skills') or []
        if skills:
            # str() keeps the join from raising if an entry is not a plain string
            skill_str = ', '.join(str(skill) for skill in skills[:5]) + ('...' if len(skills) > 5 else '')
            print(f"Skills: {skill_str}")
        else:
            print("Skills: Not available")

        # Years-of-experience fields, printed exactly as returned by the parser
        print("\nYears of Experience:")
        print(f"YoE (raw): {result.get('YoE', 'Not available')}")
        print(f"YoE (numeric): {result.get('all_work_yoe_numeric', 'Not available')}")
        print(f"Education YoE: {result.get('all_edu_yoe', 'Not available')}")
        print(f"Total YoE (numeric): {result.get('all_total_yoe', 'Not available')}")
        print(f"Relevant YoE: {result.get('relevant_total_yoe', 'Not available')}")

        print("\nEducation and Work Experience:")

        # Education: first two entries only, for brevity
        educations = result.get('educations') or []
        if educations:
            print("Education:")
            for edu in educations[:2]:
                degree = edu.get('degree') or 'Unknown'
                institution = edu.get('institution') or 'Unknown'
                print(f"- {degree} at {institution}")
        else:
            print("Education: Not available")

        # Work experience: first two entries only, for brevity
        work_experiences = result.get('work_experiences') or []
        if work_experiences:
            print("\nWork Experience:")
            for exp in work_experiences[:2]:
                # The recorded run printed "Unknown position" for every entry, so
                # 'title' is not reliably populated; try 'role' before giving up.
                # NOTE(review): confirm the job-title key against the CVInsight result schema.
                title = exp.get('title') or exp.get('role') or 'Unknown position'
                company = exp.get('company') or 'Unknown company'
                print(f"- {title} at {company}")
        else:
            print("Work Experience: Not available")
    except Exception as e:
        # Best-effort demo cell: report the failure with its traceback and let
        # the rest of the notebook keep running.
        print(f"❌ Error parsing resume: {e}")
        traceback.print_exc()

Name: Wesley Ordoñez
Email: wesordonez1@gmail.com
Skills: CSS, HTML, JavaScript, Python, SQL...

Years of Experience:
YoE (raw): 6 Years 11 Months
YoE (numeric): 6.0
Education YoE: 0
Total YoE (numeric): 6.0
Relevant YoE: 8.1

Education and Work Experience:
Education:
- Bachelor of Science in Mechanical Engineering (concentration: Design Engineering) at ROSE-HULMAN INSTITUTE OF TECHNOLOGY
- Data Science and Machine Learning, Google Data Analytics Professional Certificate at ONLINE COURSEWORK (UDEMY/GOOGLE)

Work Experience:
- Unknown position at Puerto Rican Cultural Center
- Unknown position at Versatech LLC


In [4]:
# Parse multiple resumes (with minimal parameters) and summarise the batch:
# outcome counts, per-file failures, total-YoE statistics and a preview table.
if len(resume_paths) > 1:
    try:
        # Limit to 3 resumes for quick demo
        resumes_to_parse = resume_paths[:3]

        df = parse_many_resumes(
            client=client,
            resume_paths=resumes_to_parse,
            date_of_resume_submission=date_of_resume_submission,
            job_description=job_description,
            parallel=True
        )

        # Outcome counts; the success mask is reused for the statistics below
        print(f"Processed {len(df)} resumes")
        is_success = df['parsing_status'] == 'success'
        print(f"Successfully parsed: {is_success.sum()}")

        # List every resume that failed together with its recorded error
        failed = df[df['parsing_status'] == 'failed']
        if not failed.empty:
            print(f"Failed to parse {len(failed)} resumes")
            for _, row in failed.iterrows():
                print(f"- {row.get('filename', 'Unknown file')}: {row.get('error', 'Unknown error')}")

        # Add numeric YoE columns if they don't exist (fall back to 0)
        for col in ['all_work_yoe_numeric', 'all_edu_yoe_numeric', 'all_total_yoe']:
            if col not in df.columns:
                df[col] = 0

        # YoE statistics over successfully parsed resumes only
        try:
            # Coerce to numbers first: a string-typed column would otherwise make
            # the aggregations raise. Unparseable values become NaN and are dropped.
            total_yoe = pd.to_numeric(df.loc[is_success, 'all_total_yoe'], errors='coerce').dropna()
            if not total_yoe.empty:
                print("\nYoE Statistics:")
                print(f"Average Total YoE: {total_yoe.mean():.1f} years")
                print(f"Min Total YoE: {total_yoe.min():.1f} years")
                print(f"Max Total YoE: {total_yoe.max():.1f} years")
        except Exception as e:
            # Statistics are optional; still show the preview table below
            print(f"Could not calculate YoE statistics: {e}")

        # Show whichever of the standard columns are present
        cols = [col for col in ['filename', 'name', 'email', 'YoE', 'all_total_yoe', 'parsing_status'] if col in df.columns]
        if cols:
            display(df[cols].head())
        else:
            print("No standard columns available to display")
            display(df.head())
    except Exception as e:
        # Best-effort demo cell: report the failure with its traceback and let
        # the rest of the notebook keep running.
        print(f"❌ Error in batch processing: {e}")
        traceback.print_exc()

Parsing resumes: 100%|██████████| 3/3 [00:51<00:00, 17.32s/it]

Processed 3 resumes
Successfully parsed: 3

YoE Statistics:
Average Total YoE: 4.7 years
Min Total YoE: 3.0 years
Max Total YoE: 6.0 years





Unnamed: 0,filename,name,email,YoE,all_total_yoe,parsing_status
0,2023-08-28 - Wesley Ordonez Resume Wesley Ordo...,Wesley Ordoñez,wesordonez1@gmail.com,6 Years 11 Months,6.0,success
1,2023-08-26 - Akhil Bukkapuram Resume Akhil_ds.pdf,Akhil Bukkapuram,bakhil@ncsu.edu,5 Years 0 Months,5.0,success
2,2023-08-20 - Weihao Chen Resume Resume_Weihao ...,Weihao Chen,wchen151@jh.edu,3 Years 0 Months,3.0,success


In [5]:
# Quick save to CSV
# `df` is only defined once the batch-parsing cell has run successfully, so
# check the notebook namespace before writing.
if 'df' in globals():
    df.to_csv("resume_results.csv", index=False)
    print("Results saved to resume_results.csv")
else:
    # Say so explicitly instead of silently doing nothing
    print("No results to save - run the batch parsing cell first")

Results saved to resume_results.csv


## Understanding YoE Conversion

The system extracts years of experience in this format: "X Years Y Months". This is stored in the `YoE` field.

For numerical calculations, the system provides these fields:
- `all_work_yoe_numeric`: The numeric value of work experience years 
- `all_edu_yoe_numeric`: The numeric value of education years
- `all_total_yoe`: The sum of work and education years

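When YoE values are in string format like "6 Years 11 Months", the numeric fields extract the first number (6) for calculations. The months component is dropped, so the numeric value is truncated rather than rounded: "6 Years 11 Months" becomes 6.0, not 6.9.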

### Error Handling

The system includes robust error handling:
1. If YoE is a proper number, it's used directly
2. If YoE is a string, regex extracts the first number
3. If conversion fails, numeric fields default to 0
4. If a processing error occurs, all numeric fields fall back to safe defaults

This ensures notebooks won't fail even with varied YoE formats.