In [None]:
# Initialize the LLaMA model from Ollama
MODEL = "llama3"
model = Ollama(model=MODEL)

In [67]:
import re
from PyPDF2 import PdfReader

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Please ensure that the 'Years of Experience' includes only professional job or internship experience, not education experience.

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''


# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''


# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")


# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info


# Step 6: Example usage of the functions

# Define the PDF file paths
resume_pdf_path = 'resume4.pdf'
jd_pdf_path = 'jd2.pdf'

# Extract text from the resume and job description PDFs
resume_text = read_pdf(resume_pdf_path)
jd_text = read_pdf(jd_pdf_path)

if resume_text and jd_text:
    # Extract structured information from both the resume and the job description
    resume_info_raw = extract_resume_info(resume_text)
    jd_info_raw = extract_jd_info(jd_text)

    # Parse the raw extracted information into dictionaries (optional for verification)
    resume_info = parse_extracted_info(resume_info_raw)
    jd_info = parse_extracted_info(jd_info_raw)

    # Step 7: Save extracted resume and JD information to .txt files
    save_extracted_info(resume_info_raw, 'extracted_resume_info.txt')
    save_extracted_info(jd_info_raw, 'extracted_jd_info.txt')

else:
    print("Error: Could not extract text from one or both PDFs.")


Information saved to extracted_resume_info.txt
Information saved to extracted_jd_info.txt


In [60]:
import re
from PyPDF2 import PdfReader
from langchain_community.llms import Ollama
from langchain_core.output_parsers import StrOutputParser

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - Companies worked with

    Please ensure that the 'Years of Experience' includes only professional job or internship experience, not education experience.

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''


# Step 4: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''


# Step 3: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")


# Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info


# Import fuzzy matching for better job title and skill comparison
from difflib import SequenceMatcher

def fuzzy_match(a, b):
    """Returns a ratio of similarity between two strings using fuzzy matching."""
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def score_resume(resume_info, jd_info):
    """Scores the resume based on matching with the job description."""
    try:
        score = 0
        max_score = 100
        weights = {
            "skills": 0.6,
            "experience": 0.3,
            "job_title": 0.1
        }

        # --- Skills Matching ---
        candidate_skills = set(map(str.strip, resume_info.get('Skills', '').split(',')))
        job_skills = set(map(str.strip, jd_info.get('Required Skills', '').split(',')))

        exact_matches = candidate_skills.intersection(job_skills)
        partial_matches = set()

        # Check for partial/fuzzy matches between skills
        for c_skill in candidate_skills:
            for j_skill in job_skills:
                if fuzzy_match(c_skill, j_skill) > 0.7:  # Set a threshold for fuzzy match
                    partial_matches.add(c_skill)

        # Combine exact and partial matches, but avoid double counting
        total_skill_matches = exact_matches.union(partial_matches)
        skill_match_ratio = len(total_skill_matches) / len(job_skills) if job_skills else 0
        score += skill_match_ratio * weights['skills'] * max_score

        # --- Experience Matching ---
        candidate_experience = int(re.findall(r'\d+', resume_info.get('Years of Experience', '0'))[0]) if resume_info.get('Years of Experience') else 0
        job_experience_required = int(re.findall(r'\d+', jd_info.get('Years of Experience required', '0'))[0]) if jd_info.get('Years of Experience required') else 0

        # If candidate experience is within a reasonable range, scale proportionally
        if candidate_experience >= job_experience_required:
            experience_match_ratio = job_experience_required / candidate_experience if candidate_experience else 0
            score += min(1, experience_match_ratio) * weights['experience'] * max_score
        elif candidate_experience < job_experience_required:
            # Scale down score if candidate has less experience than required
            experience_match_ratio = candidate_experience / job_experience_required if job_experience_required else 0
            score += experience_match_ratio * weights['experience'] * max_score

        # --- Job Title Matching ---
        resume_job_titles = [title.lower() for title in resume_info.get('Job Titles', '').splitlines()]
        jd_job_title = jd_info.get('Job Title', '').lower()

        # Fuzzy match the job titles
        best_title_match = max(fuzzy_match(resume_title, jd_job_title) for resume_title in resume_job_titles) if resume_job_titles else 0

        # Add the job title score if a good match exists
        if best_title_match > 0.7:  # Adjust threshold for good match
            score += best_title_match * weights['job_title'] * max_score

        return round(score, 2)  # Return score rounded to 2 decimal places

    except Exception as e:
        print(f"Error in scoring resume: {e}")
        return 0


# Step 6: Example usage of the functions

# Define the PDF file paths
resume_pdf_path = 'resume4.pdf'
jd_pdf_path = 'jd3.pdf'

# Extract text from the resume and job description PDFs
resume_text = read_pdf(resume_pdf_path)
jd_text = read_pdf(jd_pdf_path)

if resume_text and jd_text:
    # Extract structured information from both the resume and the job description
    resume_info_raw = extract_resume_info(resume_text)
    jd_info_raw = extract_jd_info(jd_text)

    # Parse the raw extracted information into dictionaries
    resume_info = parse_extracted_info(resume_info_raw)
    jd_info = parse_extracted_info(jd_info_raw)

    # Print extracted information for verification
    print("Extracted Resume Information:", resume_info)
    print("Extracted Job Description Information:", jd_info)

    # Step 7: Save extracted resume and JD information to .txt files
    save_extracted_info(resume_info_raw, 'extracted_resume_info.txt')
    save_extracted_info(jd_info_raw, 'extracted_jd_info.txt')

    # Step 8: Score the resume
    resume_score = score_resume(resume_info, jd_info)
    print(f"Resume Score: {resume_score:.2f}/100")
else:
    print("Error: Could not extract text from one or both PDFs.")


Extracted Resume Information: {'Here is the extracted information': '', '**Name**': 'Not provided', '**Email**': 'professionalemail@r esumewor ded.com', '**Phone Number**': '+1-234-456-789', '**Job Titles**': 'Machine Learning Engineer, Automation Engineer, Computer Systems Analyst', '**Skills**': 'Deep Learning (Advanced), Predictive Modeling (Experienced), Statistical Analysis, Algorithms, English (Native), German (Fluent), French (Conversational)', '**Years of Experience**': '10 years'}
Extracted Job Description Information: {'Here is the extracted information': '', '* Company Name': 'Google LLC', '* Email': 'johndoe@google.com', '* Phone Number': '(555) 555-5555', '* Job Title': 'Machine Learning Engineer, Software Engineer, Data Scientist', '* Required Skills': 'Python, TensorFlow, Keras, PyTorch, Hadoop, Spark, Excellent problem-solving skills', '* Years of Experience required': '3'}
Information saved to extracted_resume_info.txt
Information saved to extracted_jd_info.txt
Resume 

In [63]:
import re
from difflib import SequenceMatcher

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    """Returns a ratio of similarity between two strings using fuzzy matching."""
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    """Removes unwanted symbols like **, *, etc., and normalizes the text."""
    text = re.sub(r'[\*\*]+', '', text)  # Remove any markdown symbols like ** or *
    text = re.sub(r'\s+', ' ', text)     # Replace multiple spaces with a single space
    return text.strip()                  # Strip leading/trailing spaces


# Function to extract skills from text using multiple regex patterns
def extract_skills(text, type="resume"):
    """Extracts skills from the text using multiple patterns for flexibility."""
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',  # Pattern 1: Skills (comma-separated)
        r'\*\*Skills:\*\*\s*(.+)',                # Pattern 2: **Skills:** (markdown format)
        r'\* Skills:\s*(.+)',                     # Pattern 3: * Skills: (generic bullet format)
        r'Skills:\s*(.+)',                        # Pattern 4: Simple Skills: with no special chars
        r'\*\*Required Skills\*\*:\s*(.+)',       # Pattern 5: **Required Skills:** (JD markdown format)
        r'Required Skills:\s*(.+)',               # Pattern 6: Required Skills (JD plain format)
        r'\*\*Skills\*\*:\s*(.+)',                # Pattern 7: **Skills**: (new format with colon-separated info)
    ]
    
    for pattern in patterns:
        skills_regex = re.search(pattern, text)
        if skills_regex:
            # Clean and normalize extracted skills
            return [clean_text(skill.strip().lower()) for skill in skills_regex.group(1).split(',')]
    
    return []  # Return empty list if no match


# Function to extract job titles from text using multiple regex patterns
def extract_job_titles(text, type="resume"):
    """Extracts job titles from the text using multiple patterns for flexibility."""
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',  # Pattern 1: Job Titles (comma-separated)
        r'\*\*Job Titles:\*\*\s*(.+)',               # Pattern 2: **Job Titles:** (markdown format)
        r'\* Job Titles:\s*(.+)',                    # Pattern 3: * Job Titles: (generic bullet format)
        r'Job Titles:\s*(.+)',                       # Pattern 4: Simple Job Titles: with no special chars
        r'\*\*Job Title\*\*:\s*(.+)',                # Pattern 5: **Job Title:** (JD markdown format)
        r'Job Title:\s*(.+)',                        # Pattern 6: Job Title (JD plain format)
        r'\*\*Job Titles\*\*:\s*(.+)',               # Pattern 7: **Job Titles**: (new format with colon-separated info)
    ]
    
    for pattern in patterns:
        job_titles_regex = re.search(pattern, text)
        if job_titles_regex:
            # Clean and normalize extracted job titles
            return [clean_text(title.strip().lower()) for title in job_titles_regex.group(1).split(',')]
    
    return []  # Return empty list if no match


# Function to extract years of experience from text using multiple regex patterns
def extract_experience(text, type="resume"):
    """Extracts years of experience from the text using multiple patterns."""
    patterns = [
        r'Years of Experience:\s*(\d+)',               # Pattern 1: Years of Experience:
        r'\*\*Years of Experience:\*\*\s*(\d+)',       # Pattern 2: **Years of Experience:** (markdown format)
        r'\* Years of Experience:\s*(\d+)',            # Pattern 3: * Years of Experience:
        r'Experience:\s*(\d+)',                        # Pattern 4: Simple Experience:
        r'Experience required:\s*(\d+)',               # Pattern 5: Used in job descriptions (JD)
        r'\*\*Years of Experience required\*\*:\s*(\d+)',  # Pattern 6: **Years of Experience required** (JD markdown format)
        r'\*\*Years of Experience\*\*:\s*(\d+)\s*years?',  # Pattern 7: **Years of Experience**: 10 years (new format with colon-separated info)
    ]
    
    for pattern in patterns:
        experience_regex = re.search(pattern, text)
        if experience_regex:
            return int(clean_text(experience_regex.group(1)))  # Clean and extract the number
    
    return 0  # Return 0 if no match


def calculate_resume_score(resume_text, jd_text):
    """Calculates a score for the resume based on the job description."""
    try:
        # Extract resume info
        resume_skills = extract_skills(resume_text, type="resume")
        resume_job_titles = extract_job_titles(resume_text, type="resume")
        resume_experience = extract_experience(resume_text, type="resume")
        
        # Extract JD info
        jd_skills = extract_skills(jd_text, type="jd")
        jd_job_titles = extract_job_titles(jd_text, type="jd")
        jd_experience_required = extract_experience(jd_text, type="jd")

        # Print extracted information for debugging
        print("Resume Skills:", resume_skills)
        print("Resume Job Titles:", resume_job_titles)
        print("Resume Experience (Years):", resume_experience)

        print("\nJD Skills:", jd_skills)
        print("JD Job Titles:", jd_job_titles)
        print("JD Experience Required (Years):", jd_experience_required)

        # Initialize score variables
        score = 0
        max_score = 100
        weights = {
            "skills": 0.6,  # 60% for skills
            "experience": 0.2,  # 20% for experience
            "job_title": 0.2  # 20% for job titles
        }

        # --- Skills Matching ---
        resume_skills_set = set(resume_skills)
        jd_skills_set = set(jd_skills)

        # Exact skill matches
        exact_skill_matches = resume_skills_set.intersection(jd_skills_set)
        partial_skill_matches = set()

        # Fuzzy matching for partial skill matches
        for resume_skill in resume_skills_set:
            for jd_skill in jd_skills_set:
                if fuzzy_match(resume_skill, jd_skill) > 0.7:  # Fuzzy match with a higher threshold
                    partial_skill_matches.add(resume_skill)

        # Combine exact and partial matches, avoiding double-counting
        total_skill_matches = exact_skill_matches.union(partial_skill_matches)
        skill_match_ratio = len(total_skill_matches) / len(jd_skills_set) if jd_skills_set else 0
        
        # Skills score is based on the match ratio, with more weight given to exact matches
        skill_score = skill_match_ratio * weights['skills'] * max_score
        score += skill_score

        print("\nExact Skill Matches:", exact_skill_matches)
        print("Partial Skill Matches:", partial_skill_matches)
        print("Skill Match Ratio:", skill_match_ratio)
        print("Skill Score:", skill_score)

        # --- Experience Matching ---
        if resume_experience >= jd_experience_required:
            # Full marks for experience if the resume experience is greater than or equal to the JD requirement
            experience_score = weights['experience'] * max_score
        else:
            # Proportional score if resume experience is less than required
            experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
            experience_score = experience_match_ratio * weights['experience'] * max_score

        score += experience_score

        print("Experience Match Ratio:", experience_match_ratio)
        print("Experience Score:", experience_score)

        # --- Job Title Matching ---
        best_title_match = 0
        for resume_title in resume_job_titles:
            for jd_title in jd_job_titles:
                best_title_match = max(best_title_match, fuzzy_match(resume_title, jd_title))

        # Add job title score if a good match exists
        job_title_score = 0
        if best_title_match > 0.7:  # Higher threshold for job title match
            job_title_score = best_title_match * weights['job_title'] * max_score
            score += job_title_score

        print("Best Job Title Match Score:", best_title_match)
        print("Job Title Score:", job_title_score)

        # Return final score rounded to 2 decimal places
        final_score = round(score, 2)
        print("\nFinal Resume Score:", final_score)
        return final_score

    except Exception as e:
        print(f"Error calculating resume score: {e}")
        return 0



# Function to read the contents of a file
def read_file(file_path):
    """Reads and returns the content of the file."""
    try:
        with open(file_path, 'r') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return ""

# Example usage: Reading from text files
resume_file = 'extracted_resume_info.txt'  # Path to the resume text file
jd_file = 'extracted_jd_info.txt'          # Path to the JD text file

# Read the contents of the files
resume_text = read_file(resume_file)
jd_text = read_file(jd_file)

# Calculate resume score based on JD
if resume_text and jd_text:
    resume_score = calculate_resume_score(resume_text, jd_text)
    print(f"\nResume Score: {resume_score}/100")
else:
    print("Error: Could not read one or both files.")


Resume Skills: ['deep learning (advanced)', 'predictive modeling (experienced)', 'statistical analysis', 'algorithms', 'english (native)', 'german (fluent)', 'french (conversational)']
Resume Job Titles: ['machine learning engineer', 'automation engineer', 'computer systems analyst']
Resume Experience (Years): 10

JD Skills: ['python', 'tensorflow', 'keras', 'pytorch', 'hadoop', 'spark', 'excellent problem-solving skills']
JD Job Titles: ['machine learning engineer', 'software engineer', 'data scientist']
JD Experience Required (Years): 3

Exact Skill Matches: set()
Partial Skill Matches: set()
Skill Match Ratio: 0.0
Skill Score: 0.0
Error calculating resume score: cannot access local variable 'experience_match_ratio' where it is not associated with a value

Resume Score: 0/100


In [68]:
import re
from difflib import SequenceMatcher

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    """Returns a ratio of similarity between two strings using fuzzy matching."""
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    """Removes unwanted symbols like **, *, etc., and normalizes the text."""
    text = re.sub(r'[\*\*]+', '', text)  # Remove any markdown symbols like ** or *
    text = re.sub(r'\s+', ' ', text)     # Replace multiple spaces with a single space
    return text.strip()                  # Strip leading/trailing spaces

# Function to extract skills from text using multiple regex patterns
def extract_skills(text, type="resume"):
    """Extracts skills from the text using multiple patterns for flexibility."""
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',  # Pattern 1: Skills (comma-separated)
        r'\*\*Skills:\*\*\s*(.+)',                # Pattern 2: **Skills:** (markdown format)
        r'\* Skills:\s*(.+)',                     # Pattern 3: * Skills: (generic bullet format)
        r'Skills:\s*(.+)',                        # Pattern 4: Simple Skills: with no special chars
        r'\*\*Required Skills\*\*:\s*(.+)',       # Pattern 5: **Required Skills:** (JD markdown format)
        r'Required Skills:\s*(.+)',               # Pattern 6: Required Skills (JD plain format)
        r'\*\*Skills\*\*:\s*(.+)',                # Pattern 7: **Skills**: (new format with colon-separated info)
    ]
    
    for pattern in patterns:
        skills_regex = re.search(pattern, text)
        if skills_regex:
            # Clean and normalize extracted skills
            return [clean_text(skill.strip().lower()) for skill in skills_regex.group(1).split(',')]
    
    return []  # Return empty list if no match

# Function to extract job titles from text using multiple regex patterns
def extract_job_titles(text, type="resume"):
    """Extracts job titles from the text using multiple patterns for flexibility."""
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',  # Pattern 1: Job Titles (comma-separated)
        r'\*\*Job Titles:\*\*\s*(.+)',               # Pattern 2: **Job Titles:** (markdown format)
        r'\* Job Titles:\s*(.+)',                    # Pattern 3: * Job Titles: (generic bullet format)
        r'Job Titles:\s*(.+)',                       # Pattern 4: Simple Job Titles: with no special chars
        r'\*\*Job Title\*\*:\s*(.+)',                # Pattern 5: **Job Title:** (JD markdown format)
        r'Job Title:\s*(.+)',                        # Pattern 6: Job Title (JD plain format)
        r'\*\*Job Titles\*\*:\s*(.+)',               # Pattern 7: **Job Titles**: (new format with colon-separated info)
    ]
    
    for pattern in patterns:
        job_titles_regex = re.search(pattern, text)
        if job_titles_regex:
            # Clean and normalize extracted job titles
            return [clean_text(title.strip().lower()) for title in job_titles_regex.group(1).split(',')]
    
    return []  # Return empty list if no match

# Function to extract years of experience from text using multiple regex patterns
def extract_experience(text, type="resume"):
    """Extracts years of experience from the text using multiple patterns."""
    patterns = [
        r'Years of Experience:\s*(\d+)',               # Pattern 1: Years of Experience:
        r'\*\*Years of Experience:\*\*\s*(\d+)',       # Pattern 2: **Years of Experience:** (markdown format)
        r'\* Years of Experience:\s*(\d+)',            # Pattern 3: * Years of Experience:
        r'Experience:\s*(\d+)',                        # Pattern 4: Simple Experience:
        r'Experience required:\s*(\d+)',               # Pattern 5: Used in job descriptions (JD)
        r'\*\*Years of Experience required\*\*:\s*(\d+)',  # Pattern 6: **Years of Experience required** (JD markdown format)
        r'\*\*Years of Experience\*\*:\s*(\d+)\s*years?',  # Pattern 7: **Years of Experience**: 10 years (new format with colon-separated info)
    ]
    
    for pattern in patterns:
        experience_regex = re.search(pattern, text)
        if experience_regex:
            return int(clean_text(experience_regex.group(1)))  # Clean and extract the number
    
    return 0  # Return 0 if no match

def calculate_resume_score(resume_text, jd_text):
    """Calculates a score for the resume based on the job description."""
    try:
        # Extract resume info
        resume_skills = extract_skills(resume_text, type="resume")
        resume_job_titles = extract_job_titles(resume_text, type="resume")
        resume_experience = extract_experience(resume_text, type="resume")
        
        # Extract JD info
        jd_skills = extract_skills(jd_text, type="jd")
        jd_job_titles = extract_job_titles(jd_text, type="jd")
        jd_experience_required = extract_experience(jd_text, type="jd")

        # Print extracted information for debugging
        print("Resume Skills:", resume_skills)
        print("Resume Job Titles:", resume_job_titles)
        print("Resume Experience (Years):", resume_experience)

        print("\nJD Skills:", jd_skills)
        print("JD Job Titles:", jd_job_titles)
        print("JD Experience Required (Years):", jd_experience_required)

        # Initialize score variables
        score = 0
        max_score = 100
        weights = {
            "skills": 0.6,  # 60% for skills
            "experience": 0.2,  # 20% for experience
            "job_title": 0.2  # 20% for job titles
        }

        # --- Skills Matching ---
        resume_skills_set = set(resume_skills)
        jd_skills_set = set(jd_skills)

        # Exact skill matches
        exact_skill_matches = resume_skills_set.intersection(jd_skills_set)
        partial_skill_matches = set()

        # Fuzzy matching for partial skill matches
        for resume_skill in resume_skills_set:
            for jd_skill in jd_skills_set:
                if fuzzy_match(resume_skill, jd_skill) > 0.7:  # Fuzzy match with a higher threshold
                    partial_skill_matches.add(resume_skill)

        # Combine exact and partial matches, avoiding double-counting
        total_skill_matches = exact_skill_matches.union(partial_skill_matches)
        skill_match_ratio = len(total_skill_matches) / len(jd_skills_set) if jd_skills_set else 0
        
        # Skills score is based on the match ratio, with more weight given to exact matches
        skill_score = skill_match_ratio * weights['skills'] * max_score
        score += skill_score

        print("\nExact Skill Matches:", exact_skill_matches)
        print("Partial Skill Matches:", partial_skill_matches)
        print("Skill Match Ratio:", skill_match_ratio)
        print("Skill Score:", skill_score)

        # --- Experience Matching ---
        experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0

        if resume_experience >= jd_experience_required:
            # Full marks for experience if the resume experience is greater than or equal to the JD requirement
            experience_score = weights['experience'] * max_score
        else:
            # Proportional score if resume experience is less than required
            experience_score = experience_match_ratio * weights['experience'] * max_score

        score += experience_score

        print("Experience Match Ratio:", experience_match_ratio)
        print("Experience Score:", experience_score)

        # --- Job Title Matching ---
        best_title_match = 0
        for resume_title in resume_job_titles:
            for jd_title in jd_job_titles:
                best_title_match = max(best_title_match, fuzzy_match(resume_title, jd_title))

        # Add job title score if a good match exists
        job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
        score += job_title_score

        print("Best Job Title Match Score:", best_title_match)
        print("Job Title Score:", job_title_score)

        # Return final score rounded to 2 decimal places
        final_score = round(score, 2)
        print("\nFinal Resume Score:", final_score)
        return final_score

    except Exception as e:
        print(f"Error calculating resume score: {e}")
        return 0

# Function to read the contents of a file
def read_file(file_path):
    """Reads and returns the content of a file."""
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# File paths for resume and job description files
resume_file = 'extracted_resume_info.txt'  # Path to the resume text file
jd_file = 'extracted_jd_info.txt'

# Read resume and job description content
resume_text = read_file(resume_file)
jd_text = read_file(jd_file)

# If both files are successfully read, calculate and print the resume score
if resume_text and jd_text:
    score = calculate_resume_score(resume_text, jd_text)
    print(f"\nResume Score: {score}")
else:
    print("Error: One or both files could not be read.")


Resume Skills: ['python', 'c', 'sql', 'mysql', 'postgresql', 'redshift', 'aws', 'azure cloud', 'pytorch', 'scikit-learn', 'tensorflow', 'nlp', 'computer vision', 'opencv', 'yolo', 'docker', 'vs code', 'dbeaver', 'jupyter notebook', 'git', 'problem-solving', 'analytical thinking', 'communication', 'team collaboration']
Resume Job Titles: ['machine learning intern', 'data engineering intern']
Resume Experience (Years): 2

JD Skills: ['python', 'tensorflow', 'keras', 'pytorch', 'hadoop or spark', 'strong problem-solving skills']
JD Job Titles: ['machine learning engineer', 'data scientist', 'engineer']
JD Experience Required (Years): 3

Exact Skill Matches: {'python', 'pytorch', 'tensorflow'}
Partial Skill Matches: {'python', 'pytorch', 'tensorflow'}
Skill Match Ratio: 0.5
Skill Score: 30.0
Experience Match Ratio: 0.6666666666666666
Experience Score: 13.333333333333334
Best Job Title Match Score: 0.875
Job Title Score: 17.5

Final Resume Score: 60.83

Resume Score: 60.83


In [2]:
from langchain_community.llms import Ollama
MODEL = "llama3"
model = Ollama(model=MODEL)

  model = Ollama(model=MODEL)


In [None]:
import mysql.connector
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = mysql.connector.connect(
        host="localhost",
        user="root",
        password="root"
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''

# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''

# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")

# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

# Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    cursor.execute(f"CREATE DATABASE IF NOT EXISTS {company_name}")
    conn.database = company_name
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS information (
            id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255),
            phone_number VARCHAR(20),
            email VARCHAR(255),
            skills TEXT,
            score FLOAT
        )
    """)
    conn.commit()
    cursor.close()
    conn.close()

# Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    conn.database = company_name
    cursor = conn.cursor()

    # Ensure all required fields are present
    name = resume_info.get('Name')
    phone_number = resume_info.get('Phone Number')
    email = resume_info.get('Email')
    skills = resume_info.get('List of Skills')

    # Debugging output
    print(f"Inserting into DB - Name: {name}, Phone: {phone_number}, Email: {email}, Skills: {skills}, Score: {score}")

    # Insert into the database
    query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
    cursor.execute(query, (name, phone_number, email, skills, score))

    conn.commit()
    cursor.close()
    conn.close()

# Function to extract skills from text using multiple regex patterns
def extract_skills(text, type="resume"):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',  
        r'\\*Skills:\\\s(.+)',                
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'\\*Required Skills\\:\s(.+)',       
        r'Required Skills:\s*(.+)',              
        r'\\*Skills\\:\s(.+)',               
        r'List of Skills:\s*(.+)',               
        r'List of Skills: (.+)',                  
        r'Skills:\s*(.*)\n',                      
        r'\* Skills\s*:\s*(.+)',                  
        r'Skills\s*(?:\(.+?\):)?\s*(.+)',       
        r'\\*Skills:\\\s(.+)',              
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'Required Skills?:\s*(.+)',            
        r'\\*Skills\\:\s(.+)',              
        r'List of Skills:\s*(.+)',              
        r'Skills\s*(?:\(.+?\):)?\s*(.+)',       
        r'\\*Skills:\\\s(.+)',              
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'Required Skills?:\s*(.+)',            
        r'\\*Skills\\:\s(.+)',              
        r'Position Titles?:\s*(.+)',           
        r'\\*Job Title\\:\s(.+)',          
        r'Position\s*(?:Held|Held\s*:\s*|Titles?)\s*:\s*(.+)', 
        r'\b(?:Work Experience|Employment History)\b\s*:\s*(.+)',  
    ]
    
    for pattern in patterns:
        skills_regex = re.search(pattern, text)
        if skills_regex:
            return [clean_text(skill.strip().lower()) for skill in skills_regex.group(1).split(',')]
    
    return []  

# Function to extract job titles from text using multiple regex patterns
def extract_job_titles(text, type="resume"):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',  
        r'\\*Job Titles:\\\s(.+)',               
        r'\* Job Titles:\s*(.+)',                    
        r'Job Titles:\s*(.+)',                       
        r'\\*Job Title\\:\s(.+)',              
        r'Job Title:\s*(.+)',                        
        r'\\*Job Titles\\:\s(.+)',              
        r'List of Job Titles:\s*(.+)',              
        r'List of Job Titles\s*:\s*(.+)',            
        r'Job Titles?\s*(?:\(.+?\):)?\s*(.+)',       
        r'\\*Job Titles:\\\s(.+)',              
        r'\* Job Titles?:\s*(.+)',                  
        r'Job Titles?:\s*(.+)',                     
        r'Position Titles?:\s*(.+)',               
        r'\\*Job Title\\:\s(.+)',              
        r'Position\s*(?:Held|Held\s*:\s*|Titles?)\s*:\s*(.+)', 
        r'\b(?:Work Experience|Employment History)\b\s*:\s*(.+)',  
    ]
    
    for pattern in patterns:
        job_titles_regex = re.search(pattern, text)
        if job_titles_regex:
            return [clean_text(title.strip().lower()) for title in job_titles_regex.group(1).split(',')]
    
    return []  

# Function to extract years of experience from text using multiple regex patterns
def extract_experience(text, type="resume"):
    patterns = [
        r'Years of Experience:\s*(\d+)',               
        r'\\*Years of Experience:\\\s(\ d+)',       
        r'\* Years of Experience:\s*(\d+)',            
        r'Experience:\s*(\d+)',                        
        r'Experience required:\s*(\d+)',              
        r'\\*Years of Experience required\\:\s(\d+)',  
        r'\\*Years of Experience\\:\s(\d+)\s*years?',  
        r'Years of Experience\s*:\s*(\d+)',            
        r'\* Years of Experience\s*:\s*(\d+)',         
        r'Years of Experience:\s*(\d+)',               
        r'\\*Years of Experience:\\\s(\d+)',       
        r'Experience\s*(?:Required|Needed|Desired)?\s*:\s*(\d+)',  
        r'\d+\+?\s*years? (?:of)? experience',        
        r'\d+\s*years? experience(?: required)?',      
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)',  
    ]
    
    for pattern in patterns:
        experience_regex = re.search(pattern, text)
        if experience_regex:
            return int(clean_text(experience_regex.group(1)))  
    
    return 0  

# Function to calculate the resume score
def calculate_resume_score(resume_text, jd_text):
    try:
        # Extract resume info
        resume_skills = extract_skills(resume_text, type="resume")
        resume_job_titles = extract_job_titles(resume_text, type="resume")
        resume_experience = extract_experience(resume_text, type="resume")
        
        # Extract JD info
        jd_skills = extract_skills(jd_text, type="jd")
        jd_job_titles = extract_job_titles(jd_text, type="jd")
        jd_experience_required = extract_experience(jd_text, type="jd")

        # Print extracted information for debugging
        print("Resume Skills:", resume_skills)
        print("Resume Job Titles:", resume_job_titles)
        print("Resume Experience (Years):", resume_experience)

        print("\nJD Skills:", jd_skills)
        print("JD Job Titles:", jd_job_titles)
        print("JD Experience Required (Years):", jd_experience_required)

        # Initialize score variables
        score = 0
        max_score = 100
        weights = {
            "skills": 0.6,  # 60% for skills
            "experience": 0.2,  # 20% for experience
            "job_title": 0.2  # 20% for job titles
        }

        # --- Skills Matching ---
        resume_skills_set = set(resume_skills)
        jd_skills_set = set(jd_skills)

        # Exact skill matches
        exact_skill_matches = resume_skills_set.intersection(jd_skills_set)
        partial_skill_matches = set()

        # Fuzzy matching for partial skill matches
        for resume_skill in resume_skills_set:
            for jd_skill in jd_skills_set:
                if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match with a higher threshold
                    partial_skill_matches.add(resume_skill)

        # Combine exact and partial matches, avoiding double-counting
        total_skill_matches = exact_skill_matches.union(partial_skill_matches)
        skill_match_ratio = len(total_skill_matches) / len(jd_skills_set) if jd_skills_set else 0
        
        # Skills score is based on the match ratio, with more weight given to exact matches
        skill_score = skill_match_ratio * weights['skills'] * max_score
        score += skill_score

        print("\nExact Skill Matches:", exact_skill_matches)
        print("Partial Skill Matches:", partial_skill_matches)
        print("Skill Match Ratio:", skill_match_ratio)
        print("Skill Score:", skill_score)

        # --- Experience Matching ---
        experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0

        if resume_experience >= jd_experience_required:
            # Full marks for experience if the resume experience is greater than or equal to the JD requirement
            experience_score = weights['experience'] * max_score
        else:
            # Proportional score if resume experience is less than required
            experience_score = experience_match_ratio * weights['experience'] * max_score

        score += experience_score

        print("Experience Match Ratio:", experience_match_ratio)
        print("Experience Score:", experience_score)

        # --- Job Title Matching ---
        best_title_match = 0
        for resume_title in resume_job_titles:
            for jd_title in jd_job_titles:
                best_title_match = max(best_title_match, fuzzy_match(resume_title, jd_title))

        # Add job title score if a good match exists
        job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
        score += job_title_score

        print("Best Job Title Match Score:", best_title_match)
        print("Job Title Score:", job_title_score)

        # Return final score rounded to 2 decimal places
        final_score = round(score, 2)
        print("\nFinal Resume Score:", final_score)
        return final_score

    except Exception as e:
        print(f"Error calculating resume score: {e}")
        return 0

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Step 6: Example usage of the functions
def process_resume():
    # Define the PDF file paths
    resume_pdf_path = 'resume4.pdf'
    jd_pdf_path = 'jd.pdf'

    # Extract text from the resume and job description PDFs
    resume_text = read_pdf(resume_pdf_path)
    jd_text = read_pdf(jd_pdf_path)

    if resume_text and jd_text:
        # Extract structured information from both the resume and the job description
        resume_info_raw = extract_resume_info(resume_text)
        jd_info_raw = extract_jd_info(jd_text)

        # Save extracted info to .txt files
        save_extracted_info(resume_info_raw, 'extracted_resume_info.txt')
        save_extracted_info(jd_info_raw, 'extracted_jd_info.txt')

        # Step 3: Read the extracted info from the .txt files
        resume_info_text = read_file('extracted_resume_info.txt')
        jd_info_text = read_file('extracted_jd_info.txt')

        # Parse extracted information
        resume_info = parse_extracted_info(resume_info_text)
        jd_info = parse_extracted_info(jd_info_text)

        # Debugging output
        print("Resume Info:", resume_info)
        print("JD Info:", jd_info)

        # Step 4: Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Step 5: Calculate resume score
        score = calculate_resume_score(resume_info_text, jd_info_text)

        # Step 6: Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Run the processing function
if __name__ == '__main__':
    process_resume()

In [6]:
import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

# MySQL connection (using pymysql)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # This ensures results are returned as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Here is the resume:
    {resume_text}
    """
    try:
        # Assuming a hypothetical LLaMA model, replace with the actual call if implemented
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''

# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Assuming a hypothetical LLaMA model, replace with the actual call if implemented
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''

# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")

# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

# Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    cursor.execute(f"CREATE DATABASE IF NOT EXISTS prashant")
    conn.select_db(company_name)
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS information (
            id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255),
            phone_number VARCHAR(20),
            email VARCHAR(255),
            skills TEXT,
            score FLOAT
        )
    """)
    conn.commit()
    cursor.close()
    conn.close()

# Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    conn.select_db(company_name)
    cursor = conn.cursor()

    # Ensure all required fields are present
    name = resume_info.get('Name')
    phone_number = resume_info.get('Phone Number')
    email = resume_info.get('Email')
    skills = resume_info.get('List of Skills')

    # Debugging output
    print(f"Inserting into DB - Name: {name}, Phone: {phone_number}, Email: {email}, Skills: {skills}, Score: {score}")

    # Insert into the database
    query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
    cursor.execute(query, (name, phone_number, email, skills, score))

    conn.commit()
    cursor.close()
    conn.close()

# Function to extract skills from text using multiple regex patterns
def extract_skills(text, type="resume"):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',  
        r'\\*Skills:\\\s(.+)',                
        r'Skills:\s*(.+)',                       
        r'Required Skills:\s*(.+)',              
    ]
    
    for pattern in patterns:
        skills_regex = re.search(pattern, text)
        if skills_regex:
            return [clean_text(skill.strip().lower()) for skill in skills_regex.group(1).split(',')]
    
    return []  

# Function to extract job titles from text using multiple regex patterns
def extract_job_titles(text, type="resume"):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',  
        r'\\*Job Titles:\\\s(.+)',               
        r'Job Titles:\s*(.+)',                    
    ]
    
    for pattern in patterns:
        job_titles_regex = re.search(pattern, text)
        if job_titles_regex:
            return [clean_text(title.strip().lower()) for title in job_titles_regex.group(1).split(',')]
    
    return []  

# Function to extract years of experience from text using multiple regex patterns
def extract_experience(text, type="resume"):
    patterns = [
        r'Years of Experience:\s*(\d+)',               
    ]
    
    for pattern in patterns:
        experience_regex = re.search(pattern, text)
        if experience_regex:
            return int(clean_text(experience_regex.group(1)))  
    
    return 0  

# Function to calculate the resume score
def calculate_resume_score(resume_text, jd_text):
    try:
        # Extract resume info
        resume_skills = extract_skills(resume_text, type="resume")
        resume_job_titles = extract_job_titles(resume_text, type="resume")
        resume_experience = extract_experience(resume_text, type="resume")
        
        # Extract JD info
        jd_skills = extract_skills(jd_text, type="jd")
        jd_job_titles = extract_job_titles(jd_text, type="jd")
        jd_experience_required = extract_experience(jd_text, type="jd")

        # Print extracted information for debugging
        print("Resume Skills:", resume_skills)
        print("Resume Job Titles:", resume_job_titles)
        print("Resume Experience (Years):", resume_experience)

        print("\nJD Skills:", jd_skills)
        print("JD Job Titles:", jd_job_titles)
        print("JD Experience Required (Years):", jd_experience_required)

        # Initialize score variables
        score = 0
        max_score = 100
        weights = {
            "skills": 0.6,  # 60% for skills
            "experience": 0.2,  # 20% for experience
            "job_title": 0.2  # 20% for job titles
        }

        # --- Skills Matching ---
        resume_skills_set = set(resume_skills)
        jd_skills_set = set(jd_skills)

        # Exact skill matches
        exact_skill_matches = resume_skills_set.intersection(jd_skills_set)
        partial_skill_matches = set()

        # Fuzzy matching for partial skill matches
        for resume_skill in resume_skills_set:
            for jd_skill in jd_skills_set:
                if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match with a higher threshold
                    partial_skill_matches.add(resume_skill)

        # Combine exact and partial matches, avoiding double-counting
        total_skill_matches = exact_skill_matches.union(partial_skill_matches)
        skill_match_ratio = len(total_skill_matches) / len(jd_skills_set) if jd_skills_set else 0
        
        # Skills score is based on the match ratio, with more weight given to exact matches
        skill_score = skill_match_ratio * weights['skills'] * max_score
        score += skill_score

        print("\nExact Skill Matches:", exact_skill_matches)
        print("Partial Skill Matches:", partial_skill_matches)
        print("Skill Match Ratio:", skill_match_ratio)
        print("Skill Score:", skill_score)

        # --- Experience Matching ---
        experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0

        if resume_experience >= jd_experience_required:
            # Full marks for experience if the resume experience is greater than or equal to the JD requirement
            experience_score = weights['experience'] * max_score
        else:
            # Proportional score if resume experience is less than required
            experience_score = experience_match_ratio * weights['experience'] * max_score

        score += experience_score

        print("Experience Match Ratio:", experience_match_ratio)
        print("Experience Score:", experience_score)

        # --- Job Title Matching ---
        best_title_match = 0
        for resume_title in resume_job_titles:
            for jd_title in jd_job_titles:
                best_title_match = max(best_title_match, fuzzy_match(resume_title, jd_title))

        # Add job title score if a good match exists
        job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
        score += job_title_score

        print("Best Job Title Match Score:", best_title_match)
        print("Job Title Score:", job_title_score)

        # Return final score rounded to 2 decimal places
        final_score = round(score, 2)
        print("\nFinal Resume Score:", final_score)
        return final_score

    except Exception as e:
        print(f"Error calculating resume score: {e}")
        return 0

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Step 6: Example usage of the functions
def process_resume():
    # Define the PDF file paths
    resume_pdf_path = 'resume4.pdf'
    jd_pdf_path = 'jd2.pdf'

    # Extract text from the resume and job description PDFs
    resume_text = read_pdf(resume_pdf_path)
    jd_text = read_pdf(jd_pdf_path)

    if resume_text and jd_text:
        # Extract structured information from both the resume and the job description
        resume_info_raw = extract_resume_info(resume_text)
        jd_info_raw = extract_jd_info(jd_text)

        # Save extracted info to .txt files
        save_extracted_info(resume_info_raw, 'extracted_resume_info.txt')
        save_extracted_info(jd_info_raw, 'extracted_jd_info.txt')

        # Step 3: Read the extracted info from the .txt files
        resume_info_text = read_file('extracted_resume_info.txt')
        jd_info_text = read_file('extracted_jd_info.txt')

        # Parse extracted information
        resume_info = parse_extracted_info(resume_info_text)
        jd_info = parse_extracted_info(jd_info_text)

        # Debugging output
        print("Resume Info:", resume_info)
        print("JD Info:", jd_info)

        # Step 4: Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Step 5: Calculate resume score
        score = calculate_resume_score(resume_info_text, jd_info_text)

        # Step 6: Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Run the processing function
if __name__ == '__main__':
    process_resume()


Information saved to extracted_resume_info.txt
Information saved to extracted_jd_info.txt
Resume Info: {'Here is the extracted information': '', '**Name': '** Shivam Sood', '**Email': '** soodshivam576@gmail.com', '**Phone Number': '** +91 9821074705', '**Job Titles': '** Machine Learning Intern, Data Engineering Intern, Programming Language - python, c, Database - SQL, MYSQL, POSTGRESQL, Redshift, Cloud - AWS, Azure Cloud, Machine / Deep Learning - Pytorch, Scikit-learn, Tensorflow, NLP, Computer Vision - OpenCV, YOLO', '**Skills': '** Python, C, SQL, MYSQL, POSTGRESQL, Redshift, AWS, Azure Cloud, Pytorch, Scikit-learn, Tensorflow, NLP, OpenCV, YOLO, Docker, VS Code, DBeaver, Jupyter Notebook, Git, Problem-Solving, Analytical Thinking, Communication, Team Collaboration', '**Years of Experience': '** 2 (assuming the internship experience is included)', '**Companies worked with': '** QUBITNETS TECHNOLOGIES, Namasys Analytics'}
JD Info: {'Here is the extracted information': '', '* Compan

In [11]:
import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root"
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''

# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''

# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")

# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

# Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    cursor.execute(f"CREATE DATABASE IF NOT EXISTS {company_name}")
    conn.database = company_name
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS information (
            id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255),
            phone_number VARCHAR(20),
            email VARCHAR(255),
            skills TEXT,
            score FLOAT
        )
    """)
    conn.commit()
    cursor.close()
    conn.close()

# Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    conn.database = company_name
    cursor = conn.cursor()

    # Ensure all required fields are present
    name = resume_info.get('Name')
    phone_number = resume_info.get('Phone Number')
    email = resume_info.get('Email')
    skills = resume_info.get('List of Skills')

    # Debugging output
    print(f"Inserting into DB - Name: {name}, Phone: {phone_number}, Email: {email}, Skills: {skills}, Score: {score}")

    # Insert into the database
    query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
    cursor.execute(query, (name, phone_number, email, skills, score))

    conn.commit()
    cursor.close()
    conn.close()

# Function to extract skills from text using multiple regex patterns
def extract_skills(text, type="resume"):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',  
        r'\\*Skills:\\\s(.+)',                
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'\\*Required Skills\\:\s(.+)',       
        r'Required Skills:\s*(.+)',              
        r'\\*Skills\\:\s(.+)',               
        r'List of Skills:\s*(.+)',               
        r'List of Skills: (.+)',                  
        r'Skills:\s*(.*)\n',                      
        r'\* Skills\s*:\s*(.+)',                  
        r'Skills\s*(?:\(.+?\):)?\s*(.+)',       
        r'\\*Skills:\\\s(.+)',              
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'Required Skills?:\s*(.+)',            
        r'\\*Skills\\:\s(.+)',              
        r'List of Skills:\s*(.+)',              
        r'Skills\s*(?:\(.+?\):)?\s*(.+)',       
        r'\\*Skills:\\\s(.+)',              
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'Required Skills?:\s*(.+)',            
        r'\\*Skills\\:\s(.+)',              
        r'Position Titles?:\s*(.+)',           
        r'\\*Job Title\\:\s(.+)',          
        r'Position\s*(?:Held|Held\s*:\s*|Titles?)\s*:\s*(.+)', 
        r'\b(?:Work Experience|Employment History)\b\s*:\s*(.+)',  
    ]
    
    for pattern in patterns:
        skills_regex = re.search(pattern, text)
        if skills_regex:
            return [clean_text(skill.strip().lower()) for skill in skills_regex.group(1).split(',')]
    
    return []  

# Function to extract job titles from text using multiple regex patterns
def extract_job_titles(text, type="resume"):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',  
        r'\\*Job Titles:\\\s(.+)',               
        r'\* Job Titles:\s*(.+)',                    
        r'Job Titles:\s*(.+)',                       
        r'\\*Job Title\\:\s(.+)',              
        r'Job Title:\s*(.+)',                        
 r'\\*Job Titles\\:\s(.+)',              
        r'List of Job Titles:\s*(.+)',              
        r'List of Job Titles\s*:\s*(.+)',            
        r'Job Titles?\s*(?:\(.+?\):)?\s*(.+)',       
        r'\\*Job Titles:\\\s(.+)',              
        r'\* Job Titles?:\s*(.+)',                  
        r'Job Titles?:\s*(.+)',                     
        r'Position Titles?:\s*(.+)',               
        r'\\*Job Title\\:\s(.+)',              
        r'Position\s*(?:Held|Held\s*:\s*|Titles?)\s*:\s*(.+)', 
        r'\b(?:Work Experience|Employment History)\b\s*:\s*(.+)',  
    ]
    
    for pattern in patterns:
        job_titles_regex = re.search(pattern, text)
        if job_titles_regex:
            return [clean_text(title.strip().lower()) for title in job_titles_regex.group(1).split(',')]
    
    return []  

# Function to extract years of experience from text using multiple regex patterns
def extract_experience(text, type="resume"):
    patterns = [
        r'Years of Experience:\s*(\d+)',               
        r'\\ Years of Experience:\\\s*(\ d+)',       
        r'\* Years of Experience:\s*(\d+)',            
        r'Experience:\s*(\d+)',                        
        r'Experience required:\s*(\d+)',              
        r'\\*Years of Experience required\\:\s(\d+)',  
        r'\\*Years of Experience\\:\s(\d+)\s*years?',  
        r'Years of Experience\s*:\s*(\d+)',            
        r'\* Years of Experience\s*:\s*(\d+)',         
        r'Years of Experience:\s*(\d+)',               
        r'\\*Years of Experience:\\\s(\d+)',       
        r'Experience\s*(?:Required|Needed|Desired)?\s*:\s*(\d+)',  
        r'\d+\+?\s*years? (?:of)? experience',        
        r'\d+\s*years? experience(?: required)?',      
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)',  
    ]
    
    for pattern in patterns:
        experience_regex = re.search(pattern, text)
        if experience_regex:
            return int(clean_text(experience_regex.group(1)))  
    
    return 0  

# Function to calculate the resume score
def calculate_resume_score(resume_text, jd_text):
    try:
        # Extract resume info
        resume_skills = extract_skills(resume_text, type="resume")
        resume_job_titles = extract_job_titles(resume_text, type="resume")
        resume_experience = extract_experience(resume_text, type="resume")
        
        # Extract JD info
        jd_skills = extract_skills(jd_text, type="jd")
        jd_job_titles = extract_job_titles(jd_text, type="jd")
        jd_experience_required = extract_experience(jd_text, type="jd")

        # Print extracted information for debugging
        print("Resume Skills:", resume_skills)
        print("Resume Job Titles:", resume_job_titles)
        print("Resume Experience (Years):", resume_experience)

        print("\nJD Skills:", jd_skills)
        print("JD Job Titles:", jd_job_titles)
        print("JD Experience Required (Years):", jd_experience_required)

        # Initialize score variables
        score = 0
        max_score = 100
        weights = {
            "skills": 0.6,  # 60% for skills
            "experience": 0.2,  # 20% for experience
            "job_title": 0.2  # 20% for job titles
        }

        # --- Skills Matching ---
        resume_skills_set = set(resume_skills)
        jd_skills_set = set(jd_skills)

        # Exact skill matches
        exact_skill_matches = resume_skills_set.intersection(jd_skills_set)
        partial_skill_matches = set()

        # Fuzzy matching for partial skill matches
        for resume_skill in resume_skills_set:
            for jd_skill in jd_skills_set:
                if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match with a higher threshold
                    partial_skill_matches.add(resume_skill)

        # Combine exact and partial matches, avoiding double-counting
        total_skill_matches = exact_skill_matches.union(partial_skill_matches)
        skill_match_ratio = len(total_skill_matches) / len(jd_skills_set) if jd_skills_set else 0
        
        # Skills score is based on the match ratio, with more weight given to exact matches
        skill_score = skill_match_ratio * weights['skills'] * max_score
        score += skill_score

        print("\nExact Skill Matches:", exact_skill_matches)
        print("Partial Skill Matches:", partial_skill_matches)
        print("Skill Match Ratio:", skill_match_ratio)
        print("Skill Score:", skill_score)

        # --- Experience Matching ---
        experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0

        if resume_experience >= jd_experience_required:
            # Full marks for experience if the resume experience is greater than or equal to the JD requirement
            experience_score = weights['experience'] * max_score
        else:
            # Proportional score if resume experience is less than required
            experience_score = experience_match_ratio * weights['experience'] * max_score

        score += experience_score

        print("Experience Match Ratio:", experience_match_ratio)
        print("Experience Score:", experience_score)

        # --- Job Title Matching ---
        best_title_match = 0
        for resume_title in resume_job_titles:
            for jd_title in jd_job_titles:
                best_title_match = max(best_title_match, fuzzy_match(resume_title, jd_title))

        # Add job title score if a good match exists
        job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
        score += job_title_score

        print("Best Job Title Match Score:", best_title_match)
        print("Job Title Score:", job_title_score)

        # Return final score rounded to 2 decimal places
        final_score = round(score, 2)
        print("\nFinal Resume Score:", final_score)
        return final_score

    except Exception as e:
        print(f"Error calculating resume score: {e}")
        return 0

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Step 6: Example usage of the functions
def process_resume():
    # Define the PDF file paths
    resume_pdf_path = 'resume.pdf'
    jd_pdf_path = 'jd2.pdf'

    # Extract text from the resume and job description PDFs
    resume_text = read_pdf(resume_pdf_path)
    jd_text = read_pdf(jd_pdf_path)

    if resume_text and jd_text:
        # Extract structured information from both the resume and the job description
        resume_info_raw = extract_resume_info(resume_text)
        jd_info_raw = extract_jd_info(jd_text)

        # Save extracted info to .txt files
        save_extracted_info(resume_info_raw, 'extracted_resume_info.txt')
        save_extracted_info(jd_info_raw, 'extracted_jd_info.txt')

        # Step 3: Read the extracted info from the .txt files
        resume_info_text = read_file('extracted_resume_info.txt')
        jd_info_text = read_file('extracted_jd_info.txt')

        # Parse extracted information
        resume_info = parse_extracted_info(resume_info_text)
        jd_info = parse_extracted_info(jd_info_text)

        # Debugging output
        print("Resume Info:", resume_info)
        print("JD Info:", jd_info)

        # Step 4: Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Step 5: Calculate resume score
        score = calculate_resume_score(resume_info_text, jd_info_text)

        # Step 6: Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Run the processing function
if __name__ == '__main__':
    process_resume()

Information saved to extracted_resume_info.txt
Information saved to extracted_jd_info.txt
Resume Info: {'Here is the extracted information': '', '**Name': '** Prashant Singh', '**Email': '** prashantsingha96@gmail.com', '**Phone Number': '** +91 8368796901', '**Job Titles (comma-separated)': '** Machine Learning Intern, Data Scientist, Problem-Solver', '**Skills (comma-separated)': '** Python, C, SQL, MYSQL, POSTGRESQL, Redshift, AWS, Azure Cloud, Pytorch, Scikit-learn, Tensorflow, NLP, OpenCV, YOLO, Docker, VS Code, DBeaver, Jupyter Notebook, Git', '**Years of Experience': '** 2 (2020-2021)', '**Companies worked with (comma-separated)': '** Qubitnets Technologies'}
JD Info: {'Here is the extracted information': '', '* Company Name': 'Google LLC', '* Email': 'johndoe@google.com', '* Phone Number': '(555) 555-5555', '* Job Title': 'Machine Learning Engineer, Data Scientist', '* List of Required Skills': 'Python, TensorFlow, Keras, PyTorch, Hadoop, Spark, problem-solving skills', '* Year

OperationalError: (1046, 'No database selected')

In [7]:
import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''

# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''

# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")

# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

# Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Create the database if it doesn't exist
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()

        # Select the created database
        conn.select_db(company_name)

        # Create the information table if it doesn't exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Select the database
        conn.select_db(company_name)

        # Ensure all required fields are present
        name = resume_info.get('Name')
        phone_number = resume_info.get('Phone Number')
        email = resume_info.get('Email')
        skills = resume_info.get('List of Skills')

        # Debugging output
        print(f"Inserting into DB - Name: {name}, Phone: {phone_number}, Email: {email}, Skills: {skills}, Score: {score}")

        # Insert into the database
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (name, phone_number, email, skills, score))
        conn.commit()

    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Function to extract skills from text using multiple regex patterns
def extract_skills(text, type="resume"):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',  
        r'\\*Skills:\\\s(.+)',                
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'\\*Required Skills\\:\s(.+)',       
        r'Required Skills:\s*(.+)',              
        r'\\*Skills\\:\s(.+)',               
        r'List of Skills:\s*(.+)',               
        r'List of Skills: (.+)',                  
        r'Skills:\s*(.*)\n',                      
        r'\* Skills\s*:\s*(.+)',                  
        r'Skills\s*(?:\(.+?\):)?\s*(.+)',       
        r'\\*Skills:\\\s(.+)',              
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'Required Skills?:\s*(.+)',            
        r'\\*Skills\\:\s(.+)',              
        r'List of Skills:\s*(.+)',              
        r'Skills\s*(?:\(.+?\):)?\s*(.+)',       
        r'\\*Skills:\\\s(.+)',              
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'Required Skills?:\s*(.+)',            
        r'\\*Skills\\:\s(.+)',              
        r'Position Titles?:\s*(.+)',           
        r'\\*Job Title\\:\s(.+)',          
        r'Position\s*(?:Held|Held\s*:\s*|Titles?)\s*:\s*(.+)', 
        r'\b(?:Work Experience|Employment History)\b\s*:\s*(.+)',                       
    ]
    
    for pattern in patterns:
        skills_regex = re.search(pattern, text)
        if skills_regex:
            return [clean_text(skill.strip().lower()) for skill in skills_regex.group(1).split(',')]
    
    return []  

# Function to extract job titles from text using multiple regex patterns
def extract_job_titles(text, type="resume"):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',  
        r'\\*Job Titles:\\\s(.+)',               
        r'\* Job Titles:\s*(.+)',                    
        r'Job Titles:\s*(.+)',                       
        r'\\*Job Title\\:\s(.+)',              
        r'Job Title:\s*(.+)',                        
        r'\\*Job Titles\\:\s(.+)',              
        r'List of Job Titles:\s*(.+)',              
        r'List of Job Titles\s*:\s*(.+)',            
        r'Job Titles?\s*(?:\(.+?\):)?\s*(.+)',       
        r'\\*Job Titles:\\\s(.+)',              
        r'\* Job Titles?:\s*(.+)',                  
        r'Job Titles?:\s*(.+)',                     
        r'Position Titles?:\s*(.+)',               
        r'\\*Job Title\\:\s(.+)',              
        r'Position\s*(?:Held|Held\s*:\s*|Titles?)\s*:\s*(.+)', 
        r'\b(?:Work Experience|Employment History)\b\s*:\s*(.+)',                     
    ]
    
    for pattern in patterns:
        job_titles_regex = re.search(pattern, text)
        if job_titles_regex:
            return [clean_text(title.strip().lower()) for title in job_titles_regex.group(1).split(',')]
    
    return []  

# Function to extract years of experience from text using multiple regex patterns
def extract_experience(text, type="resume"):
    patterns = [
         r'Years of Experience:\s*(\d+)',               
        r'\\ Years of Experience:\\\s*(\ d+)',       
        r'\* Years of Experience:\s*(\d+)',            
        r'Experience:\s*(\d+)',                        
        r'Experience required:\s*(\d+)',              
        r'\\*Years of Experience required\\:\s(\d+)',  
        r'\\*Years of Experience\\:\s(\d+)\s*years?',  
        r'Years of Experience\s*:\s*(\d+)',            
        r'\* Years of Experience\s*:\s*(\d+)',         
        r'Years of Experience:\s*(\d+)',               
        r'\\*Years of Experience:\\\s(\d+)',       
        r'Experience\s*(?:Required|Needed|Desired)?\s*:\s*(\d+)',  
        r'\d+\+?\s*years? (?:of)? experience',        
        r'\d+\s*years? experience(?: required)?',      
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)',               
    ]
    
    for pattern in patterns:
        experience_regex = re.search(pattern, text)
        if experience_regex:
            return int(clean_text(experience_regex.group(1)))  
    
    return 0  

# Function to calculate the resume score
def calculate_resume_score(resume_text, jd_text):
    try:
        # Extract resume info
        resume_skills = extract_skills(resume_text, type="resume")
        resume_job_titles = extract_job_titles(resume_text, type="resume")
        resume_experience = extract_experience(resume_text, type="resume")
        
        # Extract JD info
        jd_skills = extract_skills(jd_text, type="jd")
        jd_job_titles = extract_job_titles(jd_text, type="jd")
        jd_experience_required = extract_experience(jd_text, type="jd")

        # Print extracted information for debugging
        print("Resume Skills:", resume_skills)
        print("Resume Job Titles:", resume_job_titles)
        print("Resume Experience (Years):", resume_experience)

        print("\nJD Skills:", jd_skills)
        print("JD Job Titles:", jd_job_titles)
        print("JD Experience Required (Years):", jd_experience_required)

        # Initialize score variables
        score = 0
        max_score = 100
        weights = {
            "skills": 0.6,  # 60% for skills
            "experience": 0.2,  # 20% for experience
            "job_title": 0.2  # 20% for job titles
        }

        # --- Skills Matching ---
        resume_skills_set = set(resume_skills)
        jd_skills_set = set(jd_skills)

        # Exact skill matches
        exact_skill_matches = resume_skills_set.intersection(jd_skills_set)
        partial_skill_matches = set()

        # Fuzzy matching for partial skill matches
        for resume_skill in resume_skills_set:
            for jd_skill in jd_skills_set:
                if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match with a higher threshold
                    partial_skill_matches.add(resume_skill)

        # Combine exact and partial matches, avoiding double-counting
        total_skill_matches = exact_skill_matches.union(partial_skill_matches)
        skill_match_ratio = len(total_skill_matches) / len(jd_skills_set) if jd_skills_set else 0
        
        # Skills score is based on the match ratio, with more weight given to exact matches
        skill_score = skill_match_ratio * weights['skills'] * max_score
        score += skill_score

        print("\nExact Skill Matches:", exact_skill_matches)
        print("Partial Skill Matches:", partial_skill_matches)
        print("Skill Match Ratio:", skill_match_ratio)
        print("Skill Score:", skill_score)

        # --- Experience Matching ---
        experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0

        if resume_experience >= jd_experience_required:
            # Full marks for experience if the resume experience is greater than or equal to the JD requirement
            experience_score = weights['experience'] * max_score
        else:
            # Proportional score if resume experience is less than required
            experience_score = experience_match_ratio * weights['experience'] * max_score

        score += experience_score

        print("Experience Match Ratio:", experience_match_ratio)
        print("Experience Score:", experience_score)

        # --- Job Title Matching ---
        best_title_match = 0
        for resume_title in resume_job_titles:
            for jd_title in jd_job_titles:
                best_title_match = max(best_title_match, fuzzy_match(resume_title, jd_title))

        # Add job title score if a good match exists
        job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
        score += job_title_score

        print("Best Job Title Match Score:", best_title_match)
        print("Job Title Score:", job_title_score)

        # Return final score rounded to 2 decimal places
        final_score = round(score, 2)
        print("\nFinal Resume Score:", final_score)
        return final_score

    except Exception as e:
        print(f"Error calculating resume score: {e}")
        return 0

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Step 6: Example usage of the functions
def process_resume():
    # Define the PDF file paths
    resume_pdf_path = 'resume4.pdf'
    jd_pdf_path = 'jd.pdf'

    # Extract text from the resume and job description PDFs
    resume_text = read_pdf(resume_pdf_path)
    jd_text = read_pdf(jd_pdf_path)

    if resume_text and jd_text:
        # Extract structured information from both the resume and the job description
        resume_info_raw = extract_resume_info(resume_text)
        jd_info_raw = extract_jd_info(jd_text)

        # Save extracted info to .txt files
        save_extracted_info(resume_info_raw, 'extracted_resume_info.txt')
        save_extracted_info(jd_info_raw, 'extracted_jd_info.txt')

        # Step 3: Read the extracted info from the .txt files
        resume_info_text = read_file('extracted_resume_info.txt')
        jd_info_text = read_file('extracted_jd_info.txt')

        # Parse extracted information
        resume_info = parse_extracted_info(resume_info_text)
        jd_info = parse_extracted_info(jd_info_text)

        # Debugging output
        print("Resume Info:", resume_info)
        print("JD Info:", jd_info)

        # Step 4: Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Step 5: Calculate resume score
        score = calculate_resume_score(resume_info_text, jd_info_text)

        # Step 6: Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Run the processing function
if __name__ == '__main__':
    process_resume()


Information saved to extracted_resume_info.txt
Information saved to extracted_jd_info.txt
Resume Info: {'Here is the extracted information': '', '* Name': 'Shivam Sood', '* Email': 'soodshivam576@gmail.com', '* Phone Number': '+91 9821074705', '* List of Job Titles': 'Machine Learning Intern, Data Engineering Intern, Data Engineer', '* List of Skills': 'python, c, SQL, MYSQL, POSTGRESQL, Redshift, AWS, Azure Cloud, Pytorch, Scikit-learn, Tensorflow, NLP, Computer Vision, OpenCV, YOLO, Docker, VS Code, DBeaver, Jupyter Notebook, Git, Problem-Solving, Analytical Thinking, Communication, Team Collaboration', '* Years of Experience': "2 (since it's mentioned that the Bachelor's degree was completed in 2025 and the internship experience is from 2020-2021)", '* List of Companies worked with': 'QUBITNETS TECHNOLOGIES, Namasys Analytics'}
JD Info: {'Here is the extracted information': '', '**Company Name**': 'Not explicitly mentioned, but it can be inferred that it\'s "[Insert Company Name]" s

In [13]:
import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Extract key information using regex from the resume and JD text
def extract_info_from_text(text, info_type="resume"):
    """Extracts specific details from the given text using regular expressions."""
    extracted_info = {}

    if info_type == "resume":
        # Extracting resume information using regex
        extracted_info['Name'] = re.search(r'Name:\s*(.*)', text).group(1) if re.search(r'Name:\s*(.*)', text) else None
        extracted_info['Email'] = re.search(r'Email:\s*(.*)', text).group(1) if re.search(r'Email:\s*(.*)', text) else None
        extracted_info['Phone Number'] = re.search(r'Phone Number:\s*(.*)', text).group(1) if re.search(r'Phone Number:\s*(.*)', text) else None
        extracted_info['Job Titles'] = re.search(r'List of Job Titles:\s*(.*)', text).group(1) if re.search(r'List of Job Titles:\s*(.*)', text) else None
        extracted_info['Skills'] = re.search(r'List of Skills:\s*(.*)', text).group(1) if re.search(r'List of Skills:\s*(.*)', text) else None
        extracted_info['Years of Experience'] = re.search(r'Years of Experience:\s*(\d+)', text).group(1) if re.search(r'Years of Experience:\s*(\d+)', text) else None
        extracted_info['Companies'] = re.search(r'List of Companies worked with:\s*(.*)', text).group(1) if re.search(r'List of Companies worked with:\s*(.*)', text) else None
    elif info_type == "jd":
        # Extracting job description information using regex
        extracted_info['Company Name'] = re.search(r'Company Name:\s*(.*)', text).group(1) if re.search(r'Company Name:\s*(.*)', text) else None
        extracted_info['Email'] = re.search(r'Email:\s*(.*)', text).group(1) if re.search(r'Email:\s*(.*)', text) else None
        extracted_info['Phone Number'] = re.search(r'Phone Number:\s*(.*)', text).group(1) if re.search(r'Phone Number:\s*(.*)', text) else None
        extracted_info['Job Titles'] = re.search(r'Job Title:\s*(.*)', text).group(1) if re.search(r'Job Title:\s*(.*)', text) else None
        extracted_info['Required Skills'] = re.search(r'Required Skills:\s*(.*)', text).group(1) if re.search(r'Required Skills:\s*(.*)', text) else None
        extracted_info['Years of Experience'] = re.search(r'Years of Experience required:\s*(\d+)', text).group(1) if re.search(r'Years of Experience required:\s*(\d+)', text) else None

    # Cleaning the extracted info
    for key, value in extracted_info.items():
        if value:
            extracted_info[key] = clean_text(value)

    return extracted_info

# Step 3: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Create the database if it doesn't exist
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()

        # Select the created database
        conn.select_db(company_name)

        # Create the information table if it doesn't exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 4: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Select the database
        conn.select_db(company_name)

        # Insert into the database
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), resume_info.get('Skills'), score))
        conn.commit()

    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 5: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_info, jd_info):
    """Calculate the resume score based on skills, experience, and job title matches."""
    score = 0
    max_score = 100
    weights = {
        "skills": 0.6,
        "experience": 0.2,
        "job_title": 0.2
    }

    # Extracted details from the resume and job description
    resume_skills = set(resume_info.get('Skills', '').split(', '))
    jd_skills = set(jd_info.get('Required Skills', '').split(', '))

    # Skill matching (exact and partial match)
    exact_skill_matches = resume_skills.intersection(jd_skills)
    partial_skill_matches = set()

    # Fuzzy matching for partial skill matches
    for resume_skill in resume_skills:
        for jd_skill in jd_skills:
            if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match threshold
                partial_skill_matches.add(resume_skill)

    # Combine exact and partial matches
    total_skill_matches = exact_skill_matches.union(partial_skill_matches)
    skill_match_ratio = len(total_skill_matches) / len(jd_skills) if jd_skills else 0
    skill_score = skill_match_ratio * weights['skills'] * max_score
    score += skill_score

    # Experience matching
    resume_experience = int(resume_info.get('Years of Experience', 0))
    jd_experience_required = int(jd_info.get('Years of Experience', 0))
    experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
    experience_score = experience_match_ratio * weights['experience'] * max_score
    score += experience_score

    # Job title matching
    resume_job_titles = set(resume_info.get('Job Titles', '').split(', '))
    jd_job_titles = set(jd_info.get('Job Titles', '').split(', '))
    best_title_match = max([fuzzy_match(rjt, jjt) for rjt in resume_job_titles for jjt in jd_job_titles], default=0)
    job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
    score += job_title_score

    return round(score, 2)

# Step 6: Example usage of the functions
def process_resume():
    # Define the file paths
    resume_info_file = 'extracted_resume_info.txt'
    jd_info_file = 'extracted_jd_info.txt'

    # Read the extracted text from the files
    resume_info_text = read_file(resume_info_file)
    jd_info_text = read_file(jd_info_file)

    if resume_info_text and jd_info_text:
        # Extract structured information from both the resume and the job description
        resume_info = extract_info_from_text(resume_info_text, info_type="resume")
        jd_info = extract_info_from_text(jd_info_text, info_type="jd")

        # Debugging output
        print("Extracted Resume Info:", resume_info)
        print("Extracted JD Info:", jd_info)

        # Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Calculate resume score
        score = calculate_resume_score(resume_info, jd_info)

        # Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Run the processing function
if __name__ == '__main__':
    process_resume()


Extracted Resume Info: {'Name': 'Shivam Sood', 'Email': 'soodshivam576@gmail.com', 'Phone Number': '+91 9821074705', 'Job Titles': 'Machine Learning Intern, Data Engineering Intern, Data Engineer', 'Skills': 'python, c, SQL, MYSQL, POSTGRESQL, Redshift, AWS, Azure Cloud, Pytorch, Scikit-learn, Tensorflow, NLP, Computer Vision, OpenCV, YOLO, Docker, VS Code, DBeaver, Jupyter Notebook, Git, Problem-Solving, Analytical Thinking, Communication, Team Collaboration', 'Years of Experience': '2', 'Companies': 'QUBITNETS TECHNOLOGIES, Namasys Analytics'}
Extracted JD Info: {'Company Name': 'Google LLC', 'Email': 'johndoe@google.com', 'Phone Number': '(555) 555-5555', 'Job Titles': 'Machine Learning Engineer, Data Scientist', 'Required Skills': 'Python, TensorFlow, Keras, PyTorch, Hadoop, Spark, problem-solving skills', 'Years of Experience': '3'}
Resume processed and stored in database 'Google_LLC' with score 65.12


In [None]:
import mysql.connector
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

def get_server_connection():
    """Establishes a connection to the MySQL server."""
    connection = mysql.connector.connect(
        host="localhost",
        user="root",
        password="root"
    )
    return connection

def fuzzy_match(a, b):
    """Calculates the fuzzy match ratio between two strings."""
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def clean_text(text):
    """Cleans and normalizes extracted text."""
    text = re.sub(r'[\*\*]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''

def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''

def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")

def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                # Clean up the key by removing leading asterisks and whitespace
                clean_key = key.strip().lstrip('*').strip()
                info[clean_key] = value.strip()
        
        print("Parsed Information:", info)  # Debugging output
        
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

def create_company_db(company_name):
    """Creates a database and information table for the company."""
    sanitized_company_name = company_name.replace(" ", "_").replace("'", "''")
    
    try:
        with get_server_connection() as conn:
            conn.database = sanitized_company_name
            with conn.cursor() as cursor:
                # Use backticks to safely create the database
                cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{sanitized_company_name}`")
                conn.database = sanitized_company_name

                # Create the information table if it doesn't exist
                cursor.execute("""CREATE TABLE IF NOT EXISTS information (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    name VARCHAR(255),
                    phone_number VARCHAR(50),
                    email VARCHAR(255),
                    skills TEXT,
                    score FLOAT
                )""")
                print(f"Database '{sanitized_company_name}' and table 'information' created or already exists.")

    except mysql.connector.Error as e:
        print(f"Error creating database or table: {e}")

def insert_resume_info(company_name, resume_info, score):
    """Inserts extracted resume info into the database."""
    sanitized_company_name = company_name.replace(" ", "_"). replace("'", "''")
    
    try:
        with get_server_connection() as conn:
            conn.database = sanitized_company_name
            with conn.cursor() as cursor:
                name = resume_info.get('Name')
                phone_number = resume_info.get('Phone Number')
                email = resume_info.get('Email')
                skills = resume_info.get('List of Skills (comma-separated)')

                if not all([name, phone_number, email, skills]):
                    print("Error: Missing required fields. Insertion aborted.")
                    return

                query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
                cursor.execute(query, (name, phone_number, email, skills, score))
                conn.commit()
                print("Resume information inserted successfully.")

    except mysql.connector.Error as e:
        print(f"Error inserting resume info: {e}")

def extract_skills(text, type="resume"):
    """Extracts skills from text using multiple regex patterns."""
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',  
        r'\*\*Skills:\*\*\s*(.+)',                
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'\*\*Required Skills\*\*:\s*(.+)',       
        r'Required Skills:\s*(.+)',              
        r'\*\*Skills\*\*:\s*(.+)',               
        r'List of Skills:\s*(.+)',               
        r'List of Skills: (.+)',                  
        r'Skills:\s*(.*)\n',                      
        r'\* Skills\s*:\s*(.+)',                  
        r'Skills\s*(?:\(.+?\):)?\s*(.+)',       
        r'\*\*Skills:\*\*\s*(.+)',               
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'Required Skills?:\s*(.+)',            
        r'\*\*Skills\*\*:\s*(.+)',               
        r'List of Skills:\s*(.+)',               
        r'Skills\s*(?:\(.+?\):)?\s*(.+)',       
        r'\*\*Skills:\*\*\s*(.+)',               
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'Required Skills?:\s*(.+)',            
        r'\*\*Skills\*\*:\s*(.+)',               
        r'Position Titles?:\s*(.+)',           
        r'\*\*Job Title\*\*:\s*(.+)',          
        r'Position\s*(?:Held|Held\s*:\s*|Titles?)\s*:\s*(.+)', 
        r'\b(?:Work Experience|Employment History)\b\s*:\s*(.+)',  
    ]
    
    for pattern in patterns:
        skills_regex = re.search(pattern, text)
        if skills_regex:
            return [clean_text(skill.strip().lower()) for skill in skills_regex.group(1).split(',')]
    
    return []

def extract_job_titles(text, type="resume"):
    """Extracts job titles from text using multiple regex patterns."""
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',  
        r'\*\*Job Titles:\*\*\s*(.+)',               
        r'\* Job Titles:\s*(.+)',                    
        r'Job Titles:\s*(.+)',                       
        r'\*\*Job Title\*\*:\s*(.+)',              
        r'Job Title:\s*(.+)',                        
        r'\*\*Job Titles\*\*:\s*(.+)',              
        r'List of Job Titles:\s*(.+)',              
        r'List of Job Titles\s*:\s*(.+)',           
        r'Job Titles?\s*(?:\(.+?\):)?\s*(.+)',       
        r'\*\*Job Titles:\*\*\s*(.+)',              
        r'\* Job Titles?:\s*(.+)',                  
        r'Job Titles?:\s*(.+)',                     
        r'Position Titles?:\s*(.+)',               
        r'\*\*Job Title\*\*:\s*(.+)',              
        r'Position\s*(?:Held|Held\s*:\s*|Titles?)\s*:\s*(.+)', 
        r'\b(?:Work Experience|Employment History)\b\s*:\s*(.+)',  
    ]
    
    for pattern in patterns:
        job_titles_regex = re.search(pattern, text)
        if job_titles_regex:
            return [clean_text(title.strip().lower()) for title in job_titles_regex.group(1).split(',')]
    
    return []

def extract_experience(text, type="resume"):
    """Extracts years of experience from text using multiple regex patterns."""
    patterns = [
        r'Years of Experience:\s*(\d+)',               
        r'\*\* Years of Experience:\*\*\s*(\d+)',       
        r'\* Years of Experience:\s*(\d+)',            
        r'Experience:\s*(\d+)',                        
        r'Years of Experience Required:\s*(\d+)',      
        r'\*\*Experience Required\*\*:\s*(\d+)',        
    ]
    
    for pattern in patterns:
        experience_regex = re.search(pattern, text)
        if experience_regex:
            return int(experience_regex.group(1).strip())
    
    return 0

def main():
    resume_pdf_path = "resume.pdf"  # Update with actual path
    jd_pdf_path = "jd2.pdf"  # Update with actual path

    # Extract text from PDF resumes and job descriptions
    resume_text = read_pdf(resume_pdf_path)
    jd_text = read_pdf(jd_pdf_path)

    # Extract information from resume and job description
    resume_info = extract_resume_info(resume_text)
    jd_info = extract_jd_info(jd_text)

    # Parse the extracted information into dictionaries
    resume_data = parse_extracted_info(resume_info)
    jd_data = parse_extracted_info(jd_info)

    # Create database for the company from job description
    company_name = jd_data.get('Company Name', 'DefaultCompany')
    create_company_db(company_name)

    # Extract skills and job titles
    resume_skills = extract_skills(resume_text)
    resume_job_titles = extract_job_titles(resume_text)
    jd_skills = extract_skills(jd_text, type="jd")
    jd_job_titles = extract_job_titles(jd_text, type="jd")

    # Extract years of experience
    resume_experience = extract_experience(resume_text)
    jd_experience_required = extract_experience(jd_text)

    # Calculate scores
    max_score = 1.0
    weights = {
        "skills": 0.6,
        "experience": 0.3,
        "job_title": 0.1
    }

    # Skill Matching
    total_skill_matches = set(resume_skills) & set(jd_skills)
    skill_match_ratio = len(total_skill_matches) / len(set(jd_skills)) if len(set(jd_skills)) > 0 else 0

    # Experience Matching
    experience_score = 0
    if resume_experience >= jd_experience_required:
        experience_score = 1
    elif resume_experience > 0:
        experience_score = resume_experience / jd_experience_required if jd_experience_required > 0 else 0

    # Job Title Matching
    job_title_score = 0
    for resume_title in resume_job_titles:
        for jd_title in jd_job_titles:
            if fuzzy_match(resume_title, jd_title) > 0.7:
                job_title_score = 1
                break

    # Combine scores based on weights
    final_score = (weights["skills"] * skill_match_ratio +
                   weights["experience"] * experience_score +
                   weights["job_title"] * job_title_score) * max_score

    # Insert extracted resume info into the database
    insert_resume_info(company_name, resume_data, final_score)

    print(f"Final Score for the resume: {final_score:.2f}")

if __name__ == "__main__":
    main()

In [3]:
import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

def get_server_connection():
    """Establishes a connection to the MySQL server using pymysql."""
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # This allows us to return results as dictionaries
    )
    return connection

def fuzzy_match(a, b):
    """Calculates the fuzzy match ratio between two strings."""
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def clean_text(text):
    """Cleans and normalizes extracted text."""
    text = re.sub(r'[\*\*]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''

def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''

def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")

def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                clean_key = key.strip().lstrip('*').strip()
                info[clean_key] = value.strip()
        
        print("Parsed Information:", info)  # Debugging output
        
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

def create_company_db(company_name):
    """Creates a database and information table for the company."""
    sanitized_company_name = company_name.replace(" ", "_").replace("'", "''")
    
    try:
        conn = get_server_connection()
        with conn.cursor() as cursor:
            # Use backticks to safely create the database
            cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{sanitized_company_name}`")
            conn.select_db(sanitized_company_name)

            # Create the information table if it doesn't exist
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS information (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    name VARCHAR(255),
                    phone_number VARCHAR(50),
                    email VARCHAR(255),
                    skills TEXT,
                    score FLOAT
                )
            """)
            print(f"Database '{sanitized_company_name}' and table 'information' created or already exists.")
        conn.commit()
    except pymysql.MySQLError as e:
        print(f"Error creating database or table: {e}")
    finally:
        conn.close()

def insert_resume_info(company_name, resume_info, score):
    """Inserts extracted resume info into the database."""
    sanitized_company_name = company_name.replace(" ", "_"). replace("'", "''")
    
    try:
        conn = get_server_connection()
        with conn.cursor() as cursor:
            conn.select_db(sanitized_company_name)
            name = resume_info.get('Name')
            phone_number = resume_info.get('Phone Number')
            email = resume_info.get('Email')
            skills = resume_info.get('List of Skills (comma-separated)')

            if not all([name, phone_number, email, skills]):
                print("Error: Missing required fields. Insertion aborted.")
                return

            query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
            cursor.execute(query, (name, phone_number, email, skills, score))
            conn.commit()
            print("Resume information inserted successfully.")
    except pymysql.MySQLError as e:
        print(f"Error inserting resume info: {e}")
    finally:
        conn.close()

def extract_skills(text, type="resume"):
    """Extracts skills from text using multiple regex patterns."""
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',  
        r'\*\*Skills:\*\*\s*(.+)',                
        r'\* Skills:\s*(.+)',                    
        r'Skills:\s*(.+)',                       
        r'\*\*Required Skills\*\*:\s*(.+)',       
        r'Required Skills:\s*(.+)',              
        r'\*\*Skills\*\*:\s*(.+)',               
        r'List of Skills:\s*(.+)',               
        r'List of Skills: (.+)',                  
        r'Skills:\s*(.*)\n',                      
        r'\* Skills\s*:\s*(.+)',                  
        r'Skills\s*(?:\(.+?\):)?\s*(.+)',       
    ]
    
    for pattern in patterns:
        skills_regex = re.search(pattern, text)
        if skills_regex:
            return [clean_text(skill.strip().lower()) for skill in skills_regex.group(1).split(',')]
    
    return []

def extract_job_titles(text, type="resume"):
    """Extracts job titles from text using multiple regex patterns."""
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',  
        r'\*\*Job Titles:\*\*\s*(.+)',               
        r'\* Job Titles:\s*(.+)',                    
        r'Job Titles:\s*(.+)',                       
        r'\*\*Job Title\*\*:\s*(.+)',              
        r'Job Title:\s*(.+)',                        
        r'\*\*Job Titles\*\*:\s*(.+)',              
        r'List of Job Titles:\s*(.+)',              
        r'List of Job Titles\s*:\s*(.+)',           
        r'Job Titles?\s*(?:\(.+?\):)?\s*(.+)',       
    ]
    
    for pattern in patterns:
        job_titles_regex = re.search(pattern, text)
        if job_titles_regex:
            return [clean_text(title.strip().lower()) for title in job_titles_regex.group(1).split(',')]
    
    return []

def extract_experience(text, type="resume"):
    """Extracts years of experience from text using multiple regex patterns."""
    patterns = [
        r'Years of Experience:\s*(\d+)',               
        r'\*\* Years of Experience:\*\*\s*(\d+)',       
        r'\* Years of Experience:\s*(\d+)',            
        r'Experience:\s*(\d+)',                        
        r'Years of Experience Required:\s*(\d+)',      
        r'\*\*Experience Required\*\*:\s*(\d+)',        
    ]
    
    for pattern in patterns:
        experience_regex = re.search(pattern, text)
        if experience_regex:
            return int(experience_regex.group(1).strip())
    
    return 0

def main():
    resume_pdf_path = "resume.pdf"  # Update with actual path
    jd_pdf_path = "jd2.pdf"  # Update with actual path

    # Extract text from PDF resumes and job descriptions
    resume_text = read_pdf(resume_pdf_path)
    jd_text = read_pdf(jd_pdf_path)

    # Extract information from resume and job description
    resume_info = extract_resume_info(resume_text)
    jd_info = extract_jd_info(jd_text)

    # Parse the extracted information into dictionaries
    resume_data = parse_extracted_info(resume_info)
    jd_data = parse_extracted_info(jd_info)

    # Create database for the company from job description
    company_name = jd_data.get('Company Name', 'DefaultCompany')
    create_company_db(company_name)

    # Extract skills and job titles
    resume_skills = extract_skills(resume_text)
    resume_job_titles = extract_job_titles(resume_text)
    jd_skills = extract_skills(jd_text, type="jd")
    jd_job_titles = extract_job_titles(jd_text, type="jd")

    # Extract years of experience
    resume_experience = extract_experience(resume_text)
    jd_experience_required = extract_experience(jd_text)

    # Calculate scores
    max_score = 1.0
    weights = {
        "skills": 0.6,
        "experience": 0.3,
        "job_title": 0.1
    }

    # Skill Matching
    total_skill_matches = set(resume_skills) & set(jd_skills)
    skill_match_ratio = len(total_skill_matches) / len(set(jd_skills)) if len(set(jd_skills)) > 0 else 0

    # Experience Matching
    experience_score = 0
    if resume_experience >= jd_experience_required:
        experience_score = 1
    elif resume_experience > 0:
        experience_score = resume_experience / jd_experience_required if jd_experience_required > 0 else 0

    # Job Title Matching
    job_title_score = 0
    for resume_title in resume_job_titles:
        for jd_title in jd_job_titles:
            if fuzzy_match(resume_title, jd_title) > 0.7:
                job_title_score = 1
                break

    # Combine scores based on weights
    final_score = (weights["skills"] * skill_match_ratio +
                   weights["experience"] * experience_score +
                   weights["job_title"] * job_title_score) * max_score

    # Insert extracted resume info into the database
    insert_resume_info(company_name, resume_data, final_score)

    print(f"Final Score for the resume: {final_score:.2f}")

if __name__ == "__main__":
    main()


Parsed Information: {'Here is the extracted information': '', 'Name': 'Prashant Singh', 'Email': 'prashantsingha96@gmail.com', 'Phone Number': '+91 8368796901', 'Job Titles': 'Machine Learning Intern, Data Scientist, Machine Learning Enthusiast', 'Skills': 'Python, C, SQL, MYSQL, POSTGRESQL, Redshift, AWS, Azure Cloud, Pytorch, Scikit-learn, Tensorflow, NLP, OpenCV, YOLO, Docker, VS Code, DBeaver, Jupyter Notebook, Git, Problem-Solving, Analytical Thinking, Communication, Team Collaboration', 'Years of Experience': '2 (mentioned in the "WORK HISTORY" section)', 'Companies worked with': 'QUBITNETS TECHNOLOGIES'}
Parsed Information: {'Here is the extracted information': '', 'Company Name': 'Google LLC', 'Email': 'johndoe@google.com', 'Phone Number': '(555) 555-5555', 'Job Title': 'Machine Learning Engineer, Collaborate with cross-functional teams to deploy machine learning solutions at scale', 'List of Required Skills': 'Python, TensorFlow, Keras, PyTorch, large-scale data processing usi

In [4]:
import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

def get_server_connection():
    """Establishes a connection to the MySQL server using pymysql."""
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # This allows us to return results as dictionaries
    )
    return connection

def fuzzy_match(a, b):
    """Calculates the fuzzy match ratio between two strings."""
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def clean_text(text):
    """Cleans and normalizes extracted text."""
    text = re.sub(r'[\*\*]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

def parse_extracted_info(text):
    """Parses the extracted information into a dictionary and removes unnecessary fields."""
    info = {}
    try:
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                clean_key = key.strip().lstrip('*').strip()
                info[clean_key] = value.strip()

        # Remove unnecessary or empty fields
        if 'Here is the extracted information' in info:
            info.pop('Here is the extracted information')

        print("Parsed Information:", info)  # Debugging output
        
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

def create_company_db(company_name):
    """Creates a database and information table for the company."""
    sanitized_company_name = company_name.replace(" ", "_").replace("'", "''")
    
    try:
        conn = get_server_connection()
        with conn.cursor() as cursor:
            # Use backticks to safely create the database
            cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{sanitized_company_name}`")
            conn.select_db(sanitized_company_name)

            # Create the information table if it doesn't exist
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS information (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    name VARCHAR(255),
                    phone_number VARCHAR(50),
                    email VARCHAR(255),
                    skills TEXT,
                    score FLOAT
                )
            """)
            print(f"Database '{sanitized_company_name}' and table 'information' created or already exists.")
        conn.commit()
    except pymysql.MySQLError as e:
        print(f"Error creating database or table: {e}")
    finally:
        conn.close()

def insert_resume_info(company_name, resume_info, score):
    """Inserts extracted resume info into the database."""
    sanitized_company_name = company_name.replace(" ", "_"). replace("'", "''")
    
    try:
        conn = get_server_connection()
        with conn.cursor() as cursor:
            conn.select_db(sanitized_company_name)
            
            # Use correct key names based on parsed dictionary structure
            name = resume_info.get('Name')
            phone_number = resume_info.get('Phone Number')
            email = resume_info.get('Email')
            skills = resume_info.get('Skills')  # Ensure the key is correct

            # Check if required fields are available
            if not all([name, phone_number, email, skills]):
                print(f"Error: Missing required fields - Name: {name}, Phone: {phone_number}, Email: {email}, Skills: {skills}. Insertion aborted.")
                return

            query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
            cursor.execute(query, (name, phone_number, email, skills, score))
            conn.commit()
            print("Resume information inserted successfully.")
    except pymysql.MySQLError as e:
        print(f"Error inserting resume info: {e}")
    finally:
        conn.close()

def main():
    resume_pdf_path = "resume.pdf"  # Update with actual path
    jd_pdf_path = "jd2.pdf"  # Update with actual path

    # Extract text from PDF resumes and job descriptions
    resume_text = read_pdf(resume_pdf_path)
    jd_text = read_pdf(jd_pdf_path)

    # Parse the extracted information into dictionaries
    resume_data = parse_extracted_info(resume_text)
    jd_data = parse_extracted_info(jd_text)

    # Create database for the company from job description
    company_name = jd_data.get('Company Name', 'DefaultCompany')
    create_company_db(company_name)

    # Calculate a sample score (could be based on matching logic)
    final_score = 0.3  # Placeholder score based on your logic

    # Insert extracted resume info into the database
    insert_resume_info(company_name, resume_data, final_score)

if __name__ == "__main__":
    main()



Parsed Information: {'Soft Skills - Problem-Solving, Analytical Thinking, Communication, Team Collaboration': '', 'Designed Hospital Management System': 'Developed SQL database schema for efficient hospital data management.', 'Built Predictive House Price Model': 'Engineered regression analysis tool for accurate real estate value estimation.', 'Optimized Model Parameters': 'Fine-tuned features to enhance forecasting precision and reliability.'}
Parsed Information: {'Job Description': 'Machine Learning Engineer', 'Company': 'Google LLC', 'Location': '1600 Amphitheatre Parkway, Mountain View, CA 94043', 'Contact': 'John Doe | Phone: (555) 555-5555 | Email: johndoe@google.com', 'Key Responsibilities': '', 'Required Qualifications': '', 'Preferred Qualifications': "1. Master's or PhD in Machine Learning, AI, or a related field.", 'How to Apply': ''}
Database 'DefaultCompany' and table 'information' created or already exists.
Error: Missing required fields - Name: None, Phone: None, Email: 

In [2]:
import re
import pymysql
from difflib import SequenceMatcher
from PyPDF2 import PdfReader
import pandas as pd

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract data based on multiple patterns
def extract_info(text, info_type="resume"):
    """Extract specific details from the given text using multiple regex patterns."""
    
    def extract_skills(text):
        patterns = [
            r'Skills\s*\(comma-separated\):\s*(.+)',  
            r'\\*Skills:\\\s(.+)',                
            r'\* Skills:\s*(.+)',                    
            r'Skills:\s*(.+)',                       
            r'\\*Required Skills\\:\s(.+)',       
            r'Required Skills:\s*(.+)',              
            r'List of Skills:\s*(.+)',               
        ]
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return [clean_text(skill.strip().lower()) for skill in match.group(1).split(',')]
        return []
    
    def extract_job_titles(text):
        patterns = [
            r'Job Titles\s*\(comma-separated\):\s*(.+)',  
            r'\\*Job Titles:\\\s(.+)',               
            r'\* Job Titles:\s*(.+)',                    
            r'Job Titles:\s*(.+)',                       
            r'\\*Job Title\\:\s(.+)',              
            r'Job Title:\s*(.+)',                        
            r'List of Job Titles:\s*(.+)',              
        ]
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return [clean_text(title.strip().lower()) for title in match.group(1).split(',')]
        return []
    
    def extract_experience(text):
        patterns = [
            r'Years of Experience:\s*(\d+)',               
            r'Experience required:\s*(\d+)',              
            r'\* Years of Experience:\s*(\d+)',            
            r'\d+\s*years? experience(?: required)?',      
            r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)',  
        ]
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return int(match.group(1))
        return 0
    
    # Extract relevant fields
    extracted_info = {}
    extracted_info['Skills'] = extract_skills(text)
    extracted_info['Job Titles'] = extract_job_titles(text)
    extracted_info['Years of Experience'] = extract_experience(text)
    
    # Clean the extracted info
    for key, value in extracted_info.items():
        if isinstance(value, str):
            extracted_info[key] = clean_text(value)
    
    return extracted_info

# Step 3: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Create the database if it doesn't exist
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()

        # Select the created database
        conn.select_db(company_name)

        # Create the information table if it doesn't exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 4: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Select the database
        conn.select_db(company_name)

        # Insert into the database
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), ', '.join(resume_info.get('Skills', [])), score))
        conn.commit()

    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 5: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_info, jd_info):
    """Calculate the resume score based on skills, experience, and job title matches."""
    score = 0
    max_score = 100
    weights = {
        "skills": 0.6,
        "experience": 0.2,
        "job_title": 0.2
    }

    # Extracted details from the resume and job description
    resume_skills = set(resume_info.get('Skills', []))
    jd_skills = set(jd_info.get('Skills', []))

    # Skill matching (exact and partial match)
    exact_skill_matches = resume_skills.intersection(jd_skills)
    partial_skill_matches = set()

    # Fuzzy matching for partial skill matches
    for resume_skill in resume_skills:
        for jd_skill in jd_skills:
            if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match threshold
                partial_skill_matches.add(resume_skill)

    # Combine exact and partial matches
    total_skill_matches = exact_skill_matches.union(partial_skill_matches)
    skill_match_ratio = len(total_skill_matches) / len(jd_skills) if jd_skills else 0
    skill_score = skill_match_ratio * weights['skills'] * max_score
    score += skill_score

    # Experience matching
    resume_experience = int(resume_info.get('Years of Experience', 0))
    jd_experience_required = int(jd_info.get('Years of Experience', 0))
    experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
    experience_score = experience_match_ratio * weights['experience'] * max_score
    score += experience_score

    # Job title matching
    resume_job_titles = set(resume_info.get('Job Titles', []))
    jd_job_titles = set(jd_info.get('Job Titles', []))
    best_title_match = max([fuzzy_match(rjt, jjt) for rjt in resume_job_titles for jjt in jd_job_titles], default=0)
    job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
    score += job_title_score

    return round(score, 2)

# Step 6: Process the resume and JD
def process_resume():
    # Define the file paths
    resume_info_file = 'extracted_resume_info.txt'
    jd_info_file = 'extracted_jd_info.txt'

    # Read the extracted text from the files
    resume_info_text = read_file(resume_info_file)
    jd_info_text = read_file(jd_info_file)

    if resume_info_text and jd_info_text:
        # Extract structured information from both the resume and the job description
        resume_info = extract_info(resume_info_text, info_type="resume")
        jd_info = extract_info(jd_info_text, info_type="jd")

        # Debugging output
        print("Extracted Resume Info:", resume_info)
        print("Extracted JD Info:", jd_info)

        # Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Calculate resume score
        score = calculate_resume_score(resume_info, jd_info)

        # Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Run the processing function
if __name__ == '__main__':
    process_resume()


Extracted Resume Info: {'Skills': ['python', 'c', 'sql', 'mysql', 'postgresql', 'redshift', 'aws', 'azure cloud', 'pytorch', 'scikit-learn', 'tensorflow', 'nlp', 'computer vision', 'opencv', 'yolo', 'docker', 'vs code', 'dbeaver', 'jupyter notebook', 'git', 'problem-solving', 'analytical thinking', 'communication', 'team collaboration'], 'Job Titles': ['machine learning intern', 'data engineering intern', 'data engineer'], 'Years of Experience': 2}
Extracted JD Info: {'Skills': ['python', 'tensorflow', 'keras', 'pytorch', 'hadoop', 'spark', 'problem-solving skills'], 'Job Titles': ['machine learning engineer', 'data scientist'], 'Years of Experience': 3}
Resume processed and stored in database 'default_company' with score 65.12


In [16]:
import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Enhanced extraction using multiple regex patterns
def extract_info_from_text(text, info_type="resume"):
    """Extracts specific details from the given text using multiple regex patterns."""

    # Helper function for extracting and cleaning text based on patterns
    def extract_field(text, patterns):
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return clean_text(match.group(1))
        return None

    # Defining patterns for each field based on the type (resume or job description)
    if info_type == "resume":
        name_patterns = [
            r'Name:\s*(.+)',
            r'\\*Name\\:\s*(.+)',
            r'Full Name:\s*(.+)',
            r'\bName\b\s*:\s*(.+)'  
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]

    elif info_type == "jd":
        name_patterns = [
            r'Company Name:\s*(.+)',
            r'\b(?:Organization|Employer)\b\s*:\s*(.+)'
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]

    # Extract the relevant fields using the patterns
    extracted_info = {
        'Name': extract_field(text, name_patterns) if info_type == "resume" else extract_field(text, name_patterns),
        'Email': extract_field(text, email_patterns),
        'Phone Number': extract_field(text, phone_patterns),
        'Job Titles': ', '.join(extract_job_titles(text)),  # Use updated extract_job_titles function
        'Skills': ', '.join(extract_skills(text)),  # Use updated extract_skills function
        'Years of Experience': extract_experience(text)  # Use updated extract_experience function
    }

    return extracted_info

# Enhanced pattern-matching functions

def extract_skills(text):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',
        r'\\*Skills\\:\s(.+)',
        r'\* Skills:\s*(.+)',
        r'Skills:\s*(.+)',
        r'\\*Required Skills\\:\s(.+)',
        r'Required Skills:\s*(.+)',
        r'List of Skills:\s*(.+)',
        r'\bSkills\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(skill.strip().lower()) for skill in match.group(1).split(',')]
    return []

def extract_job_titles(text):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',
        r'\\*Job Titles\\:\s(.+)',
        r'\* Job Titles:\s*(.+)',
        r'Job Titles:\s*(.+)',
        r'\\*Job Title\\:\s(.+)',
        r'Job Title:\s*(.+)',
        r'List of Job Titles:\s*(.+)',
        r'\b(?:Positions|Roles)\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(title.strip().lower()) for title in match.group(1).split(',')]
    return []

def extract_experience(text):
    patterns = [
        r'Years of Experience:\s*(\d+)',
        r'Experience required:\s*(\d+)',
        r'\* Years of Experience:\s*(\d+)',
        r'\d+\s*years? experience(?: required)?',
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return int(match.group(1))
    return 0

# Step 3: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Create the database if it doesn't exist
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()

        # Select the created database
        conn.select_db(company_name)

        # Create the information table if it doesn't exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 4: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Select the database
        conn.select_db(company_name)

        # Insert into the database
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), resume_info.get('Skills'), score))
        conn.commit()

    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 5: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_info, jd_info):
    """Calculate the resume score based on skills, experience, and job title matches."""
    score = 0
    max_score = 100
    weights = {
        "skills": 0.6,
        "experience": 0.2,
        "job_title": 0.2
    }

    # Extracted details from the resume and job description
    resume_skills = set(resume_info.get('Skills', '').split(', '))
    jd_skills = set(jd_info.get('Required Skills', '').split(', '))

    # Skill matching (exact and partial match)
    exact_skill_matches = resume_skills.intersection(jd_skills)
    partial_skill_matches = set()

    # Fuzzy matching for partial skill matches
    for resume_skill in resume_skills:
        for jd_skill in jd_skills:
            if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match threshold
                partial_skill_matches.add(resume_skill)

    # Combine exact and partial matches
    total_skill_matches = exact_skill_matches.union(partial_skill_matches)
    skill_match_ratio = len(total_skill_matches) / len(jd_skills) if jd_skills else 0
    skill_score = skill_match_ratio * weights['skills'] * max_score
    score += skill_score

    # Experience matching
    resume_experience = int(resume_info.get('Years of Experience', 0))
    jd_experience_required = int(jd_info.get('Years of Experience', 0))
    experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
    experience_score = experience_match_ratio * weights['experience'] * max_score
    score += experience_score

    # Job title matching
    resume_job_titles = set(resume_info.get('Job Titles', '').split(', '))
    jd_job_titles = set(jd_info.get('Job Titles', '').split(', '))
    best_title_match = max([fuzzy_match(rjt, jjt) for rjt in resume_job_titles for jjt in jd_job_titles], default=0)
    job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
    score += job_title_score

    return round(score, 2)

# Step 6: Example usage of the functions
def process_resume():
    # Define the file paths
    resume_info_file = 'extracted_resume_info.txt'
    jd_info_file = 'extracted_jd_info.txt'

    # Read the extracted text from the files
    resume_info_text = read_file(resume_info_file)
    jd_info_text = read_file(jd_info_file)

    if resume_info_text and jd_info_text:
        # Extract structured information from both the resume and the job description
        resume_info = extract_info_from_text(resume_info_text, info_type="resume")
        jd_info = extract_info_from_text(jd_info_text, info_type="jd")

        # Debugging output
        print("Extracted Resume Info:", resume_info)
        print("Extracted JD Info:", jd_info)

        # Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Calculate resume score
        score = calculate_resume_score(resume_info, jd_info)

        # Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Run the processing function
if __name__ == '__main__':
    process_resume()


Extracted Resume Info: {'Name': '** Shivam Sood', 'Email': None, 'Phone Number': None, 'Job Titles': '** machine learning engineer, data engineering intern, intern', 'Skills': '** python, c, sql, mysql, postgresql, redshift, aws, azure cloud, pytorch, scikit-learn, tensorflow, nlp, opencv, yolo, docker, vs code, dbeaver, jupyter notebook, git, problem-solving, analytical thinking, communication, team collaboration', 'Years of Experience': 0}
Extracted JD Info: {'Name': 'Google LLC', 'Email': 'johndoe@google.com', 'Phone Number': None, 'Job Titles': 'machine learning engineer, ai-powered solutions, machine learning pipeline engineer', 'Skills': 'python, tensorflow, keras, pytorch, hadoop, spark, excellent problem-solving skills', 'Years of Experience': 3}
Resume processed and stored in database 'default_company' with score 18.87


In [1]:
from langchain_community.llms import Ollama
MODEL = "llama3"
model = Ollama(model=MODEL)

  model = Ollama(model=MODEL)


In [21]:
import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Enhanced extraction using multiple regex patterns
def extract_info_from_text(text, info_type="resume"):
    """Extracts specific details from the given text using multiple regex patterns."""

    # Helper function for extracting and cleaning text based on patterns
    def extract_field(text, patterns):
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return clean_text(match.group(1))
        return None

    # Defining patterns for each field based on the type (resume or job description)
    if info_type == "resume":
        name_patterns = [
            r'Name:\s*(.+)',  # New pattern for "Name:"
            r'\\*Name\\:\s*(.+)',
            r'Full Name:\s*(.+)',
            r'\*\*Name:\*\*\s*(.+)',  # Pattern for **Name:** format
            r'\bName\b\s*:\s*(.+)'  
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\*\*Email:\*\*\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',  # New pattern for **Email:** format
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'\*\*Phone Number:\*\*\s*(\+?\d[\d\s-]+)',  # New pattern for **Phone Number:** format
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]
        company_patterns = [
            r'Companies worked with:\s*(.+)',  # Pattern for "Companies worked with:"
            r'\*\*Companies worked with:\*\*\s*(.+)'  # Pattern for **Companies worked with:** format
        ]

    elif info_type == "jd":
        name_patterns = [
            r'Company Name:\s*(.+)',
            r'\b(?:Organization|Employer)\b\s*:\s*(.+)'
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]

    # Extract the relevant fields using the patterns
    extracted_info = {
        'Name': extract_field(text, name_patterns),
        'Email': extract_field(text, email_patterns),
        'Phone Number': extract_field(text, phone_patterns),
        'Job Titles': ', '.join(extract_job_titles(text)),  # Use updated extract_job_titles function
        'Skills': ', '.join(extract_skills(text)),  # Use updated extract_skills function
        'Years of Experience': extract_experience(text),  # Use updated extract_experience function
    }

    return extracted_info

# Enhanced pattern-matching functions

def extract_skills(text):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',
        r'\*\*Skills:\*\*\s*(.+)',  # Pattern for **Skills:** format
        r'\\*Skills\\:\s(.+)',
        r'\* Skills:\s*(.+)',
        r'Skills:\s*(.+)',
        r'\\*Required Skills\\:\s(.+)',
        r'Required Skills:\s*(.+)',
        r'List of Skills:\s*(.+)',
        r'\bSkills\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(skill.strip().lower()) for skill in match.group(1).split(',')]
    return []

def extract_job_titles(text):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',
        r'\*\*Job Titles:\*\*\s*(.+)',  # Pattern for **Job Titles:** format
        r'\\*Job Titles\\:\s(.+)',
        r'\* Job Titles:\s*(.+)',
        r'Job Titles:\s*(.+)',
        r'\\*Job Title\\:\s(.+)',
        r'Job Title:\s*(.+)',
        r'List of Job Titles:\s*(.+)',
        r'\b(?:Positions|Roles)\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(title.strip().lower()) for title in match.group(1).split(',')]
    return []

def extract_experience(text):
    patterns = [
        r'Years of Experience:\s*(\d+)',
        r'\*\*Years of Experience:\*\*\s*(\d+)',  # Pattern for **Years of Experience:** format
        r'Experience required:\s*(\d+)',
        r'\* Years of Experience:\s*(\d+)',
        r'\d+\s*years? experience(?: required)?',
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return int(match.group(1))
    return 0

# Step 3: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Create the database if it doesn't exist
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()

        # Select the created database
        conn.select_db(company_name)

        # Create the information table if it doesn't exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 4: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Select the database
        conn.select_db(company_name)

        # Insert into the database
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), resume_info.get('Skills'), score))
        conn.commit()

    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 5: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_info, jd_info):
    """Calculate the resume score based on skills, experience, and job title matches."""
    score = 0
    max_score = 100
    weights = {
        "skills": 0.6,
        "experience": 0.2,
        "job_title": 0.2
    }

    # Extracted details from the resume and job description
    resume_skills = set(resume_info.get('Skills', '').split(', '))
    jd_skills = set(jd_info.get('Required Skills', '').split(', '))

    # Skill matching (exact and partial match)
    exact_skill_matches = resume_skills.intersection(jd_skills)
    partial_skill_matches = set()

    # Fuzzy matching for partial skill matches
    for resume_skill in resume_skills:
        for jd_skill in jd_skills:
            if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match threshold
                partial_skill_matches.add(resume_skill)

    # Combine exact and partial matches
    total_skill_matches = exact_skill_matches.union(partial_skill_matches)
    skill_match_ratio = len(total_skill_matches) / len(jd_skills) if jd_skills else 0
    skill_score = skill_match_ratio * weights['skills'] * max_score
    score += skill_score

    # Experience matching
    resume_experience = int(resume_info.get('Years of Experience', 0))
    jd_experience_required = int(jd_info.get('Years of Experience', 0))
    experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
    experience_score = experience_match_ratio * weights['experience'] * max_score
    score += experience_score

    # Job title matching
    resume_job_titles = set(resume_info.get('Job Titles', '').split(', '))
    jd_job_titles = set(jd_info.get('Job Titles', '').split(', '))
    best_title_match = max([fuzzy_match(rjt, jjt) for rjt in resume_job_titles for jjt in jd_job_titles], default=0)
    job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
    score += job_title_score

    return round(score, 2)

# Step 6: Example usage of the functions
def process_resume():
    # Define the file paths
    resume_info_file = 'extracted_resume_info.txt'
    jd_info_file = 'extracted_jd_info.txt'

    # Read the extracted text from the files
    resume_info_text = read_file(resume_info_file)
    jd_info_text = read_file(jd_info_file)

    if resume_info_text and jd_info_text:
        # Extract structured information from both the resume and the job description
        resume_info = extract_info_from_text(resume_info_text, info_type="resume")
        jd_info = extract_info_from_text(jd_info_text, info_type="jd")

        # Debugging output
        print("Extracted Resume Info:", resume_info)
        print("Extracted JD Info:", jd_info)

        # Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Calculate resume score
        score = calculate_resume_score(resume_info, jd_info)

        # Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Run the processing function
if __name__ == '__main__':
    process_resume()


Extracted Resume Info: {'Name': '** Not explicitly stated in the resume, but the "First Last" section suggests that it might be something like "John Smith".', 'Email': 'profesionalemail@resumeworded.com', 'Phone Number': '+1-234-456-789', 'Job Titles': 'machine learning engineer, automation engineer, computer systems analyst', 'Skills': 'deep learning (advanced), predictive modeling (experienced), statistical analysis, algorithms, english (native), german (fluent), french (conversational)', 'Years of Experience': 10}
Extracted JD Info: {'Name': 'Google LLC', 'Email': 'johndoe@google.com', 'Phone Number': None, 'Job Titles': 'machine learning engineer, collaborate with product managers, data scientists, and engineers to deliver ai-powered solutions, improve existing systems by analyzing and enhancing machine learning pipelines, ensure seamless integration of machine learning models into production, monitor, debug, and troubleshoot model performance and scalability issues', 'Skills': 'py

In [23]:
import re
from PyPDF2 import PdfReader
import pymysql
from difflib import SequenceMatcher

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Enhanced extraction using multiple regex patterns
def extract_info_from_text(text, info_type="resume"):
    """Extracts specific details from the given text using multiple regex patterns."""

    def extract_field(text, patterns):
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return clean_text(match.group(1))
        return None

    if info_type == "resume":
        name_patterns = [
            r'Name:\s*(.+)',  
            r'\\*Name\\:\s*(.+)',
            r'Full Name:\s*(.+)',
            r'\*\*Name:\*\*\s*(.+)',
            r'\bName\b\s*:\s*(.+)'  
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\*\*Email:\*\*\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'\*\*Phone Number:\*\*\s*(\+?\d[\d\s-]+)',
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]
        company_patterns = [
            r'Companies worked with:\s*(.+)',
            r'\*\*Companies worked with:\*\*\s*(.+)'
        ]

    elif info_type == "jd":
        name_patterns = [
            r'Company Name:\s*(.+)',
            r'\b(?:Organization|Employer)\b\s*:\s*(.+)'
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]

    extracted_info = {
        'Name': extract_field(text, name_patterns),
        'Email': extract_field(text, email_patterns),
        'Phone Number': extract_field(text, phone_patterns),
        'Job Titles': ', '.join(extract_job_titles(text)), 
        'Skills': ', '.join(extract_skills(text)),  
        'Years of Experience': extract_experience(text),  
    }

    return extracted_info

# Enhanced pattern-matching functions
def extract_skills(text):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',
        r'\*\*Skills:\*\*\s*(.+)',  
        r'\\*Skills\\:\s(.+)',
        r'\* Skills:\s*(.+)',
        r'Skills:\s*(.+)',
        r'\\*Required Skills\\:\s(.+)',
        r'Required Skills:\s*(.+)',
        r'List of Skills:\s*(.+)',
        r'\bSkills\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(skill.strip().lower()) for skill in match.group(1).split(',')]
    return []

def extract_job_titles(text):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',
        r'\*\*Job Titles:\*\*\s*(.+)',  
        r'\\*Job Titles\\:\s(.+)',
        r'\* Job Titles:\s*(.+)',
        r'Job Titles:\s*(.+)',
        r'\\*Job Title\\:\s(.+)',
        r'Job Title:\s*(.+)',
        r'List of Job Titles:\s*(.+)',
        r'\b(?:Positions|Roles)\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(title.strip().lower()) for title in match.group(1).split(',')]
    return []

def extract_experience(text):
    patterns = [
        r'Years of Experience:\s*(\d+)',
        r'\*\*Years of Experience:\*\*\s*(\d+)',  
        r'Experience required:\s*(\d+)',
        r'\* Years of Experience:\s*(\d+)',
        r'\d+\s*years? experience(?: required)?',
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return int(match.group(1))
    return 0

# Step 3: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()
        conn.select_db(company_name)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 4: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        conn.select_db(company_name)
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), resume_info.get('Skills'), score))
        conn.commit()
    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 5: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_info, jd_info):
    score = 0
    max_score = 100
    weights = {
        "skills": 0.6,
        "experience": 0.2,
        "job_title": 0.2
    }

    resume_skills = set(resume_info.get('Skills', '').split(', '))
    jd_skills = set(jd_info.get('Required Skills', '').split(', '))

    exact_skill_matches = resume_skills.intersection(jd_skills)
    partial_skill_matches = set()

    for resume_skill in resume_skills:
        for jd_skill in jd_skills:
            if fuzzy_match(resume_skill, jd_skill) > 0.55:
                partial_skill_matches.add(resume_skill)

    total_skill_matches = exact_skill_matches.union(partial_skill_matches)
    skill_match_ratio = len(total_skill_matches) / len(jd_skills) if jd_skills else 0
    skill_score = skill_match_ratio * weights['skills'] * max_score
    score += skill_score

    resume_experience = int(resume_info.get('Years of Experience', 0))
    jd_experience_required = int(jd_info.get('Years of Experience', 0))
    experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
    experience_score = experience_match_ratio * weights['experience'] * max_score
    score += experience_score

    resume_job_titles = set(resume_info.get('Job Titles', '').split(', '))
    jd_job_titles = set(jd_info.get('Job Titles', '').split(', '))
    best_title_match = max([fuzzy_match(rjt, jjt) for rjt in resume_job_titles for jjt in jd_job_titles], default=0)
    job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
    score += job_title_score

    return round(score, 2)

# Step 6: Example usage of the functions
def process_resume():
    resume_info_file = 'extracted_resume_info.txt'
    jd_info_file = 'extracted_jd_info.txt'

    resume_info_text = read_file(resume_info_file)
    jd_info_text = read_file(jd_info_file)

    if resume_info_text and jd_info_text:
        resume_info = extract_info_from_text(resume_info_text, info_type="resume")
        jd_info = extract_info_from_text(jd_info_text, info_type="jd")

        print("Extracted Resume Info:", resume_info)
        print("Extracted JD Info:", jd_info)

        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        score = calculate_resume_score(resume_info, jd_info)

        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Helper function to read file contents
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

if __name__ == '__main__':
    process_resume()


Extracted Resume Info: {'Name': '** Not explicitly stated in the resume, but the "First Last" section suggests that it might be something like "John Smith".', 'Email': 'profesionalemail@resumeworded.com', 'Phone Number': '+1-234-456-789', 'Job Titles': 'machine learning engineer, automation engineer, computer systems analyst', 'Skills': 'deep learning (advanced), predictive modeling (experienced), statistical analysis, algorithms, english (native), german (fluent), french (conversational)', 'Years of Experience': 10}
Extracted JD Info: {'Name': 'Google LLC', 'Email': 'johndoe@google.com', 'Phone Number': None, 'Job Titles': 'machine learning engineer, collaborate with product managers, data scientists, and engineers to deliver ai-powered solutions, improve existing systems by analyzing and enhancing machine learning pipelines, ensure seamless integration of machine learning models into production, monitor, debug, and troubleshoot model performance and scalability issues', 'Skills': 'py

In [24]:
import re
from PyPDF2 import PdfReader
import pymysql
from difflib import SequenceMatcher
from langchain_community.llms import Ollama

# Initialize the LLaMA model
MODEL = "llama3"
model = Ollama(model=MODEL)

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return '

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Please ensure that the 'Years of Experience' includes only professional job or internship experience, not education experience.

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''

# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''

# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")

# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

# Step 6: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()
        conn.select_db(company_name)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 7: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        conn.select_db(company_name)
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), resume_info.get('Skills'), score))
        conn.commit()
    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 8: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_info, jd_info):
    """Calculate the resume score based on skills, experience, and job title matches."""
    score = 0
    max_score = 100
    weights = {
        "skills": 0.6,
        "experience": 0.2,
        "job_title": 0.2
    }

    # Extracted details from the resume and job description
    resume_skills = set(resume_info.get('Skills', '').split(', '))
    jd_skills = set(jd_info.get('Required Skills', '').split(', '))

    # Skill matching (exact and partial match)
    exact_skill_matches = resume_skills.intersection(jd_skills)
    partial_skill_matches = set()

    # Fuzzy matching for partial skill matches
    for resume_skill in resume_skills:
        for jd_skill in jd_skills:
            if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match threshold
                partial_skill_matches.add(resume_skill)

    # Combine exact and partial matches
    total_skill_matches = exact_skill_matches.union(partial_skill_matches)
    skill_match_ratio = len(total_skill_matches) / len(jd_skills) if jd_skills else 0
    skill_score = skill_match_ratio * weights['skills'] * max_score
    score += skill_score

    # Experience matching
    resume_experience = int(resume_info.get('Years of Experience', 0))
    jd_experience_required = int(jd_info.get('Years of Experience', 0))
    experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
    experience_score = experience_match_ratio * weights['experience'] * max_score
    score += experience_score

    # Job title matching
    resume_job_titles = set(resume_info.get('Job Titles', '').split(', '))
    jd_job_titles = set(jd_info.get('Job Titles', '').split(', '))
    best_title_match = max([fuzzy_match(rjt, jjt) for rjt in resume_job_titles for jjt in jd_job_titles], default=0)
    job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
    score += job_title_score

    return round(score, 2)

# Step 9: Main process that extracts data, saves it, and calculates score
def process_resume(resume_pdf_path, jd_pdf_path):
    # Extract text from the resume and job description PDFs
    resume_text = read_pdf(resume_pdf_path)
    jd_text = read_pdf(jd_pdf_path)

    if resume_text and jd_text:
        # Extract structured information from both the resume and the job description using LLaMA
        resume_info_raw = extract_resume_info(resume_text)
        jd_info_raw = extract_jd_info(jd_text)

        # Save the extracted info to text files
        save_extracted_info(resume_info_raw, 'extracted_resume_info.txt')
        save_extracted_info(jd_info_raw, 'extracted_jd_info.txt')

        # Parse the raw extracted information into dictionaries
        resume_info = parse_extracted_info(resume_info_raw)
        jd_info = parse_extracted_info(jd_info_raw)

        # Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Calculate resume score
        score = calculate_resume_score(resume_info, jd_info)

        # Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Could not extract text from one or both PDFs.")

# Run the process
if __name__ == '__main__':
    resume_pdf_path = 'resume.pdf'  # Path to the resume PDF
    jd_pdf_path = 'jd2.pdf'          # Path to the job description PDF
    process_resume(resume_pdf_path, jd_pdf_path)


Information saved to extracted_resume_info.txt
Information saved to extracted_jd_info.txt
Resume processed and stored in database 'default_company' with score 80.0


In [25]:
from langchain_community.llms import Ollama
MODEL = "llama3"
model = Ollama(model=MODEL)
import re
from PyPDF2 import PdfReader


# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Please ensure that the 'Years of Experience' includes only professional job or internship experience, not education experience.

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''


# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''


# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")


# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info


# Step 6: Example usage of the functions

# Define the PDF file paths
resume_pdf_path = 'resume3.pdf'
jd_pdf_path = 'jd2.pdf'

# Extract text from the resume and job description PDFs
resume_text = read_pdf(resume_pdf_path)
jd_text = read_pdf(jd_pdf_path)


import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Enhanced extraction using multiple regex patterns
def extract_info_from_text(text, info_type="resume"):
    """Extracts specific details from the given text using multiple regex patterns."""

    # Helper function for extracting and cleaning text based on patterns
    def extract_field(text, patterns):
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return clean_text(match.group(1))
        return None

    # Defining patterns for each field based on the type (resume or job description)
    if info_type == "resume":
        name_patterns = [
            r'Name:\s*(.+)',  # New pattern for "Name:"
            r'\\*Name\\:\s*(.+)',
            r'Full Name:\s*(.+)',
            r'\*\*Name:\*\*\s*(.+)',  # Pattern for **Name:** format
            r'\bName\b\s*:\s*(.+)'  
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\*\*Email:\*\*\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',  # New pattern for **Email:** format
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'\*\*Phone Number:\*\*\s*(\+?\d[\d\s-]+)',  # New pattern for **Phone Number:** format
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]
        company_patterns = [
            r'Companies worked with:\s*(.+)',  # Pattern for "Companies worked with:"
            r'\*\*Companies worked with:\*\*\s*(.+)'  # Pattern for **Companies worked with:** format
        ]

    elif info_type == "jd":
        name_patterns = [
            r'Company Name:\s*(.+)',
            r'\b(?:Organization|Employer)\b\s*:\s*(.+)'
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]

    # Extract the relevant fields using the patterns
    extracted_info = {
        'Name': extract_field(text, name_patterns),
        'Email': extract_field(text, email_patterns),
        'Phone Number': extract_field(text, phone_patterns),
        'Job Titles': ', '.join(extract_job_titles(text)),  # Use updated extract_job_titles function
        'Skills': ', '.join(extract_skills(text)),  # Use updated extract_skills function
        'Years of Experience': extract_experience(text),  # Use updated extract_experience function
    }

    return extracted_info

# Enhanced pattern-matching functions

def extract_skills(text):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',
        r'\*\*Skills:\*\*\s*(.+)',  # Pattern for **Skills:** format
        r'\\*Skills\\:\s(.+)',
        r'\* Skills:\s*(.+)',
        r'Skills:\s*(.+)',
        r'\\*Required Skills\\:\s(.+)',
        r'Required Skills:\s*(.+)',
        r'List of Skills:\s*(.+)',
        r'\bSkills\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(skill.strip().lower()) for skill in match.group(1).split(',')]
    return []

def extract_job_titles(text):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',
        r'\*\*Job Titles:\*\*\s*(.+)',  # Pattern for **Job Titles:** format
        r'\\*Job Titles\\:\s(.+)',
        r'\* Job Titles:\s*(.+)',
        r'Job Titles:\s*(.+)',
        r'\\*Job Title\\:\s(.+)',
        r'Job Title:\s*(.+)',
        r'List of Job Titles:\s*(.+)',
        r'\b(?:Positions|Roles)\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(title.strip().lower()) for title in match.group(1).split(',')]
    return []

def extract_experience(text):
    patterns = [
        r'Years of Experience:\s*(\d+)',
        r'\*\*Years of Experience:\*\*\s*(\d+)',  # Pattern for **Years of Experience:** format
        r'Experience required:\s*(\d+)',
        r'\* Years of Experience:\s*(\d+)',
        r'\d+\s*years? experience(?: required)?',
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return int(match.group(1))
    return 0

# Step 3: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Create the database if it doesn't exist
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()

        # Select the created database
        conn.select_db(company_name)

        # Create the information table if it doesn't exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 4: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Select the database
        conn.select_db(company_name)

        # Insert into the database
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), resume_info.get('Skills'), score))
        conn.commit()

    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 5: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_info, jd_info):
    """Calculate the resume score based on skills, experience, and job title matches."""
    score = 0
    max_score = 100
    weights = {
        "skills": 0.6,
        "experience": 0.2,
        "job_title": 0.2
    }

    # Extracted details from the resume and job description
    resume_skills = set(resume_info.get('Skills', '').split(', '))
    jd_skills = set(jd_info.get('Required Skills', '').split(', '))

    # Skill matching (exact and partial match)
    exact_skill_matches = resume_skills.intersection(jd_skills)
    partial_skill_matches = set()

    # Fuzzy matching for partial skill matches
    for resume_skill in resume_skills:
        for jd_skill in jd_skills:
            if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match threshold
                partial_skill_matches.add(resume_skill)

    # Combine exact and partial matches
    total_skill_matches = exact_skill_matches.union(partial_skill_matches)
    skill_match_ratio = len(total_skill_matches) / len(jd_skills) if jd_skills else 0
    skill_score = skill_match_ratio * weights['skills'] * max_score
    score += skill_score

    # Experience matching
    resume_experience = int(resume_info.get('Years of Experience', 0))
    jd_experience_required = int(jd_info.get('Years of Experience', 0))
    experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
    experience_score = experience_match_ratio * weights['experience'] * max_score
    score += experience_score

    # Job title matching
    resume_job_titles = set(resume_info.get('Job Titles', '').split(', '))
    jd_job_titles = set(jd_info.get('Job Titles', '').split(', '))
    best_title_match = max([fuzzy_match(rjt, jjt) for rjt in resume_job_titles for jjt in jd_job_titles], default=0)
    job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
    score += job_title_score

    return round(score, 2)

# Step 6: Example usage of the functions
def process_resume():
    # Define the file paths
    resume_info_file = 'extracted_resume_info.txt'
    jd_info_file = 'extracted_jd_info.txt'

    # Read the extracted text from the files
    resume_info_text = read_file(resume_info_file)
    jd_info_text = read_file(jd_info_file)

    if resume_info_text and jd_info_text:
        # Extract structured information from both the resume and the job description
        resume_info = extract_info_from_text(resume_info_text, info_type="resume")
        jd_info = extract_info_from_text(jd_info_text, info_type="jd")

        # Debugging output
        print("Extracted Resume Info:", resume_info)
        print("Extracted JD Info:", jd_info)

        # Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Calculate resume score
        score = calculate_resume_score(resume_info, jd_info)

        # Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        print(f"Resume processed and stored in database '{company_name}' with score {score}")
    else:
        print("Error: Failed to process resume or job description.")

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Run the processing function
if resume_text and jd_text:
    # Extract structured information from both the resume and the job description
    resume_info_raw = extract_resume_info(resume_text)
    jd_info_raw = extract_jd_info(jd_text)

    # Parse the raw extracted information into dictionaries (optional for verification)
    resume_info = parse_extracted_info(resume_info_raw)
    jd_info = parse_extracted_info(jd_info_raw)

    # Step 7: Save extracted resume and JD information to .txt files
    save_extracted_info(resume_info_raw, 'extracted_resume_info.txt')
    save_extracted_info(jd_info_raw, 'extracted_jd_info.txt')
    
    process_resume()

else:
    print("Error: Could not extract text from one or both PDFs.")


Information saved to extracted_resume_info.txt
Information saved to extracted_jd_info.txt
Extracted Resume Info: {'Name': '** Not mentioned explicitly, but can be inferred as "First Last"', 'Email': None, 'Phone Number': '+1-234-456-789', 'Job Titles': '** machine learning engineer, automation engineer, computer systems analyst', 'Skills': '** deep learning, predictive modeling, statistical analysis, algorithms, english, german, french', 'Years of Experience': 10}
Extracted JD Info: {'Name': 'Google LLC', 'Email': 'johndoe@google.com', 'Phone Number': None, 'Job Titles': 'machine learning engineer, collaborate with product managers, data scientists, and engineers to deliver ai-powered solutions, improve existing systems by analyzing and enhancing machine learning pipelines, ensure seamless integration of machine learning models into production, monitor, debug, and troubleshoot model performance and scalability issues', 'Skills': "bachelor's degree in computer science, engineering, or r

In [30]:
from langchain_community.llms import Ollama
MODEL = "llama3"
model = Ollama(model=MODEL)
import re
from PyPDF2 import PdfReader
import gradio as gr

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Please ensure that the 'Years of Experience' includes only professional job or internship experience, not education experience.

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''


# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''


# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")


# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info


import pymysql
import re
from difflib import SequenceMatcher
from PyPDF2 import PdfReader

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Enhanced extraction using multiple regex patterns
def extract_info_from_text(text, info_type="resume"):
    """Extracts specific details from the given text using multiple regex patterns."""

    # Helper function for extracting and cleaning text based on patterns
    def extract_field(text, patterns):
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return clean_text(match.group(1))
        return None

    # Defining patterns for each field based on the type (resume or job description)
    if info_type == "resume":
        name_patterns = [
            r'Name:\s*(.+)',  # New pattern for "Name:"
            r'\\*Name\\:\s*(.+)',
            r'Full Name:\s*(.+)',
            r'\*\*Name:\*\*\s*(.+)',  # Pattern for **Name:** format
            r'\bName\b\s*:\s*(.+)'  
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\*\*Email:\*\*\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',  # New pattern for **Email:** format
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'\*\*Phone Number:\*\*\s*(\+?\d[\d\s-]+)',  # New pattern for **Phone Number:** format
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]
        company_patterns = [
            r'Companies worked with:\s*(.+)',  # Pattern for "Companies worked with:"
            r'\*\*Companies worked with:\*\*\s*(.+)'  # Pattern for **Companies worked with:** format
        ]

    elif info_type == "jd":
        name_patterns = [
            r'Company Name:\s*(.+)',
            r'\b(?:Organization|Employer)\b\s*:\s*(.+)'
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]

    # Extract the relevant fields using the patterns
    extracted_info = {
        'Name': extract_field(text, name_patterns),
        'Email': extract_field(text, email_patterns),
        'Phone Number': extract_field(text, phone_patterns),
        'Job Titles': ', '.join(extract_job_titles(text)),  # Use updated extract_job_titles function
        'Skills': ', '.join(extract_skills(text)),  # Use updated extract_skills function
        'Years of Experience': extract_experience(text),  # Use updated extract_experience function
    }

    return extracted_info

# Enhanced pattern-matching functions

def extract_skills(text):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',
        r'\*\*Skills:\*\*\s*(.+)',  # Pattern for **Skills:** format
        r'\\*Skills\\:\s(.+)',
        r'\* Skills:\s*(.+)',
        r'Skills:\s*(.+)',
        r'\\*Required Skills\\:\s(.+)',
        r'Required Skills:\s*(.+)',
        r'List of Skills:\s*(.+)',
        r'\bSkills\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(skill.strip().lower()) for skill in match.group(1).split(',')]
    return []

def extract_job_titles(text):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',
        r'\*\*Job Titles:\*\*\s*(.+)',  # Pattern for **Job Titles:** format
        r'\\*Job Titles\\:\s(.+)',
        r'\* Job Titles:\s*(.+)',
        r'Job Titles:\s*(.+)',
        r'\\*Job Title\\:\s(.+)',
        r'Job Title:\s*(.+)',
        r'List of Job Titles:\s*(.+)',
        r'\b(?:Positions|Roles)\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(title.strip().lower()) for title in match.group(1).split(',')]
    return []

def extract_experience(text):
    patterns = [
        r'Years of Experience:\s*(\d+)',
        r'\*\*Years of Experience:\*\*\s*(\d+)',  # Pattern for **Years of Experience:** format
        r'Experience required:\s*(\d+)',
        r'\* Years of Experience:\s*(\d+)',
        r'\d+\s*years? experience(?: required)?',
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return int(match.group(1))
    return 0

# Step 3: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Create the database if it doesn't exist
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()

        # Select the created database
        conn.select_db(company_name)

        # Create the information table if it doesn't exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 4: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Select the database
        conn.select_db(company_name)

        # Insert into the database
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), resume_info.get('Skills'), score))
        conn.commit()

    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 5: Function to calculate the resume score based on matching criteria
# Step 5: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_info, jd_info):
    """Calculate the resume score based on skills, experience, and job title matches."""
    score = 0
    max_score = 100
    weights = {
        "skills": 0.6,
        "experience": 0.2,
        "job_title": 0.2
    }

    # Extracted details from the resume and job description
    resume_skills = set(resume_info.get('Skills', '').split(', '))
    jd_skills = set(jd_info.get('Required Skills', '').split(', '))

    # Skill matching (exact and partial match)
    exact_skill_matches = resume_skills.intersection(jd_skills)
    partial_skill_matches = set()

    # Fuzzy matching for partial skill matches
    for resume_skill in resume_skills:
        for jd_skill in jd_skills:
            if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match threshold
                partial_skill_matches.add(resume_skill)

    # Combine exact and partial matches
    total_skill_matches = exact_skill_matches.union(partial_skill_matches)
    skill_match_ratio = len(total_skill_matches) / len(jd_skills) if jd_skills else 0
    skill_score = skill_match_ratio * weights['skills'] * max_score
    score += skill_score

    # Experience matching
    resume_experience = int(resume_info.get('Years of Experience', 0))
    jd_experience_required = int(jd_info.get('Years of Experience', 0))
    experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
    experience_score = experience_match_ratio * weights['experience'] * max_score
    score += experience_score

    # Job title matching
    resume_job_titles = set(resume_info.get('Job Titles', '').split(', '))
    jd_job_titles = set(jd_info.get('Job Titles', '').split(', '))
    best_title_match = max([fuzzy_match(rjt, jjt) for rjt in resume_job_titles for jjt in jd_job_titles], default=0)
    job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
    score += job_title_score

    return round(score, 2)

# Step 6: Example usage of the functions
def process_resume(resume_pdf, jd_pdf):
# Define the PDF file paths
    resume_pdf_path = resume_pdf
    jd_pdf_path = jd_pdf

    # Extract text from the resume and job description PDFs
    resume_text = read_pdf(resume_pdf_path)
    jd_text = read_pdf(jd_pdf_path)

    resume_info_raw = extract_resume_info(resume_text)
    jd_info_raw = extract_jd_info(jd_text)

    # Parse the raw extracted information into dictionaries (optional for verification)
    resume_info = parse_extracted_info(resume_info_raw)
    jd_info = parse_extracted_info(jd_info_raw)

    # Step 7: Save extracted resume and JD information to .txt files
    save_extracted_info(resume_info_raw, 'extracted_resume_info.txt')
    save_extracted_info(jd_info_raw, 'extracted_jd_info.txt')

    # Define the file paths
    resume_info_file = 'extracted_resume_info.txt'
    jd_info_file = 'extracted_jd_info.txt'

    # Read the extracted text from the files
    resume_info_text = read_file(resume_info_file)
    jd_info_text = read_file(jd_info_file)

    if resume_info_text and jd_info_text:
        # Extract structured information from both the resume and the job description
        resume_info = extract_info_from_text(resume_info_text, info_type="resume")
        jd_info = extract_info_from_text(jd_info_text, info_type="jd")

        # Debugging output
        print("Extracted Resume Info:", resume_info)
        print("Extracted JD Info:", jd_info)

        # Create a database based on the company name
        company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
        create_company_db(company_name)

        # Calculate resume score
        score = calculate_resume_score(resume_info, jd_info)

        # Insert resume info into the database
        insert_resume_info(company_name, resume_info, score)

        return f"Resume processed and stored in database '{company_name}' with score {score}"
    else:
        print("Error: Failed to process resume or job description.")

# Function to read the contents of a file
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

# Gradio interface
gr.Interface(
    fn=process_resume,
    inputs=["file", "file"],
    outputs="text",
    title="Resume Scoring",
    description="Upload a resume and a job description to calculate the resume score."
).launch()


Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.




--------


Information saved to extracted_resume_info.txt
Information saved to extracted_jd_info.txt
Extracted Resume Info: {'Name': 'Prashant Singh', 'Email': 'prashantsingha96@gmail.com', 'Phone Number': '+91 8368796901', 'Job Titles': 'machine learning intern, data scientist and machine learning enthusiast (comma-separated format)', 'Skills': 'python, c, sql, mysql, postgresql, redshift, aws, azure cloud, pytorch, scikit-learn, tensorflow, nlp, opencv, yolo, docker, vs code, dbeaver, jupyter notebook, git (comma-separated format)', 'Years of Experience': 2}
Extracted JD Info: {'Name': 'Google LLC', 'Email': 'johndoe@google.com', 'Phone Number': None, 'Job Titles': 'machine learning engineer, ai-powered solutions developer, large-scale data processing specialist', 'Skills': 'python, tensorflow, keras, pytorch, hadoop, spark, excellent problem-solving skills, ability to work in a fast-paced environment', 'Years of Experience': 3}


In [31]:
from langchain_community.llms import Ollama
MODEL = "llama3"
model = Ollama(model=MODEL)
import re
from PyPDF2 import PdfReader
import pymysql
from difflib import SequenceMatcher
import gradio as gr

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Please ensure that the 'Years of Experience' includes only professional job or internship experience, not education experience.

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''

# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''

# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")

# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Enhanced extraction using multiple regex patterns
def extract_info_from_text(text, info_type="resume"):
    """Extracts specific details from the given text using multiple regex patterns."""
    # Helper function for extracting and cleaning text based on patterns
    def extract_field(text, patterns):
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return clean_text(match.group(1))
        return None

    # Defining patterns for each field based on the type (resume or job description)
    if info_type == "resume":
        name_patterns = [
            r'Name:\s*(.+)',  # New pattern for "Name:"
            r'\\*Name\\:\s*(.+)',
            r'Full Name:\s*(.+)',
            r'\*\*Name:\*\*\s*(.+)',  # Pattern for **Name:** format
            r'\bName\b\s*:\s*(.+)'  
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\*\*Email:\*\*\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',  # New pattern for **Email:** format
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'\*\*Phone Number:\*\*\s*(\+?\d[\d\s-]+)',  # New pattern for **Phone Number:** format
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]
        company_patterns = [
            r'Companies worked with:\s*(.+)',  # Pattern for "Companies worked with:"
            r'\*\*Companies worked with:\*\*\s*(.+)'  # Pattern for **Companies worked with:** format
        ]

    elif info_type == "jd":
        name_patterns = [
            r'Company Name:\s*(.+)',
            r'\b(?:Organization|Employer)\b\s*:\s*(.+)'
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]

    # Extract the relevant fields using the patterns
    extracted_info = {
        'Name': extract_field(text, name_patterns),
        'Email': extract_field(text, email_patterns),
        'Phone Number': extract_field(text, phone_patterns),
        'Job Titles': ', '.join(extract_job_titles(text)),  # Use updated extract_job_titles function
        'Skills': ', '.join(extract_skills(text)),  # Use updated extract_skills function
        'Years of Experience': extract_experience(text),  # Use updated extract_experience function
    }

    return extracted_info

# Enhanced pattern-matching functions

def extract_skills(text):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',
        r'\*\*Skills:\*\*\s*(.+)',  # Pattern for **Skills:** format
        r'\\*Skills\\:\s(.+)',
        r'\* Skills:\s*(.+)',
        r'Skills:\s*(.+)',
        r'\\*Required Skills\\:\s(.+)',
        r'Required Skills:\s*(.+)',
        r'List of Skills:\s*(.+)',
        r'\bSkills\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(skill.strip().lower()) for skill in match.group(1).split(',')]
    return []

def extract_job_titles(text):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',
        r'\*\*Job Titles:\*\*\s*(.+)',  # Pattern for **Job Titles:** format
        r'\\*Job Titles\\:\s(.+)',
        r'\* Job Titles:\s*(.+)',
        r'Job Titles:\s*(.+)',
        r'\\*Job Title\\:\s(.+)',
        r'Job Title:\s*(.+)',
        r'List of Job Titles :\s*(.+)',
        r'\b(?:Positions|Roles)\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(title.strip().lower()) for title in match.group(1).split(',')]
    return []

def extract_experience(text):
    patterns = [
        r'Years of Experience:\s*(\d+)',
        r'\*\*Years of Experience:\*\*\s*(\d+)',  # Pattern for **Years of Experience:** format
        r'Experience required:\s*(\d+)',
        r'\* Years of Experience:\s*(\d+)',
        r'\d+\s*years? experience(?: required)?',
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return int(match.group(1))
    return 0

# Step 3: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Create the database if it doesn't exist
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()

        # Select the created database
        conn.select_db(company_name)

        # Create the information table if it doesn't exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 4: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Select the database
        conn.select_db(company_name)

        # Insert into the database
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), resume_info.get('Skills'), score))
        conn.commit()

    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 5: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_info, jd_info):
    """Calculate the resume score based on skills, experience, and job title matches."""
    score = 0
    max_score = 100
    weights = {
        "skills": 0.6,
        "experience": 0.2,
        "job_title": 0.2
    }

    # Extracted details from the resume and job description
    resume_skills = set(resume_info.get('Skills', '').split(', '))
    jd_skills = set(jd_info.get('Required Skills', '').split(', '))

    # Skill matching (exact and partial match)
    exact_skill_matches = resume_skills.intersection(jd_skills)
    partial_skill_matches = set()

    # Fuzzy matching for partial skill matches
    for resume_skill in resume_skills:
        for jd_skill in jd_skills:
            if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match threshold
                partial_skill_matches.add(resume_skill)

    # Combine exact and partial matches
    total_skill_matches = exact_skill_matches.union(partial_skill_matches)
    skill_match_ratio = len(total_skill_matches) / len(jd_skills) if jd_skills else 0
    skill_score = skill_match_ratio * weights['skills'] * max_score
    score += skill_score

    # Experience matching
    resume_experience = int(resume_info.get('Years of Experience', 0))
    jd_experience_required = int(jd_info.get('Years of Experience', 0))
    experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0
    experience_score = experience_match_ratio * weights['experience'] * max_score
    score += experience_score

    # Job title matching
    resume_job_titles = set(resume_info.get('Job Titles', '').split(', '))
    jd_job_titles = set(jd_info.get('Job Titles', '').split(', '))
    best_title_match = max([fuzzy_match(rjt, jjt) for rjt in resume_job_titles for jjt in jd_job_titles], default=0)
    job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
    score += job_title_score

    return round(score, 2)

# Gradio interface
def process_resume(resume_pdf, jd_pdf):
    # Extract text from the PDFs
    resume_text = read_pdf(resume_pdf)
    jd_text = read_pdf(jd_pdf)

    # Extract structured information from both the resume and the job description
    resume_info_raw = extract_resume_info(resume_text)
    jd_info_raw = extract_jd_info(jd_text)

    # Parse the raw extracted information into dictionaries (optional for verification)
    resume_info = parse_extracted_info(resume_info_raw)
    jd_info = parse_extracted_info(jd_info_raw)

    # Create a database based on the company name
    company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
    create_company_db(company_name)

    # Calculate resume score
    score = calculate_resume_score(resume_info, jd_info)

    # Insert resume info into the database
    insert_resume_info(company_name, resume_info, score)

    return f"Resume processed and stored in database '{company_name}' with score {score}"

# Gradio interface
gr.Interface(
    fn=process_resume,
    inputs=["file", "file"],
    outputs="text",
    title="Resume Scoring",
    description="Upload a resume and a job description to calculate the resume score."
).launch()

Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.




--------


Error calculating resume score: extract_skills() got an unexpected keyword argument 'type'


In [35]:
from langchain_community.llms import Ollama
MODEL = "llama3"
model = Ollama(model=MODEL)
import re
from PyPDF2 import PdfReader
import pymysql
from difflib import SequenceMatcher
import gradio as gr

# Step 1: Extract text from PDF using PyPDF2
def read_pdf(file):
    """Reads the PDF and extracts text from it."""
    try:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ''

# Step 2: Function to extract information from resumes using LLaMA
def extract_resume_info(resume_text):
    """Extracts specific details from resume text using LLaMA."""
    prompt = f"""
    Extract the following information from this resume:
    - Name
    - Email
    - Phone Number
    - List of Job Titles (in a comma-separated format)
    - List of Skills (in a comma-separated format)
    - Years of Experience (in numbers, no text, just the number of years)
    - List of Companies worked with (in a comma-separated format)

    Please ensure that the 'Years of Experience' includes only professional job or internship experience, not education experience.

    Here is the resume:
    {resume_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting resume info: {e}")
        return ''

# Step 3: Extract job description info using LLaMA
def extract_jd_info(jd_text):
    """Extracts specific details from a job description using LLaMA."""
    prompt = f"""
    Extract the following information from this job description:
    - Company Name
    - Email
    - Phone Number
    - Job Title (in a comma-separated format)
    - List of Required Skills (in a comma-separated format)
    - Years of Experience required (in numbers, no text, just the number of years)

    Here is the job description:
    {jd_text}
    """
    try:
        # Get the response from the LLaMA model
        response = model.invoke(prompt)
        return response.strip()  # Returning raw string response
    except Exception as e:
        print(f"Error in extracting JD info: {e}")
        return ''

# Step 4: Function to save extracted information to a .txt file
def save_extracted_info(info_text, output_file):
    """Saves the extracted information to a text file."""
    try:
        with open(output_file, 'w') as f:
            f.write(info_text)  # Save raw string info directly
        print(f"Information saved to {output_file}")
    except Exception as e:
        print(f"Error saving info: {e}")

# Step 5: Helper function to parse the extracted information from text to dictionary
def parse_extracted_info(text):
    """Parses the extracted information into a dictionary."""
    info = {}
    try:
        # Simple parsing by splitting lines and using key-value pairs
        lines = text.split('\n')
        for line in lines:
            if ':' in line:
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    except Exception as e:
        print(f"Error parsing extracted info: {e}")
    return info

# MySQL connection (without specifying a database)
def get_server_connection():
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        cursorclass=pymysql.cursors.DictCursor  # DictCursor to get results as dictionaries
    )
    return connection

# Helper function for fuzzy matching
def fuzzy_match(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

# Helper function to clean and normalize extracted text
def clean_text(text):
    text = re.sub(r'[\\]+', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Enhanced extraction using multiple regex patterns
def extract_info_from_text(text, info_type="resume"):
    """Extracts specific details from the given text using multiple regex patterns."""
    # Helper function for extracting and cleaning text based on patterns
    def extract_field(text, patterns):
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return clean_text(match.group(1))
        return None

    # Defining patterns for each field based on the type (resume or job description)
    if info_type == "resume":
        name_patterns = [
            r'Name:\s*(.+)',  # New pattern for "Name:"
            r'\\*Name\\:\s*(.+)',
            r'Full Name:\s*(.+)',
            r'\*\*Name:\*\*\s*(.+)',  # Pattern for **Name:** format
            r'\bName\b\s*:\s*(.+)'  
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\*\*Email:\*\*\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',  # New pattern for **Email:** format
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'\*\*Phone Number:\*\*\s*(\+?\d[\d\s-]+)',  # New pattern for **Phone Number:** format
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]
        company_patterns = [
            r'Companies worked with:\s*(.+)',  # Pattern for "Companies worked with:"
            r'\*\*Companies worked with:\*\*\s*(.+)'  # Pattern for **Companies worked with:** format
        ]

    elif info_type == "jd":
        name_patterns = [
            r'Company Name:\s*(.+)',
            r'\b(?:Organization|Employer)\b\s*:\s*(.+)'
        ]
        email_patterns = [
            r'Email:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)',
            r'\b(?:E-mail|Email)\b\s*:\s*([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'
        ]
        phone_patterns = [
            r'Phone Number:\s*(\+?\d[\d\s-]+)',
            r'Contact Number:\s*(\+?\d[\d\s-]+)',
            r'\b(?:Phone|Telephone|Contact)\b\s*:\s*(\+?\d[\d\s-]+)'
        ]

    # Extract the relevant fields using the patterns
    extracted_info = {
        'Name': extract_field(text, name_patterns),
        'Email': extract_field(text, email_patterns),
        'Phone Number': extract_field(text, phone_patterns),
        'Job Titles': ', '.join(extract_job_titles(text)),  # Use updated extract_job_titles function
        'Skills': ', '.join(extract_skills(text)),  # Use updated extract_skills function
        'Years of Experience': extract_experience(text),  # Use updated extract_experience function
    }

    return extracted_info

# Enhanced pattern-matching functions

def extract_skills(text):
    patterns = [
        r'Skills\s*\(comma-separated\):\s*(.+)',
        r'\*\*Skills:\*\*\s*(.+)',  # Pattern for **Skills:** format
        r'\\*Skills\\:\s(.+)',
        r'\* Skills:\s*(.+)',
        r'Skills:\s*(.+)',
        r'\\*Required Skills\\:\s(.+)',
        r'Required Skills:\s*(.+)',
        r'List of Skills:\s*(.+)',
        r'\bSkills\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(skill.strip().lower()) for skill in match.group(1).split(',')]
    return []

def extract_job_titles(text):
    patterns = [
        r'Job Titles\s*\(comma-separated\):\s*(.+)',
        r'\*\*Job Titles:\*\*\s*(.+)',  # Pattern for **Job Titles:** format
        r'\\*Job Titles\\:\s(.+)',
        r'\* Job Titles:\s*(.+)',
        r'Job Titles:\s*(.+)',
        r'\\*Job Title\\:\s(.+)',
        r'Job Title:\s*(.+)',
        r'List of Job Titles :\s*(.+)',
        r'\b(?:Positions|Roles)\b\s*:\s*(.+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return [clean_text(title.strip().lower()) for title in match.group(1).split(',')]
    return []

def extract_experience(text):
    patterns = [
        r'Years of Experience:\s*(\d+)',
        r'\*\*Years of Experience:\*\*\s*(\d+)',  # Pattern for **Years of Experience:** format
        r'Experience required:\s*(\d+)',
        r'\* Years of Experience:\s*(\d+)',
        r'\d+\s*years? experience(?: required)?',
        r'\b(?:Experience|Professional Experience)\b\s*:\s*(\d+)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return int(match.group(1))
    return 0

# Step 3: Function to create a database and information table
def create_company_db(company_name):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Create the database if it doesn't exist
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{company_name}`")
        conn.commit()

        # Select the created database
        conn.select_db(company_name)

        # Create the information table if it doesn't exist
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS information (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255),
                phone_number VARCHAR(20),
                email VARCHAR(255),
                skills TEXT,
                score FLOAT
            )
        """)
        conn.commit()
    except Exception as e:
        print(f"Error creating database/table: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 4: Function to insert extracted resume info into the table
def insert_resume_info(company_name, resume_info, score):
    conn = get_server_connection()
    cursor = conn.cursor()
    try:
        # Select the database
        conn.select_db(company_name)

        # Insert into the database
        query = "INSERT INTO information (name, phone_number, email, skills, score) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (resume_info.get('Name'), resume_info.get('Phone Number'), resume_info.get('Email'), resume_info.get('Skills'), score))
        conn.commit()

    except Exception as e:
        print(f"Error inserting resume info into the database: {e}")
    finally:
        cursor.close()
        conn.close()

# Step 5: Function to calculate the resume score based on matching criteria
def calculate_resume_score(resume_text, jd_text):
    try:
        # Extract resume info
        resume_skills = extract_skills(resume_text)
        resume_job_titles = extract_job_titles(resume_text)
        resume_experience = extract_experience(resume_text)
        
        # Extract JD info
        jd_skills = extract_skills(jd_text)
        jd_job_titles = extract_job_titles(jd_text)
        jd_experience_required = extract_experience(jd_text)

        # Print extracted information for debugging
        print("Resume Skills:", resume_skills)
        print("Resume Job Titles:", resume_job_titles)
        print("Resume Experience (Years):", resume_experience)

        print("\nJD Skills:", jd_skills)
        print("JD Job Titles:", jd_job_titles)
        print("JD Experience Required (Years):", jd_experience_required)

        # Initialize score variables
        score = 0
        max_score = 100
        weights = {
            "skills": 0.6,  # 60% for skills
            "experience": 0.2,  # 20% for experience
            "job_title": 0.2  # 20% for job titles
        }

        # --- Skills Matching ---
        resume_skills_set = set(resume_skills)
        jd_skills_set = set(jd_skills)

        # Exact skill matches
        exact_skill_matches = resume_skills_set.intersection(jd_skills_set)
        partial_skill_matches = set()

        # Fuzzy matching for partial skill matches
        for resume_skill in resume_skills_set:
            for jd_skill in jd_skills_set:
                if fuzzy_match(resume_skill, jd_skill) > 0.55:  # Fuzzy match with a higher threshold
                    partial_skill_matches.add(resume_skill)

        # Combine exact and partial matches, avoiding double-counting
        total_skill_matches = exact_skill_matches.union(partial_skill_matches)
        skill_match_ratio = len(total_skill_matches) / len(jd_skills_set) if jd_skills_set else 0
        
        # Skills score is based on the match ratio, with more weight given to exact matches
        skill_score = skill_match_ratio * weights['skills'] * max_score
        score += skill_score

        print("\nExact Skill Matches:", exact_skill_matches)
        print("Partial Skill Matches:", partial_skill_matches)
        print("Skill Match Ratio:", skill_match_ratio)
        print("Skill Score:", skill_score)

        # --- Experience Matching ---
        experience_match_ratio = resume_experience / jd_experience_required if jd_experience_required else 0

        if resume_experience >= jd_experience_required:
            # Full marks for experience if the resume experience is greater than or equal to the JD requirement
            experience_score = weights['experience'] * max_score
        else:
            # Proportional score if resume experience is less than required
            experience_score = experience_match_ratio * weights['experience'] * max_score

        score += experience_score

        print("Experience Match Ratio:", experience_match_ratio)
        print("Experience Score:", experience_score)

        # --- Job Title Matching ---
        best_title_match = 0
        for resume_title in resume_job_titles:
            for jd_title in jd_job_titles:
                best_title_match = max(best_title_match, fuzzy_match(resume_title, jd_title))

        # Add job title score if a good match exists
        job_title_score = best_title_match * weights['job_title'] * max_score if best_title_match > 0.7 else 0
        score += job_title_score

        print("Best Job Title Match Score:", best_title_match)
        print("Job Title Score:", job_title_score)

        # Return final score rounded to 2 decimal places
        final_score = round(score, 2)
        print("\nFinal Resume Score:", final_score)
        return final_score

    except Exception as e:
        print(f"Error calculating resume score: {e}")
        return 0

# Gradio interface
def process_resume(resume_pdf, jd_pdf):
    # Extract text from the PDFs
    resume_text = read_pdf(resume_pdf)
    jd_text = read_pdf(jd_pdf)

    # Extract structured information from both the resume and the job description
    resume_info_raw = extract_resume_info(resume_text)
    jd_info_raw = extract_jd_info(jd_text)

    # Parse the raw extracted information into dictionaries (optional for verification)
    resume_info = parse_extracted_info(resume_info_raw)
    jd_info = parse_extracted_info(jd_info_raw)

    # Create a database based on the company name
    company_name = jd_info.get('Company Name', 'default_company').replace(' ', '_')
    create_company_db(company_name)

    # Calculate resume score
    score = calculate_resume_score(resume_text, jd_text)

    # Insert resume info into the database
    insert_resume_info(company_name, resume_info, score)

    return f"Resume processed and stored in database '{company_name}' with score {score}"

# Gradio interface
gr.Interface(
    fn=process_resume,
    inputs=["file", "file"],
    outputs="text",
    title="Resume Scoring",
    description="Upload a resume and a job description to calculate the resume score."
).launch()

Running on local URL:  http://127.0.0.1:7867

To create a public link, set `share=True` in `launch()`.




--------


Resume Skills: []
Resume Job Titles: []
Resume Experience (Years): 0

JD Skills: []
JD Job Titles: []
JD Experience Required (Years): 0

Exact Skill Matches: set()
Partial Skill Matches: set()
Skill Match Ratio: 0
Skill Score: 0.0
Experience Match Ratio: 0
Experience Score: 20.0
Best Job Title Match Score: 0
Job Title Score: 0

Final Resume Score: 20.0
Information saved to extracted_resume_info.txt
Information saved to extracted_jd_info.txt
Extracted Resume Info: {'Name': None, 'Email': None, 'Phone Number': None, 'Job Titles': '', 'Skills': '', 'Years of Experience': 0}
Extracted JD Info: {'Name': None, 'Email': None, 'Phone Number': None, 'Job Titles': '', 'Skills': '', 'Years of Experience': 0}
Error calculating resume score: expected string or bytes-like object, got 'dict'
