# In [None]:
import os
import spacy
import docx2txt
import numpy as np

# Load spaCy's small English pipeline once at module level; the code below
# reuses this single `nlp` object for every text it vectorizes.
# NOTE(review): "en_core_web_sm" is documented as shipping without static word
# vectors, so `doc.vector` presumably comes from the pipeline's internal
# tensors rather than pretrained word embeddings — confirm this is acceptable
# for similarity ranking, or consider a model that bundles word vectors.
nlp = spacy.load("en_core_web_sm")

# Function to extract text from PDF or DOCX files
def extract_text(file_path):
    """Extract the raw text of a job-description or resume file.

    The file type is chosen from the extension, which is matched
    case-insensitively (``.DOCX`` is handled like ``.docx``).

    Args:
        file_path: Path to a ``.pdf`` or ``.docx`` file.

    Returns:
        The extracted text. An empty string is returned when a DOCX file
        cannot be read, and for PDF files, whose extraction is not
        implemented yet.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    _, file_extension = os.path.splitext(file_path)
    # Normalize case so files named e.g. "CV.DOCX" are not rejected.
    file_extension = file_extension.lower()

    if file_extension == '.pdf':
        # TODO: implement PDF extraction. Until then, report the gap and
        # return an empty string (the same "no text" value the DOCX failure
        # path uses) instead of silently falling through and returning None.
        print(f"PDF text extraction is not implemented; skipping {file_path}")
        return ""

    if file_extension == '.docx':
        try:
            return docx2txt.process(file_path)
        except Exception as e:
            # Deliberate best-effort: one unreadable file should not abort
            # processing of the remaining files, so report it and carry on.
            print(f"Error extracting text from {file_path}: {e}")
            return ""

    raise ValueError("Unsupported file format")

# Function to preprocess text
def preprocess_text(text):
    """Normalize text before it is vectorized.

    Currently the only normalization step is lowercasing; further steps
    (e.g. punctuation removal) can be added here.

    Args:
        text: The text to normalize, or ``None`` when no text is available.

    Returns:
        The lowercased text, or an empty string when *text* is ``None``.
    """
    return "" if text is None else text.lower()

# Function to extract key information from resumes
def extract_resume_info(resume_text):
    """Return the spaCy document vector for a resume's text.

    Args:
        resume_text: The resume text to embed.

    Returns:
        The ``vector`` attribute of the spaCy ``Doc`` produced by running
        *resume_text* through the module-level ``nlp`` pipeline.
    """
    # Build the Doc with the shared pipeline and hand back its vector directly.
    return nlp(resume_text).vector

# Sample job description and resume paths
job_description_path = "/content/fab JD.docx"  # Provide the path to your job description file
resume_paths = ["/content/C_Cv.docx", "/content/july 3 cv.docx","/content/sst_cv.docx"]  # List of resume file paths


def _cosine_similarity(vec_a, vec_b):
    """Return the cosine similarity of two vectors, or 0.0 when it is undefined.

    The similarity is undefined when the vectors differ in shape (np.dot
    would raise) or when either has zero norm (the division would yield
    NaN). Both situations can arise when a document's text is empty, e.g.
    after a failed extraction, so such documents score 0.0 instead of
    crashing the run or poisoning the sort with NaN.
    """
    if np.shape(vec_a) != np.shape(vec_b):
        return 0.0
    denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if denominator == 0:
        return 0.0
    return np.dot(vec_a, vec_b) / denominator


# Extract text from job description
job_description_text = extract_text(job_description_path)
job_description_text = preprocess_text(job_description_text)

# Vectorize the job description with the spaCy pipeline
job_description_vector = nlp(job_description_text).vector

# List to store similarity scores and corresponding resume paths
rankings = []

for resume_path in resume_paths:
    # Extract text from resume
    resume_text = extract_text(resume_path)
    resume_text = preprocess_text(resume_text)

    # Obtain the resume's document vector
    resume_vector = extract_resume_info(resume_text)

    # Compute cosine similarity between job description and resume vectors;
    # resumes whose similarity is undefined (e.g. empty text) score 0.0
    similarity_score = _cosine_similarity(job_description_vector, resume_vector)

    # Store similarity score and resume path in the rankings list
    rankings.append((similarity_score, resume_path))

# Sort the rankings based on similarity scores (descending order);
# equal scores fall back to comparing the resume paths
rankings.sort(reverse=True)

# Print the rankings
print("Rankings:")
for i, (score, resume_path) in enumerate(rankings, start=1):
    print(f"{i}. Resume: {resume_path}, Similarity Score: {score}")