# resume organizer

In [None]:
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import shutil # this is important for moving files

In [None]:
# Step 1: Load and preprocess documents
def load_documents_from_folder(folder_path):
    """Recursively read every ``.txt`` file found under *folder_path*.

    Args:
        folder_path: Root directory to search; subdirectories are walked too.

    Returns:
        A ``(documents, file_paths)`` tuple of two parallel lists, where
        ``documents[i]`` is the UTF-8 decoded content of ``file_paths[i]``.
        Both lists are empty when no matching file is found.

    Raises:
        UnicodeDecodeError: If a matching file is not valid UTF-8.
    """
    documents = []
    file_paths = []
    for root, dirs, files in os.walk(folder_path):
        # Sorting ``dirs`` in place makes os.walk descend in a stable order,
        # so the result does not depend on the file system's listing order.
        dirs.sort()
        for file in sorted(files):
            # Compare case-insensitively so 'CV.TXT' or 'cv.Txt' is not skipped.
            if not file.lower().endswith('.txt'):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
            documents.append(text)
            file_paths.append(file_path)
    return documents, file_paths

In [None]:
# Step 2: Compute TF-IDF and similarity
def compute_similarity(documents, folder_names):
    """Score every document against every folder name using TF-IDF vectors.

    Documents and folder names are vectorised together in one
    ``fit_transform`` call so both share the same vocabulary, then the
    document rows are compared with the folder-name rows by cosine
    similarity.

    Args:
        documents: Sequence of document texts.
        folder_names: Sequence of folder names; each name is used as the
            text that represents its category.

    Returns:
        A matrix with one row per document and one column per folder name.
        When *documents* is empty, an empty array of shape
        ``(0, len(folder_names))`` is returned.

    Raises:
        ValueError: If *folder_names* is empty.
    """
    documents = list(documents)
    folder_names = list(folder_names)
    if not folder_names:
        # Fail early with a clear message: ``-len(folder_names)`` would be 0
        # here, which turns the row split below into ``[:0]`` / ``[0:]``.
        raise ValueError('folder_names must contain at least one folder name')
    if not documents:
        # Nothing to score; hand back a well-shaped empty result rather than
        # passing zero rows to the pairwise computation.
        import numpy as np
        return np.zeros((0, len(folder_names)))

    n_docs = len(documents)
    vectorizer = TfidfVectorizer()
    doc_vectors = vectorizer.fit_transform(documents + folder_names)
    # The first n_docs rows are the documents, the remainder the folder names.
    similarity_matrix = cosine_similarity(doc_vectors[:n_docs], doc_vectors[n_docs:])
    return similarity_matrix

In [None]:
# Step 3: Recommend folders based on similarity
def recommend_folders(similarity_matrix, file_paths, folder_names):
    """Choose, for each file, the folder with the highest similarity score.

    Row ``i`` of *similarity_matrix* is read for ``file_paths[i]``; the
    position of that row's maximum selects the entry of *folder_names*.

    Returns:
        A dict mapping each file path to its chosen folder name.
    """
    return {
        path: folder_names[similarity_matrix[row].argmax()]
        for row, path in enumerate(file_paths)
    }

In [None]:
# Step 4: Move documents to recommended folders
def move_documents(recommendations):
    """Move each file into its recommended folder, creating the folder if needed.

    Args:
        recommendations: Mapping of source file path to destination folder.

    Each file keeps its base name inside the destination folder, and one
    line is printed per move.
    """
    for file_path, recommended_folder in recommendations.items():
        # Create the target directory first so the move does not fail when a
        # category folder has not been created yet. An empty folder name
        # means "current directory", which os.makedirs cannot be asked to create.
        if recommended_folder:
            os.makedirs(recommended_folder, exist_ok=True)
        destination = os.path.join(recommended_folder, os.path.basename(file_path))
        shutil.move(file_path, destination)
        print(f'Moved {file_path} to {destination}')

In [None]:
# Main function
def organize_resumes(base_folder, folder_names):
    """Sort the text files under *base_folder* into the best-matching folders.

    Loads the documents, scores each one against every entry of
    *folder_names*, picks the highest-scoring folder per document and moves
    the file there.

    Args:
        base_folder: Directory that is searched for documents.
        folder_names: Candidate folder names; each is used both as the
            category text for scoring and as the destination folder.

    Returns:
        None. When no documents are found, a message is printed and nothing
        is moved.
    """
    documents, file_paths = load_documents_from_folder(base_folder)
    if not documents:
        # Stop before the similarity step: with zero documents there is
        # nothing to score, and scikit-learn rejects an empty sample set.
        print(f'No documents found in {base_folder}')
        return
    similarity_matrix = compute_similarity(documents, folder_names)
    recommendations = recommend_folders(similarity_matrix, file_paths, folder_names)
    move_documents(recommendations)

In [None]:
# Example usage: sort the documents found under `base_folder` into the
# category folders listed in `folder_names`.
# NOTE(review): 'path/to/resumes' looks like a placeholder — point it at a
# real directory before running this cell.
base_folder = 'path/to/resumes'
# Each name is used as the category text for scoring and as the destination
# folder path for the move.
folder_names = ['Engineering', 'Marketing', 'Data Science']  # Example categories
organize_resumes(base_folder, folder_names)