In [36]:
# JobVacancy, CV

### Генерация данных

In [37]:
import csv
from datetime import datetime

# Sample data based on provided schema and sample data
job_titles = ['Маляр', 'Водопроводчик', 'Электрик', 'Бетоноукладчик', 'Разнорабочий', 'Монтажник', 'Управляющий', 'Прораб']
order_payments = [5000, 10000, 15000, 20000, 25000]
job_locations = ['Краснодар', 'Анапа', 'Новороссийск', 'Сочи', 'Геленджик', 'Армавир']
skills_low = ['Кладка кирпича', 'Штукатурка', 'Малярные работы', 'Укладка ламината', 'Монтаж гипсокартона']
skills_high = ['Монтаж электрики', 'Укладка плитки', 'Монтаж сантехники', 'Монтаж систем отопления', 'Монтаж систем вентиляции']
skills_management = ['Управленческие навыки', 'Контроль ведения работ', 'Планирование проектов', 'Организация трудовых ресурсов', 'Управление бюджетом']

job_skills = {
    job_titles[0]: [skills_low[0], skills_low[1], skills_low[2], skills_low[3], skills_low[4]],
    job_titles[1]: [skills_high[2], skills_high[3]],
    job_titles[2]: [skills_high[0], skills_high[1], skills_high[2], skills_high[3], skills_high[4]],
    job_titles[3]: [skills_low[3], skills_low[4], skills_high[0]],
    job_titles[4]: [skills_low[2], skills_low[3], skills_low[4], skills_high[0], skills_high[2]],
    job_titles[5]: [skills_high[0], skills_high[1], skills_high[2], skills_high[3], skills_high[4]],
    job_titles[6]: [skills_management[0], skills_management[1], skills_management[2], skills_management[3], skills_management[4]],
    job_titles[7]: [skills_management[0], skills_management[2], skills_management[3]]
}

# Generate sample data for JobVacancy
job_vacancies = [
    {
        "id": i + 1,
        "title": job_titles[i % len(job_titles)],
        "priceFrom": str(order_payments[i % len(order_payments)]),
        "priceTo": str(order_payments[(i + 1) % len(order_payments)]),
        "priceExact": str(order_payments[(i + 2) % len(order_payments)]),
        "place": job_locations[i % len(job_locations)],
        "dateFrom": datetime.now().strftime('%Y-%m-%d'),
        "dateTo": (datetime.now().replace(month=(datetime.now().month % 12) + 1)).strftime('%Y-%m-%d'),
        "requireSelfEmployment": bool(i % 2),
        "summary": f"Summary for {job_titles[i % len(job_titles)]}",
        "content": f"Content for {job_titles[i % len(job_titles)]}",
        "organisation": f"Organization {i + 1}",
        "skills": job_skills[job_titles[i % len(job_titles)]],
        "skillsCount": len(job_skills[job_titles[i % len(job_titles)]]),
        "createdAt": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }
    for i in range(100)
]

# Generate sample data for CV
cvs = [
    {
        "id": i + 1,
        "user": f"User №{i + 1}",
        "title": f"Профессия №{i+1}",
        "priceFrom": str(order_payments[i % len(order_payments)]),
        "priceTo": str(order_payments[(i + 1) % len(order_payments)]),
        "priceExact": str(order_payments[(i + 2) % len(order_payments)]),
        "place": job_locations[i % len(job_locations)],
        "dateFrom": datetime.now().strftime('%Y-%m-%d'),
        "dateTo": (datetime.now().replace(month=(datetime.now().month % 12) + 1)).strftime('%Y-%m-%d'),
        "summary": f"Описание для резюме № {i + 1}",
        "content": f"Контент для резюме № {i + 1}",
        "skills": skills,
        "skillsCount": len(skills),
        "createdAt": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }
    for i, skills in enumerate([skills_low if i % 2 == 0 else skills_high for i in range(100)])
]

# Write job vacancies to CSV
with open('job_vacancies.csv', 'w', newline='') as csvfile:
    fieldnames = ["id", "title", "priceFrom", "priceTo", "priceExact", "place", "dateFrom", "dateTo", "requireSelfEmployment", "summary", "content", "organisation", "skills", "skillsCount", "createdAt"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for job in job_vacancies:
        writer.writerow(job)

# Write CVs to CS
with open('cvs.csv', 'w', newline='') as csvfile:
    fieldnames = ["id", "user", "title", "priceFrom", "priceTo", "priceExact", "place", "dateFrom", "dateTo", "summary", "content", "skills", "skillsCount", "createdAt"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for cv in cvs:
        writer.writerow(cv)

### Загрузка и подготовка данных

In [51]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity


# Assuming cvs and job_vacancies are defined elsewhere in the code
# Create interaction matrix
interaction_matrix = np.zeros((len(cvs), len(job_vacancies)))

for i, cv in enumerate(cvs):
    for j, job in enumerate(job_vacancies):
        common_skills = set(cv["skills"]).intersection(set(job["skills"]))
        skill_count = len(common_skills)
        price_diff = abs(int(cv["priceExact"]) - int(job["priceExact"]))  # Convert to integers
        normalized_price_diff = 1 - (price_diff / max(int(cv["priceExact"]), int(job["priceExact"])))  # Convert to integers
        interaction_matrix[i, j] = skill_count + normalized_price_diff

# Convert to DataFrame
interaction_df = pd.DataFrame(interaction_matrix, index=[cv["title"] for cv in cvs], columns=[job["title"] for job in job_vacancies])

train_data, test_data = train_test_split(interaction_df, test_size=0.2, random_state=42)


def calculate_similarity(cv, job):
    common_skills = set(cv["skills"]).intersection(set(job["skills"]))
    skill_count = len(common_skills)
    price_diff = abs(int(cv["priceExact"]) - int(job["priceExact"]))
    normalized_price_diff = 1 - (price_diff / max(int(cv["priceExact"]), int(job["priceExact"])))
    similarity_score = skill_count + normalized_price_diff
    
    # Additional factors to consider for similarity
    # Add more factors as needed
    if cv["place"] == job["place"]:
        similarity_score += 1
    
    # Calculate cosine similarity
    # cos_sim = cosine_similarity(np.array([cv["skills"]]), np.array([job["skills"]]))

    return similarity_score


# Calculate similarity scores for CVs and jobs
similarity_scores = np.zeros((len(cvs), len(job_vacancies)))

for i, cv in enumerate(cvs):
    for j, job in enumerate(job_vacancies):
        similarity_scores[i, j] = calculate_similarity(cv, job)


In [59]:
# Select a specific CV (e.g., the first CV)
cv_index = 0
cv = cvs[cv_index]

# Find the top 3 most similar jobs for the selected CV
top_jobs_indices = np.argsort(similarity_scores[cv_index])[::-1]

# Use a set to track already recommended jobs to avoid duplicates
recommended_jobs = set()
top_jobs = []
for idx in top_jobs_indices:
    job = job_vacancies[idx]
    if job["title"] not in recommended_jobs:
        recommended_jobs.add(job["title"])
        similarity_score = similarity_scores[cv_index, idx]
        max_possible_score = len(cv["skills"]) + 1  # Add 1 for the place similarity factor
        similarity_percentage = (similarity_score / max_possible_score) * 100
        top_jobs.append((job, similarity_score, similarity_percentage))
    if len(top_jobs) == 10:
        break

# Print the selected CV and the top 3 recommended jobs with scores
print(f"CV {cv['title']}: {', '.join(cv['skills'])}\n")
print("Рекомендуем вакансии:\n")
for job, score, percentage in top_jobs:
    if percentage > 100:
        percentage = 100
    print(f"(Баллы схожести: {score:.0f}, {percentage:.0f}%) \t| {job['title']}: {', '.join(job['skills'])} ")

CV Профессия №1: Кладка кирпича, Штукатурка, Малярные работы, Укладка ламината, Монтаж гипсокартона

Рекомендуем вакансии:

(Баллы схожести: 7, 100%) 	| Маляр: Кладка кирпича, Штукатурка, Малярные работы, Укладка ламината, Монтаж гипсокартона 
(Баллы схожести: 5, 83%) 	| Разнорабочий: Малярные работы, Укладка ламината, Монтаж гипсокартона, Монтаж электрики, Монтаж сантехники 
(Баллы схожести: 3, 50%) 	| Бетоноукладчик: Укладка ламината, Монтаж гипсокартона, Монтаж электрики 
(Баллы схожести: 2, 33%) 	| Управляющий: Управленческие навыки, Контроль ведения работ, Планирование проектов, Организация трудовых ресурсов, Управление бюджетом 
(Баллы схожести: 2, 33%) 	| Электрик: Монтаж электрики, Укладка плитки, Монтаж сантехники, Монтаж систем отопления, Монтаж систем вентиляции 
(Баллы схожести: 1, 17%) 	| Прораб: Управленческие навыки, Планирование проектов, Организация трудовых ресурсов 
(Баллы схожести: 1, 17%) 	| Монтажник: Монтаж электрики, Укладка плитки, Монтаж сантехники, Монтаж сис