In [None]:
from sentence_transformers import SentenceTransformer, util
import pandas as pd

# Load the sentence-embedding model used to encode both employees and tasks
model = SentenceTransformer('all-MiniLM-L6-v2')

# Load CSVs directly (paths must match Colab paths)
df_emp = pd.read_csv('/content/employee_profiles.csv')
# NOTE(review): the space before ".csv" is kept exactly as written; confirm it
# matches the uploaded file name.
df_tasks = pd.read_csv('/content/task_catalog .csv')

# Standardize column names (trim, lowercase, spaces -> underscores) so the
# lookups below do not depend on how the CSV headers were typed.
for frame in (df_emp, df_tasks):
    frame.columns = frame.columns.str.strip().str.lower().str.replace(" ", "_")

# Print for debug
print("🧾 Employee Columns:", df_emp.columns.tolist())
print("🧾 Task Columns:", df_tasks.columns.tolist())

# ✅ Use 'skills' column instead of 'resume_text'
# encode() is given plain lists of strings rather than pandas Series: a Series
# is looked up by label, which only lines up with positions for a default
# 0..n-1 index, and a missing cell would be passed through as a float NaN.
# Missing cells are encoded as empty strings instead.
emp_texts = df_emp['skills'].fillna('').astype(str).tolist()
task_texts = df_tasks['task_description'].fillna('').astype(str).tolist()

emp_embeddings = model.encode(emp_texts, convert_to_tensor=True)
task_embeddings = model.encode(task_texts, convert_to_tensor=True)

# Calculate cosine similarity: one row per employee, one column per task
cos_sim = util.cos_sim(emp_embeddings, task_embeddings)

# Match the single best-scoring task for each employee.
# cos_sim holds one row per employee and one column per task, so a row-wise
# argmax gives the position of each employee's best task in df_tasks.
RESULT_COLUMNS = [
    'employee_id', 'name', 'role', 'matched_task',
    'automation_possible', 'suggested_tool', 'match_score',
]

if len(df_tasks):
    best_task_positions = cos_sim.argmax(dim=1)
    # gather() picks the score at each employee's winning position, so the
    # score always corresponds to the task chosen by argmax (ties included).
    best_scores = cos_sim.gather(1, best_task_positions.unsqueeze(1)).squeeze(1).tolist()
    best_task_positions = best_task_positions.tolist()
else:
    # With no tasks there is nothing to match; produce an empty result
    # instead of failing on argmax over an empty row.
    best_task_positions, best_scores = [], []

# Pull only the needed columns once, instead of materializing a full row
# with .iloc for every field of every employee.
emp_records = df_emp[['employee_id', 'name', 'role']].to_dict('records')
task_records = df_tasks[
    ['task_description', 'automation_possible', 'suggested_tool']
].to_dict('records')

matches = [
    {
        'employee_id': emp['employee_id'],
        'name': emp['name'],
        'role': emp['role'],
        'matched_task': task_records[task_pos]['task_description'],
        'automation_possible': task_records[task_pos]['automation_possible'],
        'suggested_tool': task_records[task_pos]['suggested_tool'],
        'match_score': round(score, 4),
    }
    for emp, task_pos, score in zip(emp_records, best_task_positions, best_scores)
]

# Final result
# Explicit columns keep 'match_score' present even when there are no matches,
# so the sort below cannot raise KeyError on an empty frame.
final_df = pd.DataFrame(matches, columns=RESULT_COLUMNS)
print("✅ Best Task Matches per Employee")
print(final_df.sort_values(by='match_score', ascending=False).head(10))


🧾 Employee Columns: ['employee_id', 'name', 'role', 'skills']
🧾 Task Columns: ['task_id', 'task_description', 'automation_possible', 'suggested_tool']
✅ Best Task Matches per Employee
   employee_id     name             role                  matched_task  \
3          104    David    HR Specialist             Handle onboarding   
4          105      Eva  Finance Analyst  Prepare financial statements   
2          103  Charlie      ML Engineer          Coordinate logistics   
0          101    Alice   Data Scientist          Coordinate logistics   
1          102      Bob  DevOps Engineer             Schedule meetings   

  automation_possible            suggested_tool  match_score  
3                  No                         -       0.5580  
4                 Yes      Excel Macro + Python       0.5383  
2                 Yes       Trello + Slack Bots       0.1696  
0                 Yes       Trello + Slack Bots       0.1594  
1                 Yes  Google Calendar + Zapier       0.