In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/human-resources-data-set/HRDataset_v14.csv
/kaggle/input/ibm-hr-analytics-attrition-dataset/WA_Fn-UseC_-HR-Employee-Attrition.csv


In [None]:
!pip install -q bitsandbytes accelerate transformers sentencepiece ipywidgets pandas numpy torch scikit-learn tqdm

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from sklearn.preprocessing import StandardScaler
import warnings, random, pickle, os

# Suppress warnings and disable tokenizer parallelism

In [None]:
# Install a catch-all "ignore" filter so no Python warning is shown from here on.
warnings.filterwarnings(action="ignore")
# Set the TOKENIZERS_PARALLELISM environment variable to "false" for this process.
os.environ.update(TOKENIZERS_PARALLELISM="false")

# Reproducibility

In [None]:
# Seed the three random number generators used in this notebook
# (Python's `random`, NumPy's legacy global RNG, and PyTorch) with the same value.
SEED = 42
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(SEED)

# Device

In [None]:
# Pick the CUDA device when PyTorch reports one as available, otherwise use the CPU.
backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(backend)
print(f"Using device: {device}")

# Load datasets

In [None]:
print("Loading HR datasets...")
# Read both HR CSV files from the Kaggle input directory.
ibm = pd.read_csv("/kaggle/input/ibm-hr-analytics-attrition-dataset/WA_Fn-UseC_-HR-Employee-Attrition.csv")
huebner = pd.read_csv("/kaggle/input/human-resources-data-set/HRDataset_v14.csv")

# Union of the distinct, non-null role names from each dataset, in sorted order.
ibm_role_names = set(ibm["JobRole"].dropna())
huebner_role_names = set(huebner["Position"].dropna())
real_roles = sorted(ibm_role_names.union(huebner_role_names))
print(f"Found {len(real_roles)} real job roles")

# Career ladders with strong progression paths

In [None]:
# Hand-written career tracks. Within a track, roles are listed in promotion order:
# each role is followed by the role it is promoted into.
_LADDER_TRACKS = {
    "data": [
        "Data Analyst",
        "Data Scientist",
        "Senior Data Scientist",
        "Lead Data Scientist",
        "Head of Data",
    ],
    "sales": [
        "Sales Executive",
        "Sales Manager",
        "Director of Sales",
        "VP Sales",
    ],
    "research": [
        "Laboratory Technician",
        "Research Scientist",
        "Senior Research Scientist",
    ],
    "software": [
        "Software Engineer",
        "Senior Software Engineer",
        "Tech Lead",
        "Software Engineering Manager",
        "Director of Engineering",
    ],
    "hr": [
        "Human Resources",
        "HR Manager",
        "HR Director",
        "VP Human Resources",
    ],
    "management": [
        "Manager",
        "Director",
        "VP",
    ],
    "network": [
        "Network Engineer",
        "Senior Network Engineer",
        "Network Architect",
        "IT Director",
    ],
}

# Downstream cells consume a plain list of lists, in the order declared above.
ladders = [list(track) for track in _LADDER_TRACKS.values()]

# Generate strong promotion edges

In [None]:
# Every consecutive pair of roles in a ladder is a (from_role, to_role) promotion edge.
# Each edge is repeated EDGE_REPEATS times, ladder by ladder and step by step.
EDGE_REPEATS = 15000
promotions = [
    edge
    for ladder in ladders
    for edge in zip(ladder, ladder[1:])
    for _ in range(EDGE_REPEATS)
]

# Full role list

In [None]:
# Node set of the graph: dataset roles plus every role mentioned in a ladder, sorted by name.
ladder_role_names = {role for ladder in ladders for role in ladder}
all_roles = sorted(ladder_role_names.union(real_roles))

# Map each role name to its row/column position in the adjacency matrix.
role_to_id = dict(zip(all_roles, range(len(all_roles))))
n_nodes = len(all_roles)

# Build adjacency matrix

In [None]:
print("Building career graph...")

# Translate every promotion edge into (source index, destination index).
src_ids = np.fromiter((role_to_id[src] for src, _ in promotions), dtype=np.intp)
dst_ids = np.fromiter((role_to_id[dst] for _, dst in promotions), dtype=np.intp)

# Count edge occurrences; np.add.at accumulates repeated index pairs one by one.
adj = np.zeros((n_nodes, n_nodes), dtype=np.float32)
np.add.at(adj, (src_ids, dst_ids), 1.0)

# Normalise each row by its total; rows with no outgoing edges are divided by 1 instead of 0.
row_sums = adj.sum(axis=1, keepdims=True)
row_sums = np.where(row_sums == 0, np.float32(1.0), row_sums)
adj = adj / row_sums

# Overwrite the diagonal with a fixed self-loop weight (applied after normalisation).
np.fill_diagonal(adj, 0.2)
adj_tensor = torch.from_numpy(adj).to(device=device, dtype=torch.float32)

# Real node features from IBM dataset

In [None]:
feature_cols = ["Age", "MonthlyIncome", "YearsAtCompany", "TotalWorkingYears", 
                "JobLevel", "PerformanceRating", "DistanceFromHome"]

# Per-role mean of each feature over the IBM employees.
role_means = ibm.groupby("JobRole")[feature_cols].mean()

# Raw feature table aligned to the graph's node order; roles absent from IBM get 0 placeholders.
feature_df = role_means.reindex(all_roles, fill_value=0).fillna(0)

# Fit the scaler only on roles that actually have IBM statistics. Fitting on the
# zero placeholders as well would drag the mean/variance towards 0 and give every
# feature-less role the same non-zero standardised vector.
has_ibm_stats = feature_df.index.isin(role_means.index)
scaler = StandardScaler()
scaled_features = np.zeros(feature_df.shape, dtype=np.float32)
if has_ibm_stats.any():
    scaled_features[has_ibm_stats] = scaler.fit_transform(feature_df.values[has_ibm_stats])

# Roles without statistics keep 0, i.e. the mean of the observed roles after standardisation.
node_features = torch.tensor(scaled_features, dtype=torch.float32).to(device)

# Advanced GNN with node features

In [None]:
class AdvancedCareerGNN(nn.Module):
    """Propagates per-node vectors over an adjacency matrix for a fixed number of steps.

    Each node starts from the sum of a learnable embedding and a linear projection
    of its feature vector. Every step multiplies by the adjacency matrix and then
    L2-normalises each node's vector. The propagation steps have no weights of
    their own.

    Args:
        num_nodes: Number of rows in the embedding table.
        feature_dim: Size of each input feature vector.
        hidden_dim: Size of the embedding / output vectors.
        layers: Number of propagate-then-normalise steps run by ``forward``.
    """

    def __init__(self, num_nodes, feature_dim, hidden_dim=128, layers=6):
        super().__init__()
        self.layers = layers
        self.node_emb = nn.Embedding(num_nodes, hidden_dim)
        self.feature_proj = nn.Linear(feature_dim, hidden_dim)
        # Replace the default embedding initialisation with Xavier-uniform.
        nn.init.xavier_uniform_(self.node_emb.weight)

    def forward(self, adj, features):
        """Return the node vectors after ``self.layers`` propagation steps.

        Args:
            adj: Matrix that left-multiplies the node vectors at every step.
            features: Per-node features, projected to ``hidden_dim`` and added
                to the embedding table.
        """
        hidden = self.feature_proj(features) + self.node_emb.weight
        for _step in range(self.layers):
            hidden = F.normalize(adj @ hidden, p=2, dim=1)
        return hidden

print("Training GNN with real employee features...")
# NOTE(review): despite the message above, no optimiser or loss is used here. The
# model is only run forward once with its initial weights to produce embeddings.
# The feature width is read from the feature matrix instead of being hard-coded,
# so editing the feature column list cannot silently desynchronise the projection layer.
gnn = AdvancedCareerGNN(
    n_nodes,
    feature_dim=node_features.shape[1],
    hidden_dim=128,
    layers=6,
).to(device)
with torch.no_grad():
    embeddings = gnn(adj_tensor, node_features).cpu().numpy()
print("GNN embeddings generated successfully!")

# Recommendation

In [None]:
def recommend_next_roles(current_role: str, top_k: int = 5):
    """Rank the other roles by embedding similarity to ``current_role``.

    Args:
        current_role: Role name to look up in ``role_to_id``.
        top_k: Maximum number of roles to return. Values <= 0 give an empty table.

    Returns:
        A DataFrame with columns ``"Next Role"`` and ``"Confidence"`` (dot product
        of the two embeddings, rounded to 4 decimals), best match first. The
        queried role itself is never included, even when ``top_k`` is at least
        the number of roles. If the role is unknown, a one-row DataFrame with a
        single ``"Error"`` column is returned instead.
    """
    if current_role not in role_to_id:
        return pd.DataFrame({"Error": ["Role not found"]})

    idx = role_to_id[current_role]
    scores = np.dot(embeddings, embeddings[idx])

    # Drop the queried role from the candidate set instead of masking it with a
    # sentinel score, so it cannot reappear when top_k >= number of roles.
    candidates = np.delete(np.arange(len(scores)), idx)
    ranked = candidates[np.argsort(-scores[candidates], kind="stable")]
    top_idx = ranked[:max(int(top_k), 0)]

    rows = [
        {"Next Role": all_roles[i], "Confidence": round(float(scores[i]), 4)}
        for i in top_idx
    ]
    # Explicit columns keep the schema stable even when there are no rows.
    return pd.DataFrame(rows, columns=["Next Role", "Confidence"])

# Load Qwen2-7B-Instruct (8-bit) to generate Persian explanations (prompt-only; no retrieval step)

In [None]:
print("Loading Qwen2-7B-Instruct (8-bit) for natural Persian explanations...")

MODEL_ID = "Qwen/Qwen2-7B-Instruct"

# Load the model weights with bitsandbytes 8-bit quantisation.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to run
# locally — confirm it is actually required for this model before keeping it.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model_rag = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quant_config,
    device_map="auto",
    trust_remote_code=True,
)

# Sampling settings applied to every call of the text-generation pipeline.
generation_settings = dict(
    max_new_tokens=180,
    do_sample=True,
    temperature=0.75,
    top_p=0.92,
    repetition_penalty=1.25,
)
rag = pipeline("text-generation", model=model_rag, tokenizer=tokenizer, **generation_settings)

# In-memory cache of generated explanations, keyed by "<current>→<next_role>".
rag_cache = dict()

def explain_career_path(current: str, next_role: str) -> str:
    """Return a short Persian explanation for moving from ``current`` to ``next_role``.

    The text is generated by the ``rag`` pipeline and memoised in ``rag_cache``
    under the key ``"<current>→<next_role>"``.

    Args:
        current: Name of the role the person holds now.
        next_role: Name of the recommended next role.

    Returns:
        The first paragraph of the generated text (cut at a blank line, ``###`` or
        the ``توضیح:`` marker). If that is shorter than 50 characters, a templated
        sentence is used instead. If generation raises, a generic fallback sentence
        is returned and nothing is cached, so a later call can retry.
    """
    import logging  # local import: not part of the notebook's import cell

    key = f"{current}→{next_role}"
    if key in rag_cache:
        return rag_cache[key]
    
    prompt = f"""به فارسی حرفه‌ای، روان و الهام‌بخش در ۳ جمله کوتاه بنویس:
چرا انتقال از «{current}» به «{next_role}» یک مسیر شغلی موفق، منطقی و رایج در شرکت‌های بزرگ است؟
مستقیم شروع کن. از شماره، لیست و ایموجی استفاده نکن."""

    try:
        result = rag(prompt, return_full_text=False)[0]["generated_text"].strip()
    except Exception as exc:
        # Surface the failure and do NOT cache the fallback: a transient error
        # (e.g. out of memory) must not permanently replace the explanation.
        logging.getLogger(__name__).warning(
            "Explanation generation failed for %r -> %r: %r", current, next_role, exc
        )
        return "این انتقال یکی از موفق‌ترین مسیرهای شغلی در صنعت است."

    # Keep only the first paragraph and drop anything after a heading/"explanation" marker.
    clean = result.split("\n\n")[0].split("###")[0].split("توضیح:")[0].strip()
    if len(clean) < 50:
        clean = f"انتقال از {current} به {next_role} یک پیشرفت طبیعی و پرارزش در مسیر شغلی است که مسئولیت و تأثیرگذاری شما را افزایش می‌دهد."
    
    rag_cache[key] = clean
    return clean

# Dashboard

In [7]:
ARTIFACT_PATH = "HR_Path_Pro_v9_Final.pkl"


def save_artifacts(path: str = ARTIFACT_PATH) -> None:
    """Pickle the embeddings, role index, role list and explanation cache to ``path``."""
    with open(path, "wb") as f:
        pickle.dump({"embeddings": embeddings, "role_to_id": role_to_id, "all_roles": all_roles, "rag_cache": rag_cache}, f)


# Dropdown raises if `value` is not one of `options`, so only preselect
# "Software Engineer" when the datasets actually contain it.
if "Software Engineer" in real_roles:
    default_role = "Software Engineer"
else:
    default_role = real_roles[0] if real_roles else None

dropdown = widgets.Dropdown(options=real_roles, value=default_role, description="Current Role:")
button = widgets.Button(description="Get Career Path", button_style="success", icon="rocket")
output = widgets.Output()

def on_click(b):
    """Button callback: render recommendations and explanations for the selected role.

    Args:
        b: The clicked button (unused; required by the ``on_click`` callback signature).
    """
    from html import escape  # local import: not part of the notebook's import cell

    with output:
        clear_output()
        role = dropdown.value
        print(f"Current Role: {role}\n")
        recs = recommend_next_roles(role, 5)

        # recommend_next_roles returns an "Error" table for unknown roles; show it
        # as-is instead of failing on the missing "Next Role" column below.
        if "Next Role" not in recs.columns:
            display(recs)
            return
        
        # Role names come from CSV data, so escape them before embedding in HTML.
        display(HTML(f"""
        <div style="background:linear-gradient(135deg,#667eea,#764ba2); padding:30px; border-radius:20px; text-align:center; color:white; font-size:20px;">
            <h2>Career Path Recommendations for <b>{escape(str(role))}</b></h2>
        </div><br>
        """))
        
        for rank, (_, r) in enumerate(recs.iterrows(), start=1):
            nr = r["Next Role"]
            conf = r["Confidence"]
            exp = explain_career_path(role, nr)
            print(f"{rank}. {nr}")
            print(f"    Confidence: {conf:.4f}")
            print(f"    {exp}\n")
            print("━" * 85)

        # Re-save after every click so the pickle contains the explanations
        # generated so far, not just the empty cache from cell execution time.
        save_artifacts()

button.on_click(on_click)

print("HR-Path Pro is ready! Select a role and click the button.")
display(widgets.VBox([dropdown, button]))
display(output)

# Save final artifacts (the explanation cache is still empty at this point;
# it is written again by on_click as explanations are generated).
save_artifacts()

Using device: cuda
Loading HR datasets...
Found 41 real job roles
Building career graph...
Training GNN with real employee features...
GNN embeddings generated successfully!
Loading Qwen2-7B-Instruct (8-bit) for natural Persian explanations...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


HR-Path Pro is ready! Select a role and click the button.


VBox(children=(Dropdown(description='Current Role:', index=36, options=('Accountant I', 'Administrative Assist…

Output()


HR-Path Pro v9.0 — Final & Perfect Version!
Download 'HR_Path_Pro_v9_Final.pkl' — Ready for GitHub and Resume!
