In [None]:
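# One-time setup: install these packages into the kernel's environment before
# running the notebook (run the commands in a terminal, or use `%pip install ...`
# in a cell so the install targets the active kernel).
# pip install transformers datasets
# pip install torch
# pip install streamlit
# pip install scikit-learn
# pip install openai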

In [1]:
from transformers import AutoModel, AutoTokenizer
import torch

# Hugging Face Hub identifier of the pretrained career-path encoder.
model_name = "ElenaSenger/career-path-representation-mpnet-karrierewege"

# Tokenizer and encoder are both resolved from the same checkpoint name;
# the tokenizer is loaded first, then the model weights.
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModel.from_pretrained(model_name),
)

print("Model and tokenizer loaded successfully!")


Model and tokenizer loaded successfully!


In [4]:
# Example input: a career path written as free text, job titles joined by "->".
career_path = "Sales Assistant -> Marketing Manager -> Product Manager"

# Tokenize into PyTorch tensors. truncation=True mirrors the candidate-job
# tokenization used later in this notebook and keeps an unusually long career
# path from exceeding the tokenizer's configured maximum length.
inputs = tokenizer(career_path, truncation=True, return_tensors="pt")

# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)

# Inspect the raw model output object (token-level hidden states plus the
# pooled vector) before extracting anything from it.
print(outputs)

BaseModelOutputWithPooling(last_hidden_state=tensor([[[ 0.3549, -0.2470, -0.0974,  ..., -0.0625, -0.1514, -0.0113],
         [ 0.2996, -0.2911, -0.0321,  ..., -0.0885, -0.1321, -0.0254],
         [ 0.2562, -0.2991, -0.0554,  ...,  0.0378, -0.0767, -0.0447],
         ...,
         [ 0.3100, -0.0965, -0.0552,  ...,  0.0008, -0.1039, -0.0540],
         [ 0.2900, -0.1042, -0.0439,  ..., -0.0142, -0.1139, -0.0601],
         [ 0.2829, -0.1751, -0.0781,  ...,  0.0088, -0.1780,  0.0057]]]), pooler_output=tensor([[ 1.4961e-02, -1.0342e-02, -4.6827e-02, -2.4335e-02, -6.1696e-02,
         -1.8173e-01, -3.5382e-02,  7.8608e-03, -7.4538e-02,  1.1217e-01,
         -4.9914e-02,  3.1809e-02,  1.0934e-02, -7.5257e-02,  4.7661e-02,
         -8.9732e-03, -1.4492e-01, -7.8253e-02,  3.5503e-02, -7.3771e-02,
          6.7314e-02,  5.3020e-02,  1.5734e-02,  4.0780e-02, -4.1518e-02,
         -3.2935e-02, -3.6083e-02, -1.2987e-01,  5.4537e-02,  3.3685e-02,
          1.6684e-04,  1.3270e-02,  1.2826e-01, -5.962

In [5]:
# Take the model's pooled output as the fixed-size embedding of the career path.
# NOTE(review): pooler_output is used as-is here; if this checkpoint expects a
# different pooling (e.g. an attention-masked mean over last_hidden_state),
# the embeddings should be computed that way instead — TODO confirm against
# the model card.
career_embedding = outputs.pooler_output

# Report the tensor's shape first, then the tensor itself.
print(f"Career Path Embedding Shape: {career_embedding.shape}")
print(f"Career Path Embedding Vector: {career_embedding}")


Career Path Embedding Shape: torch.Size([1, 768])
Career Path Embedding Vector: tensor([[ 1.4961e-02, -1.0342e-02, -4.6827e-02, -2.4335e-02, -6.1696e-02,
         -1.8173e-01, -3.5382e-02,  7.8608e-03, -7.4538e-02,  1.1217e-01,
         -4.9914e-02,  3.1809e-02,  1.0934e-02, -7.5257e-02,  4.7661e-02,
         -8.9732e-03, -1.4492e-01, -7.8253e-02,  3.5503e-02, -7.3771e-02,
          6.7314e-02,  5.3020e-02,  1.5734e-02,  4.0780e-02, -4.1518e-02,
         -3.2935e-02, -3.6083e-02, -1.2987e-01,  5.4537e-02,  3.3685e-02,
          1.6684e-04,  1.3270e-02,  1.2826e-01, -5.9628e-02, -1.1799e-01,
          2.8661e-02,  8.1314e-02, -6.0863e-02, -9.8545e-02, -1.9114e-02,
          1.9177e-02, -5.6334e-02,  8.8772e-02, -3.7494e-02,  1.2563e-01,
          8.3860e-02,  1.3135e-01, -1.2900e-01, -3.5906e-02,  3.2963e-02,
         -1.3794e-02,  7.4881e-03,  7.5082e-02,  3.4697e-02,  5.7450e-02,
          6.3818e-02, -7.7011e-03,  8.5893e-02, -8.1865e-02, -5.3703e-02,
         -2.0542e-03,  9.7507e-0

In [6]:
# Job titles to rank as possible next steps for the career history below.
candidate_jobs = [
    "Product Manager",
    "Sales Manager",
    "Project Manager",
    "Business Analyst",
    "Marketing Specialist",
]

# The path so far, in the same "A -> B" text format as the earlier example.
career_history = "Sales Assistant -> Marketing Manager"

# truncation=True matches the candidate-job tokenization in the next cell and
# guards against a history longer than the tokenizer's configured maximum.
inputs_history = tokenizer(career_history, truncation=True, return_tensors="pt")

# Inference only: no gradients are needed to compute the embedding.
with torch.no_grad():
    outputs_history = model(**inputs_history)

# Pooled output serves as the embedding of the whole history string.
# NOTE(review): confirm pooler_output is the intended pooling for this
# checkpoint — TODO verify against the model card.
history_embedding = outputs_history.pooler_output


In [7]:
# Embed every candidate job title in a single batched forward pass.
with torch.no_grad():
    # padding=True pads the titles to a common length so they can be stacked
    # into one batch; truncation=True clips any title that is too long.
    candidate_inputs = tokenizer(
        candidate_jobs,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    candidate_outputs = model(**candidate_inputs)

# One pooled vector per candidate, in the same order as `candidate_jobs`.
candidate_embeddings = candidate_outputs.pooler_output


In [8]:
import torch.nn.functional as F

# Maximum number of recommendations to display.
TOP_K = 3

# Cosine similarity between the history embedding and each candidate job
# embedding. The single history row is broadcast against every candidate row,
# giving one score per entry of `candidate_jobs`.
similarity_scores = F.cosine_similarity(history_embedding, candidate_embeddings, dim=1)

# Cap k at the number of candidates: topk raises if asked for more elements
# than the tensor holds, which would happen with fewer than TOP_K candidates.
k = min(TOP_K, len(candidate_jobs))
top_scores, top_indices = similarity_scores.topk(k)

print("Top Predicted Next Jobs:")
# Convert to plain Python floats/ints so list indexing and formatting do not
# depend on 0-dim tensor conversions.
for score, idx in zip(top_scores.tolist(), top_indices.tolist()):
    print(f"- {candidate_jobs[idx]} (Score: {score:.4f})")


Top Predicted Next Jobs:
- Sales Manager (Score: 0.8279)
- Marketing Specialist (Score: 0.6642)
- Product Manager (Score: 0.5542)


In [2]:
import streamlit as st

# NOTE: Streamlit widgets only render when this code is executed with
# `streamlit run <script>.py`; executed inside a notebook kernel the calls
# below only emit warnings. NOTE(review): `submit` is not consumed in the
# visible cells — presumably the recommendation logic is still to be wired in.

# Choices offered by each input widget.
INTEREST_OPTIONS = [
    "Technology", "Business", "Healthcare", "Engineering", "Media", "Agriculture",
    "Education", "Law", "Design", "Finance", "AI", "Environment",
]
SKILL_OPTIONS = [
    "Problem-solving", "Communication", "Creativity", "Teamwork", "Coding",
    "Math", "Writing", "Critical thinking", "Public speaking",
]
EDUCATION_LEVELS = ["Grade 10", "Grade 11", "Grade 12", "Post-matric / University"]

# Page chrome: configuration, title, and intro text.
st.set_page_config(page_title="Career Path Recommender", layout="centered")
st.title("🚀 Career Path Recommender")
st.write("Tell us about yourself to discover your future career path!")

# === Student Inputs ===
interests = st.multiselect("What are you interested in?", INTEREST_OPTIONS)
skills = st.multiselect("What skills do you have?", SKILL_OPTIONS)
education_level = st.selectbox("What is your current education level?", EDUCATION_LEVELS)

# Button that triggers the recommendation.
submit = st.button("Find My Career Path")


2025-03-30 16:23:24.287 
  command:

    streamlit run c:\Users\Dolly\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-03-30 16:23:24.319 Session state does not function when running a script without `streamlit run`
