In [1]:
# Scaffold the project's working directories (safe to re-run: existing ones are kept)
import os

folders = ["data", "catalog", "embeddings", "api", "frontend", "evaluation", "outputs"]

for f in folders:
    os.makedirs(f, exist_ok=True)

# Show the contents of the current working directory as confirmation
os.listdir()


['.config',
 'data',
 'api',
 'outputs',
 'frontend',
 'evaluation',
 'catalog',
 'embeddings',
 'sample_data']

In [2]:
!pip install sentence-transformers scikit-learn pandas numpy fastapi uvicorn streamlit


Collecting streamlit
  Downloading streamlit-1.52.2-py3-none-any.whl.metadata (9.8 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.2-py3-none-any.whl (9.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m67.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m85.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.52.2


In [3]:
import pandas as pd
import numpy as np

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity




In [4]:
# Step 5: Hand-written snapshot of the SHL catalog (small sample; add entries to extend it).
# Every entry carries the same four fields: name, url, description, test_type.

catalog = [
    dict(
        name="Java Programming Test",
        url="https://www.shl.com/solutions/products/java-programming/",
        description="Evaluates Java programming skills, object-oriented concepts, and coding ability.",
        test_type="K",
    ),
    dict(
        name="Python Programming Test",
        url="https://www.shl.com/solutions/products/python-programming/",
        description="Assesses Python coding, scripting, and data handling skills.",
        test_type="K",
    ),
    dict(
        name="Collaboration & Teamwork",
        url="https://www.shl.com/solutions/products/teamwork/",
        description="Measures collaboration, communication, and teamwork behavior.",
        test_type="P",
    ),
    dict(
        name="Problem Solving Ability",
        url="https://www.shl.com/solutions/products/problem-solving/",
        description="Tests logical reasoning, analytical thinking, and problem-solving skills.",
        test_type="K",
    ),
    dict(
        name="Personality Questionnaire",
        url="https://www.shl.com/solutions/products/personality/",
        description="Evaluates workplace personality traits and behavioral tendencies.",
        test_type="P",
    ),
]

# One row per catalog entry, one column per field
catalog_df = pd.DataFrame(data=catalog)
catalog_df


Unnamed: 0,name,url,description,test_type
0,Java Programming Test,https://www.shl.com/solutions/products/java-pr...,"Evaluates Java programming skills, object-orie...",K
1,Python Programming Test,https://www.shl.com/solutions/products/python-...,"Assesses Python coding, scripting, and data ha...",K
2,Collaboration & Teamwork,https://www.shl.com/solutions/products/teamwork/,"Measures collaboration, communication, and tea...",P
3,Problem Solving Ability,https://www.shl.com/solutions/products/problem...,"Tests logical reasoning, analytical thinking, ...",K
4,Personality Questionnaire,https://www.shl.com/solutions/products/persona...,Evaluates workplace personality traits and beh...,P


In [5]:
# Step 6: Embed every catalog entry with a sentence-transformer model

model = SentenceTransformer("all-MiniLM-L6-v2")

# One text per catalog row: "<name> <description>"
texts = catalog_df["name"].str.cat(catalog_df["description"], sep=" ").tolist()
catalog_embeddings = model.encode(texts)

# Sanity check on the result's shape
catalog_embeddings.shape


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

(5, 384)

In [6]:
# Step 7: Recommendation logic

def recommend_assessments(query, top_k=5):
    """Rank catalog assessments by cosine similarity to a free-text query.

    The query is encoded with the notebook-level ``model`` and compared
    against the precomputed ``catalog_embeddings``.

    Parameters
    ----------
    query
        Text describing the hiring need; it is passed to ``model.encode``
        as a single-element list.
    top_k : int, default 5
        Number of top-ranked rows to keep (forwarded to ``DataFrame.head``).

    Returns
    -------
    pandas.DataFrame
        The ``name``, ``url``, ``test_type`` and ``score`` columns of the
        best-scoring catalog rows, ordered by ``score`` descending. Rows keep
        their index labels from ``catalog_df``.

    Notes
    -----
    Scores are attached to a copy of ``catalog_df`` (via ``assign``), so the
    shared catalog frame is not modified by a call. Writing the column into
    ``catalog_df`` directly would leave behind a ``score`` column that
    reflects whichever query happened to run last.
    """
    query_embedding = model.encode([query])
    # cosine_similarity returns one row per query; there is only one query here.
    similarities = cosine_similarity(query_embedding, catalog_embeddings)[0]

    scored = catalog_df.assign(score=similarities)
    ranked = scored.sort_values("score", ascending=False)

    return ranked.head(top_k)[["name", "url", "test_type", "score"]]


In [7]:
# Smoke-test the recommender on one example hiring request
query = "Need a Java developer who can collaborate with external teams and stakeholders"

recommend_assessments(query=query)


Unnamed: 0,name,url,test_type,score
0,Java Programming Test,https://www.shl.com/solutions/products/java-pr...,K,0.398179
2,Collaboration & Teamwork,https://www.shl.com/solutions/products/teamwork/,P,0.303246
3,Problem Solving Ability,https://www.shl.com/solutions/products/problem...,K,0.145169
1,Python Programming Test,https://www.shl.com/solutions/products/python-...,K,0.111269
4,Personality Questionnaire,https://www.shl.com/solutions/products/persona...,P,0.103325


In [8]:
# Step 10: Open the SHL dataset workbook and list the sheets it contains

dataset_path = "Gen_AI Dataset.xlsx"
xls = pd.ExcelFile(dataset_path)

xls.sheet_names


['Train-Set', 'Test-Set']

In [9]:
# Read the workbook's first sheet into train_df and its second sheet into test_df
train_df, test_df = (pd.read_excel(xls, sheet_name=position) for position in (0, 1))

# Preview the first rows of both frames
train_df.head(), test_df.head()


(                                               Query  \
 0  I am hiring for Java developers who can also c...   
 1  I am hiring for Java developers who can also c...   
 2  I am hiring for Java developers who can also c...   
 3  I am hiring for Java developers who can also c...   
 4  I am hiring for Java developers who can also c...   
 
                                       Assessment_url  
 0  https://www.shl.com/solutions/products/product...  
 1  https://www.shl.com/solutions/products/product...  
 2  https://www.shl.com/solutions/products/product...  
 3  https://www.shl.com/solutions/products/product...  
 4  https://www.shl.com/products/product-catalog/v...  ,
                                                Query
 0  Looking to hire mid-level professionals who ar...
 1  Job Description\n\n Join a community that is s...
 2  I am hiring for an analyst and wants applicati...
 3  I have a JD Job Description\n\n People Science...
 4  I am new looking for new graduates in my sale

In [11]:
# Inspect the column labels of the test frame before iterating over its queries
test_df.columns


Index(['Query'], dtype='object')

In [12]:
# Step 11: Build the submission table, one row per (query, recommended URL) pair

rows = []

for query in test_df["Query"]:
    results = recommend_assessments(query, top_k=5)
    # Recommended URLs are appended in ranked order for this query
    rows.extend({"Query": query, "Assessment_url": url} for url in results["url"])

submission_df = pd.DataFrame(rows)
submission_df.head()


Unnamed: 0,Query,Assessment_url
0,Looking to hire mid-level professionals who ar...,https://www.shl.com/solutions/products/python-...
1,Looking to hire mid-level professionals who ar...,https://www.shl.com/solutions/products/problem...
2,Looking to hire mid-level professionals who ar...,https://www.shl.com/solutions/products/java-pr...
3,Looking to hire mid-level professionals who ar...,https://www.shl.com/solutions/products/persona...
4,Looking to hire mid-level professionals who ar...,https://www.shl.com/solutions/products/teamwork/


In [13]:
# Write the submission table to disk without the index column
submission_df.to_csv(path_or_buf="outputs/submission.csv", index=False)


In [14]:
# Final check: first five rows plus the overall (rows, columns) shape
submission_df.head(5), submission_df.shape


(                                               Query  \
 0  Looking to hire mid-level professionals who ar...   
 1  Looking to hire mid-level professionals who ar...   
 2  Looking to hire mid-level professionals who ar...   
 3  Looking to hire mid-level professionals who ar...   
 4  Looking to hire mid-level professionals who ar...   
 
                                       Assessment_url  
 0  https://www.shl.com/solutions/products/python-...  
 1  https://www.shl.com/solutions/products/problem...  
 2  https://www.shl.com/solutions/products/java-pr...  
 3  https://www.shl.com/solutions/products/persona...  
 4   https://www.shl.com/solutions/products/teamwork/  ,
 (45, 2))