# 🔍 SHL Assessment Recommendation System
This notebook builds a smart recommender for SHL assessments based on job descriptions using sentence embeddings.

In [1]:
!pip install -q pandas faiss-cpu sentence-transformers

## 📦 Install & Import Libraries

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import pandas as pd

csv_path = '/content/drive/MyDrive/shl-recommendation-system/data/shl_assessments.csv'

df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Assessment Name,URL,Remote Testing,Adaptive/IRT,Test Type,Duration
0,Global Skills Development Report,https://www.shl.com/solutions/products/product...,Yes,No,"A, E, B, C, D, P",60 mins
1,.NET Framework 4.5,https://www.shl.com/solutions/products/product...,Yes,Yes,K,45 mins
2,.NET MVC (New),https://www.shl.com/solutions/products/product...,Yes,No,K,40 mins
3,.NET MVVM (New),https://www.shl.com/solutions/products/product...,Yes,No,K,35 mins
4,.NET WCF (New),https://www.shl.com/solutions/products/product...,Yes,No,K,40 mins


In [4]:
df.columns = df.columns.str.strip()

## 📊 Load SHL Data and Prepare Corpus

In [5]:
from sentence_transformers import SentenceTransformer
import numpy as np

model = SentenceTransformer('all-MiniLM-L6-v2')

def row_to_text(row):
    return (
        f"{row['Assessment Name']}. "
        f"Remote Testing: {row['Remote Testing']}, "
        f"Adaptive/IRT: {row['Adaptive/IRT']}, "
        f"Duration: {row['Duration']}, "
        f"Test Type: {row['Test Type']}."
    )

texts = df.apply(row_to_text, axis=1).tolist()

embeddings = model.encode(texts, show_progress_bar=True)
embeddings = np.array(embeddings)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [6]:
!pip install -q faiss-cpu

## 🧠 Encode SHL Assessments using Sentence Transformers

In [7]:
import faiss

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

print(f"Index size: {index.ntotal} vectors")

Index size: 20 vectors


In [8]:
def recommend_assessments(user_query, top_k=10):
    query_embedding = model.encode([user_query])

    distances, indices = index.search(query_embedding, top_k)

    results = df.iloc[indices[0]].copy()
    results["Similarity Score"] = distances[0]
    return results.reset_index(drop=True)

In [9]:
user_query = "Looking to hire for Python and JavaScript with a max test time of 60 minutes"
recommend_assessments(user_query)

Unnamed: 0,Assessment Name,URL,Remote Testing,Adaptive/IRT,Test Type,Duration,Similarity Score
0,AI Skills,https://www.shl.com/solutions/products/product...,Yes,No,P,25 mins,1.19476
1,Accounts Payable Simulation (New),https://www.shl.com/solutions/products/product...,Yes,No,S,50 mins,1.196625
2,Accounts Payable (New),https://www.shl.com/solutions/products/product...,Yes,No,K,45 mins,1.214674
3,Global Skills Development Report,https://www.shl.com/solutions/products/product...,Yes,No,"A, E, B, C, D, P",60 mins,1.215753
4,ADO.NET (New),https://www.shl.com/solutions/products/product...,Yes,No,K,45 mins,1.216837
5,Adobe Photoshop CC,https://www.shl.com/solutions/products/product...,Yes,Yes,K,40 mins,1.223442
6,Adobe Experience Manager (New),https://www.shl.com/solutions/products/product...,Yes,No,K,35 mins,1.233955
7,Agile Software Development,https://www.shl.com/solutions/products/product...,Yes,No,K,35 mins,1.235825
8,Aeronautical Engineering (New),https://www.shl.com/solutions/products/product...,Yes,No,K,45 mins,1.249209
9,.NET Framework 4.5,https://www.shl.com/solutions/products/product...,Yes,Yes,K,45 mins,1.253111


## 🎯 Define Semantic Recommendation Function

In [10]:
code = """
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the data
df = pd.read_csv('shl_assessments.csv')
df.columns = df.columns.str.strip()

# Load model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Add Duration to text for better semantic search
def row_to_text(row):
    return (
        f"{row['Assessment Name']}. Remote Testing: {row['Remote Testing']}, "
        f"Adaptive/IRT: {row['Adaptive/IRT']}, Duration: {row['Duration']}, "
        f"Test Type: {row['Test Type']}."
    )

# Prepare embeddings
texts = df.apply(row_to_text, axis=1).tolist()
embeddings = model.encode(texts, show_progress_bar=True)
embeddings = np.array(embeddings)

# Create FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Recommendation function
def recommend_assessments(user_query, top_k=5):
    query_embedding = model.encode([user_query])
    distances, indices = index.search(query_embedding, top_k)
    results = df.iloc[indices[0]].copy()
    results["Similarity Score"] = distances[0]
    return results[["Assessment Name", "URL", "Remote Testing", "Adaptive/IRT", "Duration", "Test Type", "Similarity Score"]]

# ==== Run manually in Colab ====
sample_query = "Hiring a data analyst with Excel and cognitive skills for a 30 minute test"
top_recommendations = recommend_assessments(sample_query, top_k=5)

# Display
print("🔍 Top SHL Assessment Recommendations:")
display(top_recommendations)

"""

# Save to app.py
with open("app.py", "w") as f:
    f.write(code)

In [11]:
!pip install streamlit



In [12]:
!pip install fastapi uvicorn nest_asyncio sentence-transformers pandas
!npm install -g localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K
changed 22 packages in 4s
[1G[0K⠋[1G[0K
[1G[0K⠋[1G[0K3 packages are looking for funding
[1G[0K⠋[1G[0K  run `npm fund` for details
[1G[0K⠋[1G[0K

## 📈 Optional: Evaluate with Recall@3 and MAP@3

In [13]:
import nest_asyncio
import pandas as pd
from fastapi import FastAPI, Query
from pydantic import BaseModel
from typing import List
from sentence_transformers import SentenceTransformer, util
import uvicorn
import threading

nest_asyncio.apply()

# Init FastAPI app
app = FastAPI()
df = pd.read_csv("shl_assessments.csv")
df.fillna("Not Available", inplace=True)
df["Full Description"] = df["Assessment Name"] + " " + df["Test Type"]

model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(df["Full Description"].tolist(), convert_to_tensor=True)

class Assessment(BaseModel):
    assessment_name: str
    url: str
    remote_testing: str
    adaptive_irt: str
    test_type: str
    duration: str

@app.get("/recommend", response_model=List[Assessment])
def recommend(query: str = Query(...), top_k: int = 5):
    query_embedding = model.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, embeddings, top_k=top_k)[0]
    results = []
    for hit in hits:
        row = df.iloc[hit["corpus_id"]]
        results.append({
            "assessment_name": row["Assessment Name"],
            "url": row["URL"],
            "remote_testing": row["Remote Testing"],
            "adaptive_irt": row["Adaptive/IRT"],
            "test_type": row["Test Type"],
            "duration": row["Duration"]
        })
    return results

In [14]:
def run():
    uvicorn.run(app, host="0.0.0.0", port=8000)

thread = threading.Thread(target=run)
thread.start()

In [15]:
!pip install fastapi uvicorn nest-asyncio pyngrok
!npm install -g localtunnel

INFO:     Started server process [5332]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K
changed 22 packages in 3s
[1G[0K⠼[1G[0K
[1G[0K⠼[1G[0K3 packages are looking for funding
[1G[0K⠼[1G[0K  run `npm fund` for details
[1G[0K⠼[1G[0K

## 🧩 Launch Gradio App with Download Option

In [16]:
%%writefile api.py
from typing import List
from sentence_transformers import SentenceTransformer, util

app = FastAPI()

df = pd.read_csv("shl_assessments.csv")
df.fillna("Not Available", inplace=True)
df["Full Description"] = df["Assessment Name"] + " " + df["Test Type"]

model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(df["Full Description"].tolist(), convert_to_tensor=True)

class Assessment(BaseModel):
    assessment_name: str
    url: str
    remote_testing: str
    adaptive_irt: str
    test_type: str
    duration: str

@app.get("/recommend", response_model=List[Assessment])
def recommend(query: str = Query(...), top_k: int = 5):
    query_embedding = model.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, embeddings, top_k=top_k)[0]
    results = []
    for hit in hits:
        i = hit["corpus_id"]
        row = df.iloc[i]
        results.append({
            "assessment_name": row["Assessment Name"],
            "url": row["URL"],
            "remote_testing": row["Remote Testing"],
            "adaptive_irt": row["Adaptive/IRT"],
            "test_type": row["Test Type"],
            "duration": row["Duration"]
        })
    return results

Overwriting api.py


In [17]:
nest_asyncio.apply()

def run():
    uvicorn.run("api:app", host="0.0.0.0", port=8000)

threading.Thread(target=run).start()

In [18]:
!pip install -q gradio sentence-transformers pandas

INFO:     Started server process [5332]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


## ✅ Example Queries for Demo Testing

In [23]:
import gradio as gr

df = pd.read_csv("shl_assessments.csv")
df.fillna("Not Available", inplace=True)
df["Full Description"] = df["Assessment Name"].astype(str) + " " + df["Test Type"].astype(str)

model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(df["Full Description"].tolist(), convert_to_tensor=True)

def recommend(query, top_k):
    try:
        if not query.strip():
            return {"error": "Please enter a valid query."}

        query_embedding = model.encode(query, convert_to_tensor=True)
        hits = util.semantic_search(query_embedding, embeddings, top_k=top_k)[0]

        results = []
        for hit in hits:
            i = hit["corpus_id"]
            row = df.iloc[i]
            result = {
                "Assessment Name": row.get("Assessment Name", "N/A"),
                "URL": row.get("URL", "N/A"),
                "Remote Testing": row.get("Remote Testing", "N/A"),
                "Adaptive/IRT": row.get("Adaptive/IRT", "N/A"),
                "Test Type": row.get("Test Type", "N/A"),
                "Duration": row.get("Duration", "N/A")
            }
            results.append(result)
        return results

    except Exception as e:
        return {"error": str(e)}

eval_queries = [
    {
        "query": "Looking to hire AWS developers",
        "ground_truth": ["Amazon Web Services (AWS) Development (New)"]
    },
    {
        "query": "Need designers with Adobe tools expertise",
        "ground_truth": ["Adobe Photoshop CC", "Adobe Experience Manager (New)"]
    },
    {
        "query": "Require candidates with .NET knowledge",
        "ground_truth": [".NET Framework 4.5", ".NET MVC (New)", ".NET WPF (New)"]
    }
]

def evaluate_model():
    rows = []
    for example in eval_queries:
        query_embedding = model.encode(example["query"], convert_to_tensor=True)
        hits = util.semantic_search(query_embedding, embeddings, top_k=3)[0]
        predictions = [df.iloc[hit["corpus_id"]]["Assessment Name"] for hit in hits]
        gt = example["ground_truth"]

        recall = len(set(predictions) & set(gt)) / len(gt)

        map_score = 0.0
        hits_found = 0
        for i, pred in enumerate(predictions):
            if pred in gt:
                hits_found += 1
                map_score += hits_found / (i + 1)
        map_score /= min(len(gt), 3)

        rows.append({
            "Query": example["query"],
            "Recall@3": round(recall, 3),
            "MAP@3": round(map_score, 3),
            "Predictions": ", ".join(predictions),
            "Ground Truth": ", ".join(gt)
        })

    df_results = pd.DataFrame(rows)
    df_results.loc["Mean"] = {
        "Query": "Average",
        "Recall@3": round(df_results["Recall@3"].mean(), 3),
        "MAP@3": round(df_results["MAP@3"].mean(), 3),
        "Predictions": "",
        "Ground Truth": ""
    }

    df_results.to_csv("eval_results.csv", index=False)
    return df_results, "eval_results.csv"

def create_app():
    with gr.Blocks() as app:
        gr.Markdown("# 🔍 SHL Assessment Recommendation Engine")
        gr.Markdown("Get relevant SHL tests based on your job descriptions or skill queries.")

        with gr.Tab("🔮 Recommender"):
            query_input = gr.Textbox(label="Paste a Job Description or Query", placeholder="e.g. Looking for Python and SQL developer under 60 mins")
            topk_slider = gr.Slider(1, 10, value=5, step=1, label="Top K Recommendations")
            output_json = gr.JSON(label="Recommended SHL Assessments")
            gr.Button("Recommend").click(fn=recommend, inputs=[query_input, topk_slider], outputs=output_json)

        with gr.Tab("📊 Evaluate Accuracy (MAP@3 / Recall@3)"):
            eval_btn = gr.Button("Run Evaluation")
            eval_table = gr.Dataframe()
            eval_file = gr.File(label="Download CSV")
            eval_btn.click(fn=evaluate_model, outputs=[eval_table, eval_file])

    return app

create_app().launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://db50c1b176a62b3464.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


