# SHL Assessment Recommendation System

In [7]:
import pandas as pd
from sentence_transformers import SentenceTransformer,util
import torch
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the sample SHL catalog (path is relative to the current working directory).
catalog_df = pd.read_csv("SHL_sample_data.csv")
# NOTE(review): in the rendered preview several rows show a skill name
# (e.g. 'Python', 'SQL') in the Description column, which suggests that
# multi-valued Skills are not quoted in the CSV -- confirm the file parses
# into the intended columns.
# Preview only the first rows instead of rendering the whole frame.
catalog_df.head(10)

Unnamed: 0,Assessment Name,URL,Duration,Remote Testing Support,Adaptive/IRT,Test Type,Skills,Description
0,Java Developer Assessment #1,https://shl.com/assessment/1,40,Yes,No,Coding,Java,The 'Java Developer Assessment #1' is a Coding...
1,Full Stack Developer Test #2,https://shl.com/assessment/2,50,Yes,Yes,Coding,JavaScript,Python
2,Data Analyst Screening #3,https://shl.com/assessment/3,45,No,No,Cognitive,SQL,Python
3,Python & SQL Challenge #4,https://shl.com/assessment/4,60,Yes,Yes,Coding,Python,SQL
4,Cognitive Aptitude Test #5,https://shl.com/assessment/5,30,Yes,No,Cognitive,Problem Solving,The 'Cognitive Aptitude Test #5' is a Cognitiv...
5,Personality Insights Assessment #6,https://shl.com/assessment/6,35,Yes,No,Personality,Teamwork,Communication
6,Frontend Developer Skills Test #7,https://shl.com/assessment/7,45,No,Yes,Coding,JavaScript,Communication
7,Business Communication Evaluation #8,https://shl.com/assessment/8,25,Yes,Yes,Communication,Communication,Teamwork
8,Logical Reasoning Assessment #9,https://shl.com/assessment/9,20,Yes,No,Cognitive,Problem Solving,The 'Logical Reasoning Assessment #9' is a Cog...
9,Coding Challenge for Developers #10,https://shl.com/assessment/10,60,No,Yes,Coding,Python,Java


In [9]:
# Check column dtypes and non-null counts of the loaded catalog.
catalog_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Assessment Name         50 non-null     object
 1   URL                     50 non-null     object
 2   Duration                50 non-null     int64 
 3   Remote Testing Support  50 non-null     object
 4   Adaptive/IRT            50 non-null     object
 5   Test Type               50 non-null     object
 6   Skills                  50 non-null     object
 7   Description             50 non-null     object
dtypes: int64(1), object(7)
memory usage: 3.3+ KB


In [9]:
def combine_row(row):
    """Flatten one catalog record into a single space-separated string.

    The selected fields are stringified and concatenated in a fixed order:
    name, duration, remote-testing flag, adaptive flag, test type, skills
    and description. The URL column is not included.

    Args:
        row: Object indexable by column name (e.g. a DataFrame row).

    Returns:
        str: The stringified field values joined by single spaces.
    """
    text_columns = (
        "Assessment Name",
        "Duration",
        "Remote Testing Support",
        "Adaptive/IRT",
        "Test Type",
        "Skills",
        "Description",
    )
    return ' '.join(str(row[column]) for column in text_columns)

In [10]:
# Build the text to be embedded: one combined string per catalog row.
catalog_df['combined'] = catalog_df.apply(combine_row,axis=1)

In [11]:
# Preview the first rows (now including the 'combined' column) rather than
# rendering the whole frame.
catalog_df.head(10)

Unnamed: 0,Assessment Name,URL,Duration,Remote Testing Support,Adaptive/IRT,Test Type,Skills,Description,combined
0,Java Developer Assessment #1,https://shl.com/assessment/1,40,Yes,No,Coding,Java,The 'Java Developer Assessment #1' is a Coding...,Java Developer Assessment #1 40 Yes No Coding ...
1,Full Stack Developer Test #2,https://shl.com/assessment/2,50,Yes,Yes,Coding,JavaScript,Python,Full Stack Developer Test #2 50 Yes Yes Coding...
2,Data Analyst Screening #3,https://shl.com/assessment/3,45,No,No,Cognitive,SQL,Python,Data Analyst Screening #3 45 No No Cognitive S...
3,Python & SQL Challenge #4,https://shl.com/assessment/4,60,Yes,Yes,Coding,Python,SQL,Python & SQL Challenge #4 60 Yes Yes Coding Py...
4,Cognitive Aptitude Test #5,https://shl.com/assessment/5,30,Yes,No,Cognitive,Problem Solving,The 'Cognitive Aptitude Test #5' is a Cognitiv...,Cognitive Aptitude Test #5 30 Yes No Cognitive...
5,Personality Insights Assessment #6,https://shl.com/assessment/6,35,Yes,No,Personality,Teamwork,Communication,Personality Insights Assessment #6 35 Yes No P...
6,Frontend Developer Skills Test #7,https://shl.com/assessment/7,45,No,Yes,Coding,JavaScript,Communication,Frontend Developer Skills Test #7 45 No Yes Co...
7,Business Communication Evaluation #8,https://shl.com/assessment/8,25,Yes,Yes,Communication,Communication,Teamwork,Business Communication Evaluation #8 25 Yes Ye...
8,Logical Reasoning Assessment #9,https://shl.com/assessment/9,20,Yes,No,Cognitive,Problem Solving,The 'Logical Reasoning Assessment #9' is a Cog...,Logical Reasoning Assessment #9 20 Yes No Cogn...
9,Coding Challenge for Developers #10,https://shl.com/assessment/10,60,No,Yes,Coding,Python,Java,Coding Challenge for Developers #10 60 No Yes ...


In [11]:
# Load the 'all-MiniLM-L6-v2' sentence-embedding model used to encode the catalog text.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [12]:
# Encode each row's combined text into a sentence embedding (one vector per
# catalog row), returned as a single tensor.
corpus = catalog_df['combined'].tolist()
corpus_embeddings = model.encode(corpus,convert_to_tensor=True)

In [15]:
# Inspect the embedding tensor produced for the catalog.
corpus_embeddings

tensor([[-4.1132e-02,  3.3712e-02, -1.5192e-02,  ...,  6.8478e-02,
          1.1157e-02, -3.8792e-02],
        [-8.3015e-02, -5.8202e-02,  3.8498e-02,  ...,  1.4818e-01,
          8.3665e-02, -4.9551e-02],
        [ 2.8609e-02,  1.4035e-02, -4.8932e-02,  ...,  6.8456e-03,
          6.1058e-02, -2.2603e-05],
        ...,
        [ 5.7849e-02,  1.8829e-02, -1.3150e-02,  ...,  4.9492e-02,
         -7.7539e-03, -8.2473e-02],
        [ 3.9873e-02,  2.4800e-03, -2.5676e-02,  ...,  3.8942e-02,
          8.5604e-03, -4.1729e-02],
        [-1.0276e-01, -2.3152e-02,  1.3055e-02,  ...,  1.6705e-01,
          8.0500e-02, -2.7817e-02]], device='mps:0')

In [16]:
def print_assessments(user_query, k=5):
    """Print and return the catalog entries most similar to a free-text query.

    The query is embedded with the module-level ``model`` (the encoder that
    produced ``corpus_embeddings``) and every catalog row is ranked by cosine
    similarity to it.

    Args:
        user_query: Free-text description of the hiring need.
        k: Maximum number of matches to report. Defaults to 5 and is capped
            at the corpus size.

    Returns:
        list[dict]: One dict per match, best first, holding the catalog
        fields plus "Score" (cosine similarity rounded to 4 decimals).
    """
    # Reuse the already-loaded encoder; constructing a SentenceTransformer on
    # every call reloads the weights for no benefit.
    query_embedding = model.encode(user_query, convert_to_tensor=True)
    cosine_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    top_k = min(k, len(corpus))
    top_results = torch.topk(cosine_scores, k=top_k)

    # Report the number of matches actually returned, not a fixed "5".
    print(f'Top {top_k} Matching Assessments:\n')
    results = []
    for score, idx in zip(top_results.values, top_results.indices):
        # Fetch the catalog row once instead of once per field.
        row = catalog_df.iloc[idx.item()]
        result = {
            "Assessment Name": row['Assessment Name'],
            "Skills": row['Skills'],
            "Test Type": row['Test Type'],
            "Description": row['Description'],
            "Remote Testing Support": row['Remote Testing Support'],
            "Adaptive/IRT": row['Adaptive/IRT'],
            "Duration": row['Duration'],
            "URL": row['URL'],
            "Score": round(score.item(), 4)
        }

        print(f"Assessment: {result['Assessment Name']}")
        print(f"Skills: {result['Skills']}")
        print(f"Test Type: {result['Test Type']}")
        print(f"Description: {result['Description']}")
        print(f"Remote Testing Support: {result['Remote Testing Support']}")
        print(f"Adaptive/IRT: {result['Adaptive/IRT']}")
        print(f"Duration: {result['Duration']} mins")
        print(f"URL: {result['URL']}")
        print(f"Score: {result['Score']}")
        print("-" * 50)

        results.append(result)
    return results

In [17]:
# Interactive demo: read a free-text query and show the best-matching assessments.
# NOTE(review): input() blocks a non-interactive "Run All"; consider a
# hard-coded example query for reproducible runs.
user_query = input("Enter your query:")
print("\n")
results = print_assessments(user_query)

Enter your query: python sql coding test




Top 5 Matching Assessments:

Assessment: Python & SQL Challenge #4
Skills: Python
Test Type: Coding
Description: SQL
Remote Testing Support: Yes
Adaptive/IRT: Yes
Duration: 60 mins
URL: https://shl.com/assessment/4
Score: 0.7926
--------------------------------------------------
Assessment: Backend Developer Challenge #29
Skills: Python
Test Type: Coding
Description: SQL
Remote Testing Support: Yes
Adaptive/IRT: Yes
Duration: 60 mins
URL: https://shl.com/assessment/29
Score: 0.7855
--------------------------------------------------
Assessment: SQL & Reporting Challenge #18
Skills: SQL
Test Type: Coding
Description: Python
Remote Testing Support: Yes
Adaptive/IRT: No
Duration: 40 mins
URL: https://shl.com/assessment/18
Score: 0.7318
--------------------------------------------------
Assessment: Data Analyst Screening #3
Skills: SQL
Test Type: Cognitive
Description: Python
Remote Testing Support: No
Adaptive/IRT: No
Duration: 45 mins
URL: https://shl.com/assessment/3
Score: 0.6229
----

In [18]:
# Second interactive demo with a longer, natural-language query.
# NOTE(review): input() blocks a non-interactive "Run All"; consider a
# hard-coded example query for reproducible runs.
user_query2 = input("Enter your query:")
print("\n")
result2 = print_assessments(user_query2)

Enter your query: I am hiring for an analyst and wants applications to screen using Cognitive and personality tests, what options are available within 45 mins.




Top 5 Matching Assessments:

Assessment: Cognitive Aptitude Test #5
Skills: Problem Solving
Test Type: Cognitive
Description: The 'Cognitive Aptitude Test #5' is a Cognitive test focused on Problem Solving. Duration is 30 mins. It supports remote testing: Yes and adaptive format: No.
Remote Testing Support: Yes
Adaptive/IRT: No
Duration: 30 mins
URL: https://shl.com/assessment/5
Score: 0.5845
--------------------------------------------------
Assessment: Critical Thinking & Aptitude Test #30
Skills: Problem Solving
Test Type: Cognitive
Description: The 'Critical Thinking & Aptitude Test #30' is a Cognitive test focused on Problem Solving. Duration is 40 mins. It supports remote testing: Yes and adaptive format: No.
Remote Testing Support: Yes
Adaptive/IRT: No
Duration: 40 mins
URL: https://shl.com/assessment/30
Score: 0.5815
--------------------------------------------------
Assessment: Developer Personality Assessment #25
Skills: Personality
Test Type: Personality
Description: The '

In [40]:
def compute_metrics(benchmark_queries,k=5):
    """Print mean Recall@k and MAP@k of ``find_assessments`` over a benchmark.

    For each entry the top-k assessment names returned by
    ``find_assessments`` are compared with the entry's relevant names:

    * Recall@k = hits in the top k / number of relevant items.
    * AP@k = sum of precision at each hit rank / min(k, number of relevant
      items).

    Both are averaged over all entries and printed with 4 decimals.

    Args:
        benchmark_queries: Sequence of dicts with a "query" string and a
            "relevant" collection of assessment names.
        k: Ranking cutoff (positive integer). Defaults to 5.

    Raises:
        ValueError: If ``k`` is not positive, the benchmark is empty, or an
            entry lists no relevant items (the metrics are undefined and
            would otherwise surface as a bare ZeroDivisionError).
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    if not benchmark_queries:
        raise ValueError("benchmark_queries must contain at least one entry")

    recall_scores = []
    average_precisions = []

    for entry in benchmark_queries:
        query = entry["query"]
        relevant_items = entry["relevant"]
        if not relevant_items:
            raise ValueError(f"no relevant items listed for query: {query!r}")

        # Request k results explicitly; relying on the retriever's default
        # would silently cap the evaluation at 5 items whenever k > 5.
        results = find_assessments(query, k=k)
        topk = [res["Assessment Name"] for res in results[:k]]

        # One pass yields both the hit count (recall) and the running
        # precision at each hit rank (average precision).
        hits = 0
        precision_sum = 0.0
        for rank, name in enumerate(topk, start=1):
            if name in relevant_items:
                hits += 1
                precision_sum += hits / rank

        recall_scores.append(hits / len(relevant_items))
        average_precisions.append(precision_sum / min(k, len(relevant_items)))

    recall = sum(recall_scores)/len(recall_scores)
    map_ = sum(average_precisions)/len(average_precisions)

    print(f"Recall@{k}: {recall:.4f}")
    print(f"MAP@{k}: {map_:.4f}")

In [13]:
def find_assessments(user_query,k=5):
    """Return the catalog entries most similar to a free-text query.

    The query is embedded with the module-level ``model`` (the encoder that
    produced ``corpus_embeddings``) and every catalog row is ranked by cosine
    similarity to it.

    Args:
        user_query: Free-text description of the hiring need.
        k: Maximum number of matches to return. Defaults to 5 and is capped
            at the corpus size.

    Returns:
        list[dict]: One dict per match, best first, holding the catalog
        fields plus "Score" (cosine similarity rounded to 4 decimals).
    """
    # Reuse the already-loaded encoder; constructing a SentenceTransformer on
    # every call reloads the weights once per query during evaluation.
    query_embedding = model.encode(user_query, convert_to_tensor=True)
    cosine_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    top_k = min(k, len(corpus))
    top_results = torch.topk(cosine_scores, k=top_k)

    results = []
    for score, idx in zip(top_results.values, top_results.indices):
        # Fetch the catalog row once instead of once per field.
        row = catalog_df.iloc[idx.item()]
        results.append({
            "Assessment Name": row['Assessment Name'],
            "Skills": row['Skills'],
            "Test Type": row['Test Type'],
            "Description": row['Description'],
            "Remote Testing Support": row['Remote Testing Support'],
            "Adaptive/IRT": row['Adaptive/IRT'],
            "Duration": row['Duration'],
            "URL": row['URL'],
            "Score": round(score.item(), 4)
        })
    return results

In [39]:
# Benchmark set for compute_metrics: each entry pairs a natural-language
# "query" with the list of "relevant" assessment names. Relevance is checked
# by exact string match against the retrieved "Assessment Name" values, so
# the names here must match the catalog spelling exactly.
benchmark_queries = [
    {
        "query": "I am hiring for Java developers who can also collaborate effectively with my business teams. Looking for an assessment that can be completed in 40 minutes.",
        "relevant": ["Java Developer Assessment #1","Quick Java Screening #27","Java Coding Drill #47"]
    },
    {
        "query": "Suggest an assessment for a fresher data analyst that includes Python and SQL skills in under 50 minutes.",
        "relevant": ["Data Analyst Screening #3","SQL & Reporting Challenge #18","Cross-functional Assessment #41"]
    },
    {
        "query": "Looking for remote-enabled JavaScript technical assessment. Needs to be adaptive.",
        "relevant": ["JavaScript Screening Test #11"]
    },
    {
        "query": "Want to assess communication and teamwork skills in under 30 minutes.",
        "relevant": ["Communication Skills Test #19","Business Communication Evaluation #8","Communication & Team Fit #24","Interpersonal Skills Assessment #43"]
    },
]

In [42]:
# Evaluate the retriever on the benchmark set at a cutoff of 5 results.
compute_metrics(benchmark_queries,k=5)

Recall@5: 0.8542
MAP@5: 0.7181
