In [None]:
import pandas as pd

# Path of the assessments CSV inside the Colab runtime
CSV_PATH = "/content/shl_assessments_data.csv"
df = pd.read_csv(CSV_PATH)

# Display (rows, columns) together with the column labels
(df.shape, df.columns)

((542, 9),
 Index(['Assessment Name', 'URL', 'Remote Testing', 'Adaptive/IRT', 'Test Type',
        'Duration', 'Job Description', 'Job Levels', 'Languages'],
       dtype='object'))

In [None]:
!pip install -U sentence-transformers beautifulsoup4 requests

Collecting sentence-transformers
  Downloading sentence_transformers-4.0.2-py3-none-any.whl.metadata (13 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_

In [None]:
# Remove exact duplicate rows and renumber the index from 0
df = df.drop_duplicates().reset_index(drop=True)

# Replace missing values in the free-text columns with empty strings
text_cols = ['Assessment Name', 'Job Description', 'Test Type', 'Job Levels', 'Languages']
df[text_cols] = df[text_cols].fillna('')

# Create enriched search corpus including job levels, language, duration
def create_search_corpus(row):
    """Build the text that gets embedded for a single assessment row.

    The name, job description, test type, job levels, languages and duration
    are concatenated into one string.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with the keys
            'Assessment Name', 'Job Description', 'Test Type', 'Job Levels',
            'Languages' and 'Duration'.

    Returns:
        A single string. The duration is rendered as:
        * "<n> mins" when the value is purely numeric (unchanged behaviour),
        * the raw text when it already carries words (e.g. "30 minutes",
          "Variable"), so no doubled unit such as "30 minutes mins" appears,
        * "not specified" when the value is missing or blank, instead of
          embedding "nan mins".
    """
    raw_duration = row['Duration']
    duration_text = '' if pd.isna(raw_duration) else str(raw_duration).strip()

    if not duration_text:
        duration_part = 'not specified'
    elif duration_text.replace('.', '', 1).isdigit():
        # Plain number: the unit has to be supplied here.
        duration_part = f"{duration_text} mins"
    else:
        # Already descriptive text; appending " mins" would garble it.
        duration_part = duration_text

    return (
        f"{row['Assessment Name']} {row['Job Description']} {row['Test Type']} "
        f"Job level: {row['Job Levels']} Language: {row['Languages']} Duration: {duration_part}"
    )

# Attach the per-row search text as a new 'search_corpus' column
df = df.assign(search_corpus=df.apply(create_search_corpus, axis=1))
df.shape

(506, 10)

In [None]:
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model by name
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every row's search text; the texts are passed in DataFrame row order
corpus_texts = df['search_corpus'].tolist()
corpus_embeddings = model.encode(corpus_texts, show_progress_bar=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/16 [00:00<?, ?it/s]

In [None]:
import requests
from bs4 import BeautifulSoup

def extract_text_from_url(url):
    """Download a web page and return up to 1500 characters of its visible text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text, with fragments joined by single spaces and truncated
        to 1500 characters. If anything goes wrong (network error, timeout,
        non-success HTTP status, parsing problem) the function does not
        raise; it returns a string starting with
        "Unable to fetch job description. Error:" instead.
    """
    try:
        # Send a browser-like User-Agent header with the request.
        headers = {
            "User-Agent": "Mozilla/5.0"
        }
        response = requests.get(url, headers=headers, timeout=10)
        # Treat HTTP error responses (4xx/5xx) as failures so the body of an
        # error page is not returned as if it were the job description.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove non-visible content; otherwise JavaScript/CSS source ends up
        # in the text and uses up the 1500-character budget.
        for tag in soup(['script', 'style', 'noscript']):
            tag.decompose()

        description = soup.get_text(separator=' ', strip=True)
        return description[:1500]  # Keep it concise
    except Exception as e:
        # Best-effort contract: report the failure as text rather than raising.
        return f"Unable to fetch job description. Error: {str(e)}"

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def recommend_assessments(query_or_url, top_n=10, max_duration=None):
    """Return the assessments most similar to a free-text query or a job-page URL.

    Args:
        query_or_url: Either a natural-language description of the hiring need
            or an http(s) URL whose page text is fetched and used as the query.
        top_n: Maximum number of rows to return (None returns every match).
        max_duration: Optional upper bound in minutes. When given, only
            assessments whose 'Duration' value contains a number no greater
            than this bound are considered; rows without a numeric duration
            are excluded. The first number found in the value is read as
            minutes, matching how the search corpus labels this column.

    Returns:
        A DataFrame with columns 'Assessment Name' (as a Markdown link to the
        assessment URL), 'Remote Testing', 'Adaptive/IRT', 'Duration' and
        'Test Type', ordered from most to least similar, with a fresh
        0-based index.

    Raises:
        ValueError: If the query is empty or not a string, if ``top_n`` is
            not positive, or if the URL could not be fetched.
    """
    if not isinstance(query_or_url, str) or not query_or_url.strip():
        raise ValueError("query_or_url must be a non-empty string")
    if top_n is not None and top_n < 1:
        raise ValueError("top_n must be a positive integer")

    query_or_url = query_or_url.strip()

    # Require a real scheme: a bare 'http' prefix would also match free-text
    # queries such as "http server administrator".
    if query_or_url.lower().startswith(('http://', 'https://')):
        user_input = extract_text_from_url(query_or_url)
        # extract_text_from_url reports failures as text; embedding that
        # message would silently produce meaningless recommendations.
        if user_input.startswith("Unable to fetch job description."):
            raise ValueError(user_input)
    else:
        user_input = query_or_url

    # Embed the query and rank every corpus row by cosine similarity
    query_embedding = model.encode([user_input])
    similarity_scores = cosine_similarity(query_embedding, corpus_embeddings)[0]
    ranked_positions = similarity_scores.argsort()[::-1]

    if max_duration is not None:
        # Pull the first number out of values such as "20" or "30 minutes";
        # non-numeric values ("Variable", missing) become NaN and compare False.
        minutes = pd.to_numeric(
            df['Duration'].astype(str).str.extract(r'(\d+(?:\.\d+)?)', expand=False),
            errors='coerce',
        )
        within_limit = (minutes <= max_duration).to_numpy()
        ranked_positions = ranked_positions[within_limit[ranked_positions]]

    top_indices = ranked_positions[:top_n]

    # argsort yields positions, so select rows positionally (.iloc) instead of
    # relying on the DataFrame index labels happening to be 0..n-1.
    columns = ['Assessment Name', 'URL', 'Remote Testing', 'Adaptive/IRT', 'Duration', 'Test Type']
    results = df.iloc[top_indices][columns].copy()

    # Fold the URL into the name as a Markdown link: [name](url)
    results['Assessment Name'] = (
        '[' + results['Assessment Name'].astype(str) + '](' + results['URL'].astype(str) + ')'
    )

    return results.drop(columns='URL').reset_index(drop=True)

In [None]:
# Demo: free-text query for an analyst role (cognitive + personality tests); the 45-minute limit is part of the query text only.
recommend_assessments("I am hiring for an analyst and want applications to screen using Cognitive and personality tests, what options are available within 45 mins", top_n=10)


Unnamed: 0,Assessment Name,Remote Testing,Adaptive/IRT,Duration,Test Type
0,[Verify - General Ability Screen](https://www....,Yes,Yes,10,A
1,[Verify - Technical Checking - Next Generation...,Yes,No,5,A
2,[Verify Interactive Process Monitoring](https:...,Yes,No,18,A
3,[Graduate + 8.0 Job Focused Assessment](https:...,Yes,No,30 minutes,A B P
4,[Verify - G+](https://www.shl.com/solutions/pr...,Yes,Yes,36,A
5,[Verify - Verbal Ability - Next Generation](ht...,Yes,Yes,15 minutes,A
6,[Verify - Numerical Ability](https://www.shl.c...,Yes,Yes,20,A
7,[Multitasking Ability](https://www.shl.com/sol...,Yes,No,20,A K S
8,[Verify - Deductive Reasoning](https://www.shl...,Yes,Yes,20,A
9,[SHL Verify Interactive G+](https://www.shl.co...,Yes,Yes,36,A


In [None]:
# Demo: free-text query for an analyst role (cognitive + personality tests); the 10-minute limit is part of the query text only.
recommend_assessments("I am hiring for an analyst and want applications to screen using Cognitive and personality tests, what options are available within 10 mins", top_n=10)


Unnamed: 0,Assessment Name,Remote Testing,Adaptive/IRT,Duration,Test Type
0,[Verify - General Ability Screen](https://www....,Yes,Yes,10,A
1,[Verify Interactive Process Monitoring](https:...,Yes,No,18,A
2,[Verify - Technical Checking - Next Generation...,Yes,No,5,A
3,[Graduate + 8.0 Job Focused Assessment](https:...,Yes,No,30 minutes,A B P
4,[Verify - G+](https://www.shl.com/solutions/pr...,Yes,Yes,36,A
5,[Verify - Verbal Ability - Next Generation](ht...,Yes,Yes,15 minutes,A
6,[Multitasking Ability](https://www.shl.com/sol...,Yes,No,20,A K S
7,[Verify - Numerical Ability](https://www.shl.c...,Yes,Yes,20,A
8,[Verify - Deductive Reasoning](https://www.shl...,Yes,Yes,20,A
9,[Graduate 7.1 Job Focused Assessment](https://...,Yes,No,19,B C


In [None]:
# Demo: longer job-description style query for a Data Analyst (SQL, Python, Excel); uses the default top_n.
recommend_assessments("We are looking for a Data Analyst with strong analytical skills, experience in SQL and Python, and familiarity with Excel dashboards. The candidate should be able to interpret data, generate insights, and communicate results clearly. Preferred test duration: under 10 minutes.")

Unnamed: 0,Assessment Name,Remote Testing,Adaptive/IRT,Duration,Test Type
0,[MS Excel (New)](https://www.shl.com/solutions...,Yes,No,6,K
1,[Microsoft Excel 365 - Essentials (New)](https...,Yes,No,30,K S
2,[Data Warehousing Concepts](https://www.shl.co...,Yes,Yes,25,K
3,[SQL Server Analysis Services (SSAS) (New)](ht...,Yes,No,15,K
4,[IBM DataStage (New)](https://www.shl.com/solu...,Yes,No,15,K
5,[Statistical Analysis System (New)](https://ww...,Yes,No,11,K
6,[Software Business Analysis](https://www.shl.c...,Yes,Yes,30,K
7,[Microsoft Excel 365 (New)](https://www.shl.co...,Yes,No,35,K S
8,[Tableau (New)](https://www.shl.com/solutions/...,Yes,No,8,K
9,[Python (New)](https://www.shl.com/solutions/p...,Yes,No,11,K


In [None]:
# Demo: sales executive query; the 30-minute preference is part of the query text only.
recommend_assessments("Hiring a sales executive with excellent communication, negotiation, and persuasion skills. The role involves handling customer queries, closing deals, and maintaining records. Assessment should be quick—preferably within 30 minutes.")

Unnamed: 0,Assessment Name,Remote Testing,Adaptive/IRT,Duration,Test Type
0,[Sales Professional Solution](https://www.shl....,Yes,Yes,47,A B P
1,[Senior Sales Professional Solution](https://w...,Yes,Yes,42,A B P
2,[Sales Representative Solution](https://www.sh...,Yes,Yes,29,A B P
3,[Sales Professional 7.1 (Americas)](https://ww...,Yes,No,21,B C
4,[Technical Sales Associate Solution](https://w...,Yes,No,41,A B P
5,[Sales Professional 7.0 Solution](https://www....,Yes,No,21,C B
6,[Retail Sales Associate Solution](https://www....,Yes,Yes,29,A B P
7,[Entry Level Sales Solution](https://www.shl.c...,Yes,No,20,C P
8,[Retail Manager w/ Sales Solution](https://www...,Yes,Yes,43,A B P
9,[Sales Supervisor Solution](https://www.shl.co...,Yes,Yes,38,A B P


In [None]:
# Demo: sales executive query; the 50-minute preference is part of the query text only.
recommend_assessments("Hiring a sales executive with excellent communication, negotiation, and persuasion skills. The role involves handling customer queries, closing deals, and maintaining records. Assessment should be quick—preferably within 50 minutes.")

Unnamed: 0,Assessment Name,Remote Testing,Adaptive/IRT,Duration,Test Type
0,[Sales Professional Solution](https://www.shl....,Yes,Yes,47,A B P
1,[Senior Sales Professional Solution](https://w...,Yes,Yes,42,A B P
2,[Sales Representative Solution](https://www.sh...,Yes,Yes,29,A B P
3,[Sales Professional 7.1 (Americas)](https://ww...,Yes,No,21,B C
4,[Technical Sales Associate Solution](https://w...,Yes,No,41,A B P
5,[Sales Professional 7.0 Solution](https://www....,Yes,No,21,C B
6,[Entry Level Sales Solution](https://www.shl.c...,Yes,No,20,C P
7,[Retail Sales Associate Solution](https://www....,Yes,Yes,29,A B P
8,[Retail Manager w/ Sales Solution](https://www...,Yes,Yes,43,A B P
9,[Sales Supervisor Solution](https://www.shl.co...,Yes,Yes,38,A B P


In [None]:
# Demo: software engineering intern query (Java, OOP, problem solving); uses the default top_n.
recommend_assessments("Seeking a software engineering intern with basic understanding of Java, object-oriented programming, and problem-solving abilities. The test should evaluate logical thinking and programming fundamentals. Prefer remote-friendly assessments.")

Unnamed: 0,Assessment Name,Remote Testing,Adaptive/IRT,Duration,Test Type
0,[Java 8 (New)](https://www.shl.com/solutions/p...,Yes,No,18,K
1,[Core Java (Entry Level) (New)](https://www.sh...,Yes,No,13,K
2,[Programming Concepts](https://www.shl.com/sol...,Yes,Yes,25,K
3,[Core Java (Advanced Level) (New)](https://www...,Yes,No,13,K
4,[Software Business Analysis](https://www.shl.c...,Yes,Yes,30,K
5,[C# Programming (New)](https://www.shl.com/sol...,Yes,No,9,K
6,[Java Web Services (New)](https://www.shl.com/...,Yes,No,8,K
7,[JavaScript (New)](https://www.shl.com/solutio...,Yes,No,9,K
8,[R Programming (New)](https://www.shl.com/solu...,Yes,No,13,K
9,[C++ Programming (New)](https://www.shl.com/so...,Yes,No,10,K


In [None]:
# Demo: URL input — the input starts with 'http', so the page text is fetched with extract_text_from_url and used as the query.
recommend_assessments("https://www.linkedin.com/jobs/view/research-engineer-ai-at-shl-4194768899/?originalSubdomain=in", top_n=5)

Unnamed: 0,Assessment Name,Remote Testing,Adaptive/IRT,Duration,Test Type
0,[Smart Interview Live](https://www.shl.com/sol...,Yes,No,Variable,P
1,[Virtual Assessment and Development Centers](h...,Yes,No,,P
2,[Technology Professional 8.0 Job Focused Asses...,Yes,No,16,C P
3,[Entry Level Technical Support Solution](https...,Yes,No,18,P C
4,[SHL Verify Interactive - Inductive Reasoning]...,Yes,Yes,20,A S


In [None]:
# Persist the corpus embedding array to a .npy file in the working directory
np.save(file='corpus_embeddings.npy', arr=corpus_embeddings)

In [None]:
from google.colab import files
# Hand the saved embeddings file to google.colab's download helper
# (presumably triggers a browser download to the local machine — confirm against the Colab docs).
files.download('corpus_embeddings.npy')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>