# Libraries

In [1]:
import pandas as pd
import re
from sentence_transformers import SentenceTransformer, util

**Load & Preprocess Dataset**

In [2]:

file_path = "/kaggle/input/shl-dataset-3/SHL dataset.xlsx"

# Open the workbook inside a context manager so the underlying file handle is
# released as soon as the sheet has been read (the original left it open).
with pd.ExcelFile(file_path) as xls:
    # Sheet names available in the workbook
    sheet_names = xls.sheet_names

    # Load data from the first sheet
    data = xls.parse(sheet_names[0])

# Show first few rows (kept as the last top-level expression so the notebook
# renders it as the cell output)
data.head()


Unnamed: 0,Relevant Job Roles,"Knowledge, Skills, Abilities",Test,time(min),Test Type,Remote Testing,Adaptive/IRT Support
0,"Accounts Receivable Specialist, Bill and Accou...","Invoice processing, Cheque and cash receipts ,...",Accounts Receivable Simulation,8,simulations,Yes,No
1,"General and Operations Manager, Business Manag...","Capacity planning and Layout planning,\nProces...",Operations Management,10,Knowledge and Skills,Yes,No
2,".Net Developer, Software Developer, Software E...","MVC architecture , MVC life cycle , Inversion ...",.NET MVC,8,Knowledge and Skills,Yes,No
3,"Programmers, Application Developers",Application Development  Application Foundati...,.NET Framework 4.5,30,Knowledge and Skills,Yes,No
4,".Net Developer, Software Developer, Software E...",Model View Communication\n• WPF data bindings\...,.NET MVVM,5,Knowledge and Skills,Yes,No


In [3]:
def clean_text(text):
    """Normalize free text before it is embedded.

    Lower-cases the input, deletes every character that is not an ASCII
    letter, digit, or whitespace, collapses each whitespace run to a single
    space, and trims the ends.

    Args:
        text: Value to clean. Non-string values are converted with ``str``.
            ``None`` and float NaN are treated as missing.

    Returns:
        str: The cleaned text, or ``""`` when the input is missing.
    """
    # A missing value would otherwise be stringified to the literal word
    # "none"/"nan" and end up in the text as if it were real content.
    # (``text != text`` is only true for NaN.)
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    # Delete special characters BEFORE collapsing whitespace: removing a
    # character that sits between two spaces ("a , b") would otherwise leave
    # a double space behind.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove special characters
    text = re.sub(r'\s+', ' ', text)  # Replace whitespace runs with one space
    return text.strip()

# Normalize both descriptive text columns in place, then join them (separated
# by a single space) into the 'Combined' column.
text_columns = ['Relevant Job Roles', 'Knowledge, Skills, Abilities']
for column in text_columns:
    data[column] = data[column].map(clean_text)

data['Combined'] = data[text_columns[0]].str.cat(data[text_columns[1]], sep=" ")

**Embed Dataset with Sentence Transformer**

In [4]:
# Load the pretrained sentence encoder used for both the dataset and queries.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Embed the combined text of every row once, up front, as a tensor.
combined_texts = data['Combined'].tolist()
dataset_embeddings = model.encode(combined_texts, convert_to_tensor=True)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

**Recommend Top-N Tests**

In [8]:
def get_user_input():
    """Read one line of user input and return it normalized by ``clean_text``."""
    return clean_text(input())

def recommend_tests(user_input, top_n=10):
    """Generate test recommendations based on user input.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against ``dataset_embeddings``; the best-scoring rows of
    ``data`` are returned, highest similarity first.

    Args:
        user_input: Query text to match against the dataset.
        top_n: Maximum number of recommendations to return. Values larger
            than the number of embedded rows are clamped to that number;
            values <= 0 produce an empty result.

    Returns:
        pandas.DataFrame: One row per recommended test with the columns
        'Test', 'Time (min)', 'Test Type', 'Remote Testing' and
        'Adaptive/IRT Support'.
    """
    # Encode user input (a batch containing the single query)
    user_embedding = model.encode([user_input], convert_to_tensor=True)

    # Calculate similarity scores; the batch holds one query, so take row 0
    cos_scores = util.cos_sim(user_embedding, dataset_embeddings)[0]

    # topk raises when k is negative or exceeds the number of scores, so
    # clamp the request to what is actually available.
    k = max(0, min(int(top_n), cos_scores.shape[0]))
    top_results = cos_scores.topk(k)

    # Source column -> output column, in display order
    output_columns = {
        'Test': 'Test',
        'time(min)': 'Time (min)',
        'Test Type': 'Test Type',
        'Remote Testing': 'Remote Testing',
        'Adaptive/IRT Support': 'Adaptive/IRT Support',
    }

    # Select all matching rows in one positional lookup (already ordered by
    # descending score) instead of one iloc call per row.
    top_rows = data.iloc[top_results.indices.tolist()]
    return (
        top_rows[list(output_columns)]
        .rename(columns=output_columns)
        .reset_index(drop=True)
    )

In [9]:
if __name__ == "__main__":
    user_input = get_user_input()
    if not user_input:
        # get_user_input returns cleaned text, which is "" for a blank or
        # punctuation-only entry; ranking against an empty query would only
        # produce arbitrary results, so report it instead.
        print("No job role entered; nothing to recommend.")
    else:
        recommendations = recommend_tests(user_input)
        # Plain-text table without the positional index column
        print(recommendations.to_string(index=False))

 hello


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

                                Test  Time (min)                         Test Type Remote Testing Adaptive/IRT Support
        Interpersonal Communications          35              knowledge and skills            Yes                   No
                            .NET WCF          11              Knowledge and Skills            Yes                   No
   Business Communication (adaptive)          24              Knowledge and Skills            Yes                   No
                          JavaScript           9              knowledge and skills            Yes                   No
                    Basic Statistics          10              Knowledge and Skills            Yes                   No
Basic Computer Literacy (Windows 10)          30 simulations, knowledge and skills            Yes                   No
 Conversational Multichat Simulation          11                       simulations            Yes                   No
                       C Programming          10