In [1]:
import pandas as pd

# Location of the raw SHL catalogue workbook; the raw-string prefix keeps the
# Windows backslashes literal.
data = r"D:\SHL\shl_full_formatted.xlsx"

# Read the workbook (first sheet by default) and preview the first five rows.
df = pd.read_excel(io=data)
df.head(n=5)


Unnamed: 0,Name,URL,Remote Testing,Adaptive/IRT,Duration,Test Types,Description,Job Levels,Languages,Solution Type
0,Account Manager Solution,https://www.shl.com/products/product-catalog/v...,Yes,Yes,Approximate Completion Time in minutes = 49,"Competencies, Personality & Behavior, Ability ...",The Account Manager solution is an assessment ...,"Mid-Professional,","English (USA),",Pre-packaged Job Solutions
1,Administrative Professional - Short Form,https://www.shl.com/products/product-catalog/v...,Yes,Yes,Approximate Completion Time in minutes = 36,"Ability & Aptitude, Knowledge & Skills, Person...",The Administrative Professional solution is fo...,"Entry-Level,","English (USA),",Pre-packaged Job Solutions
2,Agency Manager Solution,https://www.shl.com/products/product-catalog/v...,Yes,Yes,Approximate Completion Time in minutes = 51,"Ability & Aptitude, Biodata & Situational Judg...",The Agency Manager solution is for mid-level s...,"Front Line Manager, Manager, Supervisor,","English (USA),",Pre-packaged Job Solutions
3,Apprentice + 8.0 Job Focused Assessment,https://www.shl.com/products/product-catalog/v...,Yes,No,Approximate Completion Time in minutes = 30,"Biodata & Situational Judgement, Personality &...",The Apprentice + 8.0 Job-Focused Assessment is...,"General Population, Graduate, Entry-Level,","English International, German,",Pre-packaged Job Solutions
4,Apprentice 8.0 Job Focused Assessment,https://www.shl.com/products/product-catalog/v...,Yes,No,Approximate Completion Time in minutes = 20,"Biodata & Situational Judgement, Personality &...",The Apprentice 8.0 Job-Focused Assessment is a...,"Entry-Level, General Population, Graduate,","English International, German, French,",Pre-packaged Job Solutions


In [2]:
# Pull the first run of digits out of the free-text duration, e.g.
# "Approximate Completion Time in minutes = 49" -> 49.0.
# - expand=False makes str.extract return a Series instead of a one-column
#   DataFrame, so a plain column is assigned.
# - astype(str) keeps this cell re-runnable: once the column is float the
#   .str accessor would otherwise raise.
# Rows without any digits (including missing cells) end up as NaN.
duration_digits = df["Duration"].astype(str).str.extract(r'(\d+)', expand=False)
df["Duration"] = duration_digits.astype(float)
df.head()

Unnamed: 0,Name,URL,Remote Testing,Adaptive/IRT,Duration,Test Types,Description,Job Levels,Languages,Solution Type
0,Account Manager Solution,https://www.shl.com/products/product-catalog/v...,Yes,Yes,49.0,"Competencies, Personality & Behavior, Ability ...",The Account Manager solution is an assessment ...,"Mid-Professional,","English (USA),",Pre-packaged Job Solutions
1,Administrative Professional - Short Form,https://www.shl.com/products/product-catalog/v...,Yes,Yes,36.0,"Ability & Aptitude, Knowledge & Skills, Person...",The Administrative Professional solution is fo...,"Entry-Level,","English (USA),",Pre-packaged Job Solutions
2,Agency Manager Solution,https://www.shl.com/products/product-catalog/v...,Yes,Yes,51.0,"Ability & Aptitude, Biodata & Situational Judg...",The Agency Manager solution is for mid-level s...,"Front Line Manager, Manager, Supervisor,","English (USA),",Pre-packaged Job Solutions
3,Apprentice + 8.0 Job Focused Assessment,https://www.shl.com/products/product-catalog/v...,Yes,No,30.0,"Biodata & Situational Judgement, Personality &...",The Apprentice + 8.0 Job-Focused Assessment is...,"General Population, Graduate, Entry-Level,","English International, German,",Pre-packaged Job Solutions
4,Apprentice 8.0 Job Focused Assessment,https://www.shl.com/products/product-catalog/v...,Yes,No,20.0,"Biodata & Situational Judgement, Personality &...",The Apprentice 8.0 Job-Focused Assessment is a...,"Entry-Level, General Population, Graduate,","English International, German, French,",Pre-packaged Job Solutions


In [3]:
# Discard the "Languages" and "Solution Type" columns.
# Raises KeyError if either column is already gone (e.g. when the cell is re-run).
df = df.drop(columns=["Languages", "Solution Type"])

In [4]:
# Preview the first five rows to confirm the remaining columns.
df.head(n=5)

Unnamed: 0,Name,URL,Remote Testing,Adaptive/IRT,Duration,Test Types,Description,Job Levels
0,Account Manager Solution,https://www.shl.com/products/product-catalog/v...,Yes,Yes,49.0,"Competencies, Personality & Behavior, Ability ...",The Account Manager solution is an assessment ...,"Mid-Professional,"
1,Administrative Professional - Short Form,https://www.shl.com/products/product-catalog/v...,Yes,Yes,36.0,"Ability & Aptitude, Knowledge & Skills, Person...",The Administrative Professional solution is fo...,"Entry-Level,"
2,Agency Manager Solution,https://www.shl.com/products/product-catalog/v...,Yes,Yes,51.0,"Ability & Aptitude, Biodata & Situational Judg...",The Agency Manager solution is for mid-level s...,"Front Line Manager, Manager, Supervisor,"
3,Apprentice + 8.0 Job Focused Assessment,https://www.shl.com/products/product-catalog/v...,Yes,No,30.0,"Biodata & Situational Judgement, Personality &...",The Apprentice + 8.0 Job-Focused Assessment is...,"General Population, Graduate, Entry-Level,"
4,Apprentice 8.0 Job Focused Assessment,https://www.shl.com/products/product-catalog/v...,Yes,No,20.0,"Biodata & Situational Judgement, Personality &...",The Apprentice 8.0 Job-Focused Assessment is a...,"Entry-Level, General Population, Graduate,"


In [5]:
# Snapshot the frame to CSV in the working directory, without the index column.
# This runs before the missing values are filled in a later cell.
df.to_csv(path_or_buf='cleaned_data.csv', index=False)

In [6]:
# Number of missing values per column.
df.isna().sum()

Name               0
URL                0
Remote Testing     0
Adaptive/IRT       0
Duration          41
Test Types         0
Description        0
Job Levels         1
dtype: int64

In [7]:
# Numeric gap: replace missing durations with the column median.
duration_median = df['Duration'].median()
df['Duration'] = df['Duration'].fillna(value=duration_median)

# Text gap: label rows without a job level as 'All Levels'.
df['Job Levels'] = df['Job Levels'].fillna(value='All Levels')



In [10]:
# Write the current frame to a second CSV in the working directory, without the index column.
# NOTE(review): this cell's execution count (10) is out of sequence with its
# neighbours - confirm which state of `df` was actually written.
df.to_csv(path_or_buf='cleaned_data_new.csv', index=False)

In [8]:
# Second workbook to append below the cleaned frame.
df2 = pd.read_excel(io=r"D:\SHL\cleaned_data_foramatted.xlsx")

# Stack both frames row-wise and renumber the index from 0.
merged_df = pd.concat(objs=(df, df2), ignore_index=True)

# Persist the combined table as CSV in the working directory, without the index column.
merged_df.to_csv(path_or_buf='merged_file.csv', index=False)

In [9]:
from dotenv import load_dotenv
import google.generativeai as genai
import os

# Populate the process environment (python-dotenv), then pass the Gemini key
# to the SDK; the value is None when GEMINI_API_KEY is not set.
load_dotenv()
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# Shared model handle used by find_assessment_summary
model = genai.GenerativeModel("gemini-2.0-flash")

def find_assessment_summary(summary: str) -> str:
    """
    Condense an assessment description into a 1-2 line summary via the Gemini model.

    The instruction prompt (which includes one worked example) is prepended to
    ``summary`` and sent to the module-level ``model`` through
    ``generate_content``.

    Args:
        summary: Raw assessment description text to summarize.

    Returns:
        The model's reply with surrounding whitespace stripped. An empty string
        is returned, without calling the API, when ``summary`` is not a
        non-blank string (e.g. ``None``, a pandas NaN from an empty cell, or
        whitespace only).

    Raises:
        ValueError: Propagated from ``response.text`` when the reply carries no
            text part (per the google-generativeai documentation this happens
            for blocked or empty responses). Errors raised by
            ``generate_content`` itself also propagate unchanged.
    """
    # Missing spreadsheet cells arrive as float NaN; formatting one into the
    # prompt would ask the model to summarize the literal text "nan" and spend
    # a rate-limited request on it.
    if not isinstance(summary, str) or not summary.strip():
        return ""

    prompt = """Summarize the following assessment description for a job position. Focus on:
    - Core responsibilities
    - Sample tasks
    - Relevant job titles
    - Variations or configurations

    Return a **1-2 line summary**. No bullet points or extra commentary.

    Example:
    Assessment Description: "This test evaluates customer service reps on conflict resolution, tone matching, and typing speed in chat support."
    Summary: "Assesses chat support skills like conflict resolution and typing speed; ideal for customer service roles."

    Assessment Description:"""

    # The description goes on its own line right after the trailing
    # "Assessment Description:" cue of the prompt.
    input_text = f"{prompt}\n{summary}"

    response = model.generate_content(
        contents=input_text,
        generation_config={
            "temperature": 0.7,
            "top_p": 1,
            "top_k": 1,
            "max_output_tokens": 300
        }
    )

    return response.text.strip()


  from .autonotebook import tqdm as notebook_tqdm


In [10]:
import pandas as pd
import time

# Raw string: the original literal relied on "\S" and "\m" being left alone,
# which emits an invalid-escape warning on current Python versions.
# NOTE(review): an earlier cell writes 'merged_file.csv' to the working
# directory, while this reads an .xlsx under D:\SHL - confirm this is the
# intended input file.
df = pd.read_excel(r"D:\SHL\merged_file.xlsx")

summaries = []
total_rows = len(df)

# Pace by 1-based position rather than by index label, so the rate limiting
# does not depend on the frame having a default 0..n-1 index.
for position, assessment_description in enumerate(df['Description'], start=1):
    try:
        summarized_text = find_assessment_summary(assessment_description)
    except ValueError as exc:
        # A reply without usable text (e.g. a blocked response) should not
        # abort the whole rate-limited run; record a gap and keep going.
        print(f"Row {position}: no summary returned ({exc}); storing None.")
        summarized_text = None
    summaries.append(summarized_text)

    # Nothing left to request after the last row, so skip the final wait.
    if position == total_rows:
        break

    if position % 15 == 0:
        # Long pause after every 15th request, short pause otherwise.
        print("Rate limit reached, waiting for 60 seconds...")
        time.sleep(60)
    else:
        time.sleep(4)

df['summarized_description'] = summaries

# Nothing is written to disk in this cell, so the message no longer claims it.
print("Summarization complete.")

Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60 seconds...
Rate limit reached, waiting for 60

In [11]:
# Export the frame, including the generated summaries, to an Excel workbook without the index column.
df.to_excel(excel_writer=r"D:\SHL ASSESSMENT\summarized_assessment_data_with_desc_new.xlsx", index=False)
