In [1]:
import getpass
import json
import os
import sys
import traceback
from typing import Dict, List

import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
# Make the current working directory importable (the local `embeddings` and
# `llm` packages imported in the next cell are presumably resolved from here).
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
project_root = os.path.abspath('.')
if project_root not in sys.path:
    sys.path.append(project_root)

In [3]:
from embeddings.faiss_wrapper import FaissIndex
from llm.llm_recommender import LLMRecommender




In [4]:
# Load the labelled evaluation set. Each row pairs one query with one relevant
# assessment; the cells below read the 'Query' and 'Assessments' columns.
test_df = pd.read_csv("test.csv")

# Fail early with a clear message instead of a KeyError deep inside a later cell.
missing_columns = {"Query", "Assessments"} - set(test_df.columns)
if missing_columns:
    raise ValueError(f"test.csv is missing required column(s): {sorted(missing_columns)}")

print(f"Loaded {len(test_df)} test samples")

Loaded 45 test samples


In [5]:
# Group the labelled rows into {query text: [relevant assessment, ...]},
# keeping queries in first-seen order and assessments in file order.
ground_truth = {}
for query_text, assessment_name in zip(test_df['Query'], test_df['Assessments']):
    ground_truth.setdefault(query_text, []).append(assessment_name)
print(f"Found {len(ground_truth)} unique queries")

Found 7 unique queries


In [6]:
def calculate_metrics(relevant: List[str], recommended: List[str], k_values: List[int]):
    """Compute Recall@k and Average Precision@k for a single query.

    Args:
        relevant: Ground-truth relevant item names. Duplicates are ignored.
        recommended: Ranked recommendations, best first. An item that appears
            more than once is only credited at its first (highest) position.
        k_values: Cut-off ranks at which to evaluate.

    Returns:
        ``{"recall": {k: value}, "ap": {k: value}}`` with one entry per ``k``.
        Recall@k is the fraction of distinct relevant items found in the top k.
        AP@k is the sum of precision at each hit rank, divided by
        ``min(k, number of distinct relevant items)``. Both are 0 when there
        are no relevant items.
    """
    # A set gives O(1) membership tests and keeps duplicate labels from
    # inflating the recall / AP denominators.
    relevant_set = set(relevant)
    results = {"recall": {}, "ap": {}}

    for k in k_values:
        credited = set()  # relevant items already counted within the top k
        hits = 0
        precision_sum = 0.0
        for rank, item in enumerate(recommended[:k], start=1):
            # Credit each relevant item once; a repeated recommendation must
            # not count as another hit, otherwise AP can exceed 1.
            if item in relevant_set and item not in credited:
                credited.add(item)
                hits += 1
                precision_sum += hits / rank

        ap_denominator = min(k, len(relevant_set))
        results["recall"][k] = hits / len(relevant_set) if relevant_set else 0.0
        results["ap"][k] = precision_sum / ap_denominator if ap_denominator > 0 else 0.0

    return results

In [None]:
# Read the API token from the environment; fall back to a hidden prompt so the
# secret is not echoed into the notebook output.
api_key = os.getenv("ACCESS_TOKEN")
if not api_key:
    print("Warning: ACCESS_TOKEN not found")
    api_key = getpass.getpass("Enter your ACCESS TOKEN: ")
if not api_key:
    raise ValueError("An ACCESS TOKEN is required to initialize the recommender")

# Path to the FAISS index file. Override with the FAISS_INDEX_PATH environment
# variable; the default is relative to the working directory instead of a
# machine-specific absolute path.
# NOTE(review): assumes the notebook is run from the repository root — confirm.
index_path = os.getenv(
    "FAISS_INDEX_PATH",
    os.path.join("data", "embeddings", "faiss_index.faiss"),
)

try:
    vector_index = FaissIndex()
    vector_index.load(index_path)  # Load the FAISS index from index_path

    recommender = LLMRecommender(vector_index=vector_index, api_key=api_key)
    print("Recommender initialized successfully")

except Exception as e:
    print(f"Error initializing: {e}")
    # Re-raise so the notebook shows the full traceback and "Run All" stops
    # here, rather than calling sys.exit() inside a kernel.
    raise

Initialized Gemini model: gemini-1.5-pro
Recommender initialized successfully


In [8]:
# Cut-off ranks at which Recall@k and AP@k are reported.
k_values = [1, 3, 5, 10]
# Per-query scores, one list per cut-off: metrics["recall"][k] / metrics["ap"][k].
metrics = {"recall": {k: [] for k in k_values}, "ap": {k: [] for k in k_values}}
# Queries whose recommendation call raised. They are still scored as 0.0 below,
# but are recorded so failures can be told apart from genuinely poor rankings.
failed_queries = []

print("\nEvaluating recommender system...")
for query, relevant in tqdm(ground_truth.items()):
    try:
        recommendations = recommender.recommend(
            job_description=query,
            top_k=20,
            rerank=True,
            final_results=max(k_values)
        )
        recommended_names = [rec.get("name", "") for rec in recommendations]
        query_metrics = calculate_metrics(relevant, recommended_names, k_values)
    except Exception as e:
        print(f"Error processing query '{query[:30]}...': {e}")
        failed_queries.append(query)
        # Score a failed query as zero at every cut-off.
        query_metrics = {
            "recall": {k: 0.0 for k in k_values},
            "ap": {k: 0.0 for k in k_values},
        }

    for k in k_values:
        metrics["recall"][k].append(query_metrics["recall"][k])
        metrics["ap"][k].append(query_metrics["ap"][k])

if failed_queries:
    print(
        f"Warning: {len(failed_queries)} of {len(ground_truth)} queries failed and were "
        "scored as 0.0; the mean metrics are a lower bound, not a pure measure of ranking quality."
    )

# === Mean Metrics Calculation ===
# Guard against an empty evaluation set (np.mean of an empty list is NaN) and
# convert to plain floats for reporting.
mean_metrics = {
    "Mean Recall": {
        k: float(np.mean(metrics["recall"][k])) if metrics["recall"][k] else 0.0
        for k in k_values
    },
    "MAP": {
        k: float(np.mean(metrics["ap"][k])) if metrics["ap"][k] else 0.0
        for k in k_values
    }
}


Evaluating recommender system...


  0%|          | 0/7 [00:00<?, ?it/s]

Error processing query: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 10
}
]
Error reranking assessments: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 9
}
]
Error processing query: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn

 14%|█▍        | 1/7 [00:12<01:17, 12.88s/it]

Error generating explanation: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 59
}
]
Error processing query: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 58
}
]
Error reranking assessments: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description:

 29%|██▊       | 2/7 [00:19<00:45,  9.04s/it]

Error generating explanation: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 52
}
]
[ERROR] Failed to extract job requirements from Gemini: '\n  "technical_skills"'
Error processing query 'I am looking for a COO for my ...': Gemini failed to extract job requirements.
Error processing query: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 52
}
]
Error reranking assessments: 429 You exceeded your current quo

 57%|█████▋    | 4/7 [00:25<00:16,  5.37s/it]

Error generating explanation: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 46
}
]
[ERROR] Failed to extract job requirements from Gemini: '\n  "technical_skills"'
Error processing query 'Find me 1 hour long assesment ...': Gemini failed to extract job requirements.
Error processing query: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 46
}
]
Error reranking assessments: 429 You exceeded your current quo

100%|██████████| 7/7 [00:32<00:00,  4.65s/it]

Error generating explanation: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 39
}
]
[ERROR] Failed to extract job requirements from Gemini: '\n  "technical_skills"'
Error processing query 'KEY RESPONSIBITILES: Manage th...': Gemini failed to extract job requirements.





In [42]:
# Thresholds (at k=3) above which the summary labels a metric as good.
RECALL_GOOD_THRESHOLD = 0.6
MAP_GOOD_THRESHOLD = 0.5

# Persist the aggregate metrics. Note that json.dump writes the integer k keys
# as strings.
results = {
    "mean_metrics": mean_metrics,
    "queries_evaluated": len(ground_truth),
    "timestamp": pd.Timestamp.now().isoformat()
}
with open("evaluation_results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)
print("Results saved to evaluation_results.json")

recall_at_3 = mean_metrics['Mean Recall'][3]
map_at_3 = mean_metrics['MAP'][3]
# One shared check so the summary line and the observation below always agree,
# including when MAP@3 is exactly on the threshold.
map_is_good = map_at_3 >= MAP_GOOD_THRESHOLD

# === Summary ===
print("\nEVALUATION SUMMARY:")
print(f"- System achieved Mean Recall@3 of {recall_at_3:.4f} ({recall_at_3*100:.1f}%)")
print(f"- MAP@3 score is {map_at_3:.4f}")
print(f"- Vector search with LLM reranking shows {'+' if map_is_good else '-'}performance")
# On ties, max() returns the first (smallest) k with the highest MAP.
print(f"- Best performing K value: {max(k_values, key=lambda k: mean_metrics['MAP'][k])}")

# === Observations ===
if recall_at_3 >= RECALL_GOOD_THRESHOLD:
    print("+ Strong recall performance indicates good coverage of relevant assessments")
else:
    print("- Lower recall suggests need for improved candidate selection")

if map_is_good:
    print("+ Good MAP scores show effective ranking of relevant assessments")
else:
    print("- MAP scores indicate room for improvement in reranking quality")

Results saved to evaluation_results.json

EVALUATION SUMMARY:
- System achieved Mean Recall@3 of 0.0000 (0.0%)
- MAP@3 score is 0.0000
- Vector search with LLM reranking shows -performance
- Best performing K value: 1
- Lower recall suggests need for improved candidate selection
- MAP scores indicate room for improvement in reranking quality
