# All imports and inits

In [None]:
import gradio as gr
import os
import time
import torch
import numpy as np

from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone

load_dotenv()

PINECONE_API = os.getenv("PINECONE_API")

def track_time(func):
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print(f"[Time Tracker] `{func.__name__}` took {end - start:.4f} seconds")
        return result

    return wrapper

# Init Pinecone

In [3]:
pc = Pinecone(api_key=PINECONE_API)

# print(PINECONE_API)

# Connect to the index
index = pc.Index("potential-talents")  # -- COMPLETE SURGICAL TECH BOOTCAMP

# Embedding Function



In [None]:
sbert_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

@track_time
def get_embedding(text="None"):
    # Generate embedding using the pre-loaded model
    embedding = sbert_model.encode(text)

    # Return the embedding as a list/array
    return embedding.tolist()

#print(len(get_embedding("Student")))

[Time Tracker] `get_embedding` took 0.1646 seconds
384


  attn_output = torch.nn.functional.scaled_dot_product_attention(


# Query Pinecone


In [None]:
# Function to query Pinecone index using embeddings
@track_time
def query_pinecone(embedding):
    # Use keyword arguments to pass the embedding and other parameters
    result = index.query(vector=embedding, top_k=5, include_metadata=True)
    return result["matches"]

print(query_pinecone(get_embedding("Student")))

[Time Tracker] `get_embedding` took 0.0074 seconds
[Time Tracker] `query_pinecone` took 1.1179 seconds
[{'id': 'vec_15',
 'metadata': {'chunk_id': 0.0,
              'file_type': 'excel',
              'row_id': 15.0,
              'source': './potential-talents.xlsx',
              'text': 'id: 16 job_title: Native English Teacher at EPIK '
                      '(English Program in Korea) location: Kanada connection: '
                      '500+  fit: nan'},
 'score': 0.334692508,
 'values': []}, {'id': 'vec_31',
 'metadata': {'chunk_id': 0.0,
              'file_type': 'excel',
              'row_id': 31.0,
              'source': './potential-talents.xlsx',
              'text': 'id: 32 job_title: Native English Teacher at EPIK '
                      '(English Program in Korea) location: Kanada connection: '
                      '500+  fit: nan'},
 'score': 0.334044933,
 'values': []}, {'id': 'vec_95',
 'metadata': {'chunk_id': 0.0,
              'file_type': 'excel',
          

In [None]:
# Tokenizer to count number of tokens
"""
Putting tokenizer outside of the function to avoid reinitialization and optimize performance.
"""
tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-base-en")

@track_time
def count_tokens(text: str) -> int:
    # Encode the text into tokens
    tokens = tokenizer.encode(text)
    return len(tokens)

# Load Finetuned Model

In [18]:
# Load the finetuned model from NLP_OPS
model_path = "finetuned_job_title_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
base_model = AutoModel.from_pretrained(model_path)

# Load the similarity head
similarity_head_path = os.path.join(model_path, "similarity_head.pt")
if os.path.exists(similarity_head_path):
    similarity_head = torch.nn.Linear(base_model.config.hidden_size, 1)
    similarity_head.load_state_dict(
        torch.load(similarity_head_path, map_location=torch.device("cpu"))
    )
else:
    # Create a new head if file doesn't exist
    similarity_head = torch.nn.Linear(base_model.config.hidden_size, 1)


# Create the full model
class SimilarityModel(torch.nn.Module):
    def __init__(self, encoder, head):
        super().__init__()
        self.encoder = encoder
        self.head = head

    def forward(self, **inputs):
        outputs = self.encoder(**inputs)
        if hasattr(outputs, "last_hidden_state"):
            embeddings = outputs.last_hidden_state.mean(dim=1)
        else:
            embeddings = outputs[0].mean(dim=1)
        return self.head(embeddings)


model = SimilarityModel(base_model, similarity_head)
model.eval()  # Set to evaluation mode

SimilarityModel(
  (encoder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): lora.Linear(
                (base_layer): Linear(in_features=768, out_features=768, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=768, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_f

# Process User Query

In [17]:
@track_time
def get_model_response(query, candidates, system_prompt="", user_prompt=""):
    """Get response using the finetuned model"""
    # If prompts are provided, we can use them to enhance the query
    enhanced_query = query
    if system_prompt and user_prompt:
        # You could combine the prompts with the query if your model benefits from it
        enhanced_query = f"{system_prompt}\n\n{user_prompt}\n\n{query}"

    with torch.no_grad():
        # Encode the query
        query_inputs = tokenizer(
            enhanced_query, return_tensors="pt", padding=True, truncation=True
        )

        # Get similarity scores for each candidate
        scores = []
        for candidate in candidates:
            # Encode the candidate
            candidate_inputs = tokenizer(
                candidate, return_tensors="pt", padding=True, truncation=True
            )

            # Get embeddings
            query_embedding = model.encoder(**query_inputs)[0].mean(dim=1)
            candidate_embedding = model.encoder(**candidate_inputs)[0].mean(dim=1)

            # Calculate similarity score
            similarity = torch.nn.functional.cosine_similarity(
                query_embedding, candidate_embedding
            )
            scores.append(similarity.item())

        # Sort candidates by score
        sorted_indices = np.argsort(scores)[::-1]
        sorted_candidates = [candidates[i] for i in sorted_indices]
        sorted_scores = [scores[i] for i in sorted_indices]

        # Format the response
        response = "Here are the most relevant matches:\n\n"
        for i, (candidate, score) in enumerate(
            zip(sorted_candidates[:5], sorted_scores[:5])
        ):
            response += f"{i + 1}. {candidate} (Score: {score:.3f})\n"

        return response


@track_time
def process_user_query(user_query: str, conversation_history: list, response_type: str):
    # Generate embedding and get relevant context
    embedding = get_embedding(user_query)
    relevant_chunks = query_pinecone(embedding)

    # Extract text and metadata from chunks
    candidates = [chunk["metadata"]["text"] for chunk in relevant_chunks]
    context = "\n".join(candidates)

    # Format conversation history for the prompt
    history_str = "\n".join(
        f"User: {user}\nAssistant: {response}"
        for user, response in conversation_history
    )

    # Create system prompt
    system_prompt = f"""
    You are an AI Talent Hunter that helps find the best candidates based on job requirements.
    
    Conversation history:
    {history_str}
    
    Candidate database:
    {context}
    
    Analyze the user's query and find the most relevant candidates from the database.
    """

    # User prompt
    user_prompt = f"""
    New recruitment query:
    "{user_query}"
    
    Response type requested: {response_type}
    """

    # Get response from finetuned model
    model_response = get_model_response(
        user_query, candidates, system_prompt, user_prompt
    )

    # Combine with context for display
    full_response = f"""Based on your query about "{user_query}", I've found these potential matches:

{model_response}

This analysis is based on our finetuned talent matching model that evaluates semantic similarity between your query and candidate profiles.
"""

    # First, yield a response with empty text to set up the message
    temp_history = conversation_history.copy()
    temp_history.append((user_query, ""))
    yield temp_history, context

    # Simulate streaming by yielding chunks of the response
    chunks = [full_response[i : i + 20] for i in range(0, len(full_response), 20)]
    partial_response = ""

    for chunk in chunks:
        partial_response += chunk
        temp_history = conversation_history.copy()
        temp_history.append((user_query, partial_response))
        yield temp_history, context
        time.sleep(0.01)  # Small delay to simulate streaming

    # Return the final history with the complete response
    final_history = conversation_history.copy()
    final_history.append((user_query, full_response))
    yield final_history, context


@track_time
def create_gradio_interface(conversation_history, response_type="default"):
    with gr.Blocks() as interface:
        gr.Markdown("# 🔍 AI Talent Hunter")
        gr.Markdown(
            "Welcome! I'll help you find the perfect candidates. Describe the position or skills you're looking for."
        )

        # State management
        chat_history = gr.State(conversation_history)

        with gr.Row():
            chatbot = gr.Chatbot(height=500)
            with gr.Column(scale=0.5):
                context_display = gr.Textbox(
                    label="Candidate Database Results", interactive=False
                )

        user_input = gr.Textbox(
            label="Your Recruitment Query",
            placeholder="E.g., 'Find me experienced surgical technicians' or 'I need candidates with healthcare administration skills'",
        )

        with gr.Row():
            submit_btn = gr.Button("Search Candidates", variant="primary")
            undo_btn = gr.Button("Undo Last")
            clear_btn = gr.Button("Clear History")

        def handle_submit(user_query, history):
            if not user_query.strip():
                return gr.update(), history, ""

            # Use the generator directly from process_user_query
            # This will yield incremental updates as they arrive
            response_generator = process_user_query(user_query, history, response_type)

            for updated_history, context in response_generator:
                # Directly update the chatbot with each streaming chunk
                yield "", updated_history, context, updated_history

        # Component interactions with streaming support
        submit_btn.click(
            handle_submit,
            [user_input, chat_history],
            [user_input, chat_history, context_display, chatbot],
        )

        # Add submit on Enter key press
        user_input.submit(
            handle_submit,
            [user_input, chat_history],
            [user_input, chat_history, context_display, chatbot],
        )

        undo_btn.click(
            lambda history: history[:-1] if history else [],
            [chat_history],
            [chat_history],
        ).then(lambda x: x, [chat_history], [chatbot])

        clear_btn.click(lambda: [], None, [chat_history]).then(
            lambda: ([], ""), None, [chatbot, context_display]
        )

    return interface


def main():
    """
    Main entry point for the application.

    Initializes the conversation history with a welcome message,
    creates the Gradio interface, and launches the web app.
    """
    # Initialize conversation history with welcome message
    welcome_message = "Hello! I'm your AI Talent Hunter. I can help you find the perfect candidates for your positions by analyzing our talent database. What kind of talent are you looking for today?"
    initial_conversation_history = [("", welcome_message)]

    # Create and launch the interface
    interface = create_gradio_interface(initial_conversation_history, "long")
    interface.launch(share=True)


if __name__ == "__main__":
    main()

  chatbot = gr.Chatbot(height=500)


[Time Tracker] `create_gradio_interface` took 0.1318 seconds
* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://823f81a01c70a47e14.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


[Time Tracker] `process_user_query` took 0.0000 seconds
[Time Tracker] `get_embedding` took 0.0060 seconds
[Time Tracker] `query_pinecone` took 1.1748 seconds
[Time Tracker] `get_model_response` took 0.3466 seconds
[Time Tracker] `process_user_query` took 0.0000 seconds
[Time Tracker] `get_embedding` took 0.0167 seconds
[Time Tracker] `query_pinecone` took 0.2109 seconds
[Time Tracker] `get_model_response` took 0.6140 seconds
[Time Tracker] `process_user_query` took 0.0000 seconds
[Time Tracker] `get_embedding` took 0.0096 seconds
[Time Tracker] `query_pinecone` took 1.1781 seconds
[Time Tracker] `get_model_response` took 1.8151 seconds
