In [None]:
import pandas as pd

# Load the CSV file
data = pd.read_csv('Resume.csv')

# Select only the relevant columns
resumes = data[['ID', 'Resume_str', 'Category']]

# Basic text cleaning (removing extra spaces, newlines, etc.)
def clean_text(text):
    text = text.replace('\n', ' ')  # Remove newlines
    text = text.replace('\r', '')  # Remove carriage returns
    text = text.strip()  # Remove leading and trailing spaces
    return text

resumes['cleaned_resume'] = resumes['Resume_str'].apply(clean_text)


In [None]:
import spacy
from spacy import displacy

# Load pre-trained spaCy model for English
nlp = spacy.load('en_core_web_sm')

# Process a resume string using spaCy's NER model
def extract_entities(text):
    doc = nlp(text)
    entities = []
    for ent in doc.ents:
        entities.append((ent.text, ent.label_))  # Collect named entities and their labels
    return entities

# Apply NER to each cleaned resume
resumes['entities'] = resumes['cleaned_resume'].apply(extract_entities)

# Optional: Visualize one resume's entities (for validation)
displacy.render(nlp(resumes['cleaned_resume'][0]), style='ent', jupyter=True)


In [3]:
# Save the extracted entities to a new CSV
resumes[['ID', 'entities', 'Category']].to_csv('Processed_Resumes.csv', index=False)

In [4]:
!pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence_transformers
Successfully installed sentence_transformers-3.1.1


In [5]:
from huggingface_hub import login

login('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')

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [6]:
from sentence_transformers import SentenceTransformer

# Load a pre-trained SentenceTransformer model (choose one)
model = SentenceTransformer('all-MiniLM-L6-v2')  # or try 'paraphrase-MPnet-base-v2'

# Generate embeddings for each resume (using 'entities_text' from NER or 'cleaned_resume' if needed)
#resumes['embeddings'] = resumes['entities_text'].apply(lambda text: model.encode(text))

resumes['embeddings'] = resumes['cleaned_resume'].apply(lambda text: model.encode(text))


# Example: Checking the first embedding vector
print(resumes['embeddings'].iloc[0])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

[-4.44783047e-02  2.12291945e-02  5.16218226e-03  8.68071467e-02
 -1.90762151e-02  7.81410199e-04  2.37274114e-02 -4.06728275e-02
 -1.07696272e-01 -4.17434759e-02  2.73197778e-02 -3.32663879e-02
  5.37253655e-02  3.51293758e-03  3.34979966e-02 -6.90992037e-03
  5.97888939e-02  4.97065997e-03  2.38164719e-02 -8.98992270e-02
 -2.34015770e-02 -3.63232605e-02 -6.50669709e-02  5.72308712e-03
 -8.66769180e-02 -2.19410807e-02 -1.34049105e-02  6.51615858e-02
 -6.56526983e-02 -4.83934358e-02 -1.35338940e-02 -3.70528251e-02
  8.72621164e-02 -2.55778115e-02  4.85715382e-02  1.14448257e-01
  3.60471718e-02 -2.03129333e-02  3.95208597e-02 -3.87397073e-02
 -4.27798219e-02 -4.83792499e-02 -2.18281616e-02 -5.85983880e-02
  1.58920344e-02 -2.26589199e-02 -7.07798684e-03 -2.54447218e-02
 -8.44771117e-02  3.12941857e-02 -5.66065572e-02 -7.04528647e-04
  4.22375798e-02  3.30649987e-02 -4.74978648e-02  4.66278903e-02
  2.50048749e-02  2.18113232e-02 -1.04683854e-01 -3.39016616e-02
 -1.30508557e-01 -2.02439

In [7]:
import numpy as np

# Convert embeddings to a format that can be easily saved
resumes['embeddings'] = resumes['embeddings'].apply(lambda emb: np.array(emb))

# Save to CSV (embedding vectors might be large, so consider saving as JSON or a binary file if needed)
resumes[['ID', 'embeddings', 'Category']].to_csv('Resumes_with_Embeddings.csv', index=False)

# Alternatively, save the embeddings using numpy if CSV gets too large
np.save('resume_embeddings.npy', resumes['embeddings'].to_list())


In [8]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-0.5.7-py3-none-any.whl.metadata (6.8 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.30.6-py3-none-any.whl.metadata (6.6 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.6.6-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.27.0-py3-none-any.whl.metadata (1.4 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_pro

In [9]:
import pandas as pd
import chromadb
#from chromadb.config import Settings # This import is no longer needed
import numpy as np

# Load the processed resumes CSV with embeddings
resumes = pd.read_csv('Resumes_with_Embeddings.csv')

# Load embeddings saved as numpy arrays (this ensures they are properly formatted)
embeddings = np.load('resume_embeddings.npy', allow_pickle=True)

# Set up Chroma DB client
# client = chromadb.Client(Settings(chroma_db_impl="duckdb+parquet")) # This line is no longer needed
client = chromadb.Client()

# Create a collection in Chroma DB (or use an existing one)
collection = client.create_collection(name="resume_embeddings")

# Prepare metadata from resumes
metadata = resumes[['ID', 'Category']].to_dict(orient='records')

# Prepare IDs
ids = resumes['ID'].astype(str).tolist()

# Add embeddings to Chroma DB
collection.add(
    ids=ids,
    embeddings=embeddings.tolist(),  # Ensure embeddings are in list format
    metadatas=metadata,
)

print("Documents added successfully to Chroma DB!")

Documents added successfully to Chroma DB!


In [10]:
collection

Collection(id=afe58725-1d0c-4185-9a1e-ae88fbcecb27, name=resume_embeddings)

In [11]:
!pip install chromadb transformers torch



In [12]:
import chromadb
#from chromadb.config import Settings
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Load the pre-trained SentenceTransformer model for embedding queries
query_model = SentenceTransformer('all-MiniLM-L6-v2')

# Load the language model for generation (e.g., GPT-2 or any other model)
generator = pipeline('text-generation', model='gpt2')  # Replace with your desired model

# Function to perform retrieval and generation
def rag_pipeline(user_query):
    # Step 1: Convert the user query into an embedding
    query_embedding = query_model.encode(user_query)

    # Step 2: Use the vector database to retrieve relevant documents
    # Convert query_embedding to a list of lists
    results = collection.query(
        query_embeddings=[query_embedding.tolist()], # Changed this line
        n_results=5,  # Adjust the number of results to retrieve
        include=['metadatas', 'embeddings'],  # Include metadata and embeddings
    )

  # Step 3: Prepare the retrieved documents for generation
    #retrieved_texts = [result['document'] for result in results['metadatas']] # This line was causing the error
    retrieved_texts = results['metadatas'] # Access the list of dictionaries directly
    # Join the retrieved texts into a single context
    context = "\n".join([f"{doc['Category']}: {doc['ID']}" for doc in retrieved_texts[0]]) # Access the first list of dictionaries

    # Step 4: Pass the context to the language model to generate a response
    generated_response = generator(f"Based on the following context:\n{context}\nAnswer the question: {user_query}", max_length=150)

    return generated_response

# Example usage
user_query = "What skills are required for a Data Scientist?"
response = rag_pipeline(user_query)
print(response[0]['generated_text'])



config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Based on the following context:
ENGINEERING: 12011623
CONSULTANT: 17025292
PUBLIC-RELATIONS: 16103783
INFORMATION-TECHNOLOGY: 37242217
ENGINEERING: 27040860
Answer the question: What skills are required for a Data Scientist? This question is also asked of Engineering, and is to be answered in accordance with the C.E.O.S., the C.E.O.R., and the G.O.U.
Source (s): Initiative, C.E.O.S., G.O.N.(incl.) and C.E.O.C.(in


In [13]:
user_query = "Which resume best suits the role of HR? give the id of resume from the database"
response = rag_pipeline(user_query)
print(response[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Based on the following context:
HR: 24508725
HR: 24402267
HR: 14256329
HR: 23408537
ARTS: 16244633
Answer the question: Which resume best suits the role of HR? give the id of resume from the database on job search page or the name of a job on that resume.
In order to answer the question: Which resume best suits the role of HR or not, you must select the best available resume (eg: "My resume is very good, how I see it will suit me better!") (and let's also check your previous HR experience and resume: HR: 0) The problem in selecting your resume should not be


In [15]:
user_query = "Which resume best suits the role of DESIGNER with 15+ experience? give the id of resume from the database"

In [17]:
from sentence_transformers import CrossEncoder

# Example: Using a cross-encoder for document re-ranking
cross_encoder = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-6')  # You can try other models as well

def rag_pipeline_with_reranking(user_query):
    # Step 1: Convert the user query into an embedding
    query_embedding = query_model.encode(user_query)

    # Step 2: Retrieve initial documents from ChromaDB
    results = collection.query(
        query_embeddings=[query_embedding.tolist()],
        n_results=10,  # Retrieve more results for re-ranking
        include=['metadatas', 'embeddings']
    )

    # Step 3: Re-rank the results using a cross-encoder
    retrieved_texts = results['metadatas']
    candidate_docs = [f"{doc['Category']}: {doc['ID']}" for doc in retrieved_texts[0]]
    scores = cross_encoder.predict([(user_query, doc) for doc in candidate_docs])

    # Step 4: Select top documents after re-ranking
    top_docs = sorted(zip(candidate_docs, scores), key=lambda x: x[1], reverse=True)[:5]

    # Step 5: Concatenate top-ranked documents to form the context
    context = "\n".join([doc[0] for doc in top_docs])

    # Step 6: Generate a response based on the top-ranked context
    generated_response = generator(f"Context:\n{context}\nAnswer the question: {user_query}", max_length=150)

    return generated_response

# Example usage
response = rag_pipeline_with_reranking(user_query)
print(response[0]['generated_text'])


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Context:
DESIGNER: 24285142
DESIGNER: 15425154
DESIGNER: 30965258
DESIGNER: 39252859
DESIGNER: 25061645
Answer the question: Which resume best suits the role of DESIGNER with 15+ experience? give the id of resume from the database and the answer to this question. You get the answer.

So, next time you have a question about whether or not it's right for an app to be implemented over the network by a specific person, you might want to use this question to help answer it.

References


**Setting up fastAPI**

The API will:

- Accept a user query in text format via a POST request.
- Run the query through the RAG pipeline to retrieve relevant information and generate a response.
- Return the generated response as the API output in JSON format.

Steps:
- Set up a FastAPI app.
- Create an endpoint to accept a query as input.
- Implement the RAG pipeline inside the endpoint.
- Return the generated response.
- Test the API.

In [20]:
!pip install fastapi uvicorn pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [27]:
!ngrok config add-authtoken 2mONWyk1STkVsA0TyQ3RvFXE89E_2DSEPnmdWe9zBrcRSgicD

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [59]:
from fastapi import FastAPI
from pydantic import BaseModel
from pyngrok import ngrok
from fastapi.middleware.cors import CORSMiddleware
import uvicorn

import chromadb
import numpy as np
from sentence_transformers import SentenceTransformer, CrossEncoder
from transformers import pipeline

# Load models and set up ChromaDB client (same as before)
query_model = SentenceTransformer('all-MiniLM-L6-v2')
cross_encoder = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-6')
generator = pipeline('text-generation', model='gpt2')

client = chromadb.Client()
collection = client.get_collection(name="resume_embeddings")

# FastAPI app setup
app = FastAPI()

class QueryInput(BaseModel):
    query: str

def rag_pipeline(user_query):
    query_embedding = query_model.encode(user_query)

    results = collection.query(
        query_embeddings=[query_embedding.tolist()],
        n_results=10,
        include=['metadatas', 'embeddings'],
    )

    candidate_docs = [doc['Category'] + ": " + doc['ID'] for doc in results['metadatas']]
    scores = cross_encoder.predict([(user_query, doc) for doc in candidate_docs])
    top_docs = sorted(zip(candidate_docs, scores), key=lambda x: x[1], reverse=True)[:5]

    context = "\n".join([doc[0] for doc in top_docs])

    generated_response = generator(f"Context:\n{context}\nAnswer the question: {user_query}", max_length=150)
    return generated_response[0]['generated_text']

@app.post("/query")
async def get_response(input_data: QueryInput):
    user_query = input_data.query
    response = rag_pipeline(user_query)
    return {"response": response}

# Set up the ngrok tunnel to the Uvicorn server
# Start the Uvicorn server in the background
#uvicorn.run(app, host="0.0.0.0", port=8000)


ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-36' coro=<Server.serve() done, defined at /usr/local/lib/python3.10/dist-packages/uvicorn/server.py:67> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/main.py", line 577, in run
    server.run()
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/server.py", line 65, in run
    return asyncio.run(self.serve(sockets=sockets))
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/usr/lib/python3.10/asyncio/tasks.py", 

In [61]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import chromadb
import numpy as np
from sentence_transformers import SentenceTransformer, CrossEncoder
from transformers import pipeline

# Load models and set up ChromaDB client
query_model = SentenceTransformer('all-MiniLM-L6-v2')
cross_encoder = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-6')
generator = pipeline('text-generation', model='gpt2')

client = chromadb.Client()
collection = client.get_collection(name="resume_embeddings")

# FastAPI app setup
app = FastAPI()

class QueryInput(BaseModel):
    user_query: str

# Define the RAG pipeline
def rag_pipeline(user_query):
    try:
        # Step 1: Convert user query into an embedding
        query_embedding = query_model.encode(user_query)

        # Step 2: Retrieve relevant documents from Chroma DB
        results = collection.query(
            query_embeddings=[query_embedding.tolist()],
            n_results=10,
            include=['metadatas', 'embeddings'],
        )

        if not results['metadatas']:  # Check if results are empty
            raise ValueError("No relevant documents found.")

        # Step 3: Rank the documents using the cross-encoder model
        candidate_docs = [doc['Category'] + ": " + doc['ID'] for doc in results['metadatas']]
        scores = cross_encoder.predict([(user_query, doc) for doc in candidate_docs])
        top_docs = sorted(zip(candidate_docs, scores), key=lambda x: x[1], reverse=True)[:5]

        # Step 4: Pass the context to the language model for response generation
        context = "\n".join([doc[0] for doc in top_docs])
        generated_response = generator(
            f"Context:\n{context}\nAnswer the question: {user_query}",
            max_length=150
        )

        return generated_response[0]['generated_text']

    except Exception as e:
        # Log error and return the exception message
        print(f"Error during RAG pipeline: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# POST endpoint to accept queries
@app.post("/query")
async def get_response(input_data: QueryInput):
    try:
        user_query = input_data.user_query
        response = rag_pipeline(user_query)
        return {"response": response}
    except HTTPException as e:
        raise e  # Raise HTTP errors directly
    except Exception as e:
        # General fallback for internal errors
        raise HTTPException(status_code=500, detail="An error occurred while processing the request.")


ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-43' coro=<Server.serve() done, defined at /usr/local/lib/python3.10/dist-packages/uvicorn/server.py:67> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/main.py", line 577, in run
    server.run()
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/server.py", line 65, in run
    return asyncio.run(self.serve(sockets=sockets))
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/usr/lib/python3.10/asyncio/tasks.py", 

In [62]:
import nest_asyncio
from pyngrok import ngrok
import uvicorn

ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)
nest_asyncio.apply()
uvicorn.run(app, port=8000)

Public URL: https://99af-34-139-62-87.ngrok-free.app


INFO:     Started server process [431]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     5.245.229.148:0 - "POST /query HTTP/1.1" 422 Unprocessable Entity
Error during RAG pipeline: list indices must be integers or slices, not str
INFO:     5.245.229.148:0 - "POST /query HTTP/1.1" 500 Internal Server Error


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [431]
