In [28]:
import chromadb
from chromadb.config import Settings

In [29]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [30]:
import pdfplumber
def extract_text_from_pdf(pdf_path):
    text=""
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
    return text


In [31]:
from langchain_groq import ChatGroq
from langchain.chains import LLMChain
from prompts.extracted_text_prompt_template import Extracted_Prompt_Template

llm = ChatGroq(
    model_name="llama-3.3-70b-versatile"
)

def cv_summary(extracted_text):
    chain = LLMChain(llm=llm, prompt=Extracted_Prompt_Template)
    summarized_text = chain.run(extracted_resume_text=extracted_text)
    return summarized_text


In [None]:
import chromadb

client = chromadb.PersistentClient(path=r"c:\\Users\\kanis\\Resume_Shortlisting")
collection = client.get_or_create_collection(name="resumes")

In [33]:
extracted_text = extract_text_from_pdf(r"C:\Users\kanis\Resume_Shortlisting\C1070.pdf")
summary = cv_summary(extracted_text)
collection.add(
        documents=[summary],
        ids=[str(hash(summary))]
    )

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


In [34]:
extracted_text = extract_text_from_pdf(r"C:\Users\kanis\Resume_Shortlisting\C1191.pdf")
summary = cv_summary(extracted_text)
collection.add(
        documents=[summary],
        ids=[str(hash(summary))]
    )

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


In [35]:
extracted_text = extract_text_from_pdf(r"C:\Users\kanis\Resume_Shortlisting\C1499.pdf")
summary = cv_summary(extracted_text)
collection.add(
        documents=[summary],
        ids=[str(hash(summary))]
    )

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


In [36]:
extracted_text = extract_text_from_pdf(r"C:\Users\kanis\Resume_Shortlisting\C1791.pdf")
summary = cv_summary(extracted_text)
collection.add(
        documents=[summary],
        ids=[str(hash(summary))]
    )

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


In [5]:
from prompts.job_description_prompt_template import job_description
import json
import chromadb
def query_vectorstore(job_description):
    client = chromadb.PersistentClient(path=r"c:\\Users\\kanis\\Resume_Shortlisting")
    collection = client.get_collection(name="resumes")
    # defining variables
    num_vectors = collection.count()
    num_vectors = int(0.8*(num_vectors))
    results = collection.query(
    query_texts=[job_description],  
    n_results=num_vectors, 
    include=["documents"]
    )
    documents = results.get("documents")  
    extracted_list = []
    for doc in documents[0]:  
        try:
            doc_dict = json.loads(doc)  # using JSON parser 
            extracted_text = {
                "name": doc_dict.get("Full Name", "N/A"),
                "email": doc_dict.get("Email", "N/A")
            }
            extracted_list.append(extracted_text)
        except Exception as e:
            print("Error parsing document:", doc)
            print("Error:", e)
    return extracted_list
    

In [6]:
query_vectorstore( job_description)

[{'name': 'Scott Saunders', 'email': 'scottsaunders13@gmail.com'},
 {'name': 'Brian Hurley', 'email': 'brianhurley66@gmail.com'},
 {'name': 'Brian Hurley', 'email': 'brianhurley66@gmail.com'},
 {'name': 'April Duarte', 'email': 'aprilduarte34@gmail.com'},
 {'name': 'Scott Saunders', 'email': 'scottsaunders13@gmail.com'},
 {'name': 'April Duarte', 'email': 'aprilduarte34@gmail.com'}]

In [None]:
import chromadb

client = chromadb.PersistentClient(path=r"c:\\Users\\kanis\\Resume_Shortlisting")
collection = client.get_or_create_collection(name="resumes")

def save_to_chromaDB(summarized_text):
    collection.add(
        documents=[summarized_text],
        ids=[str(hash(summarized_text))]
    )

In [None]:
# graphs/cv_graph.py
from langgraph.graph import StateGraph, END
from agents.cv_agents.file_extractor import extract_zip
from agents.cv_agents.file_summarizer import cv_summary
from agents.cv_agents.file_text_extractor import extract_text_from_pdf

# Define graph state structure
cv_graph_builder = StateGraph()

cv_graph_builder.add_node("extract_pdf", extract_zip)
cv_graph_builder.add_node("extract_text", extract_text_from_pdf)
cv_graph_builder.add_node("summarize_cv", cv_summary)
cv_graph_builder.add_node("save_to_vectorstore", save_to_chromaDB)

cv_graph_builder.set_entry_point("extract_pdf")
cv_graph_builder.add_edge("extract_pdf","extract_text")
cv_graph_builder.add_edge("extract_text", "summarize_cv")
cv_graph_builder.add_edge("summarize_cv", "save_to_vectorstore")
cv_graph_builder.add_edge("save_to_vectorstore", END)

cv_graph = cv_graph_builder.compile()