# General Resume Summary

In [None]:
import torch
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.types import BaseModel
import src.utils as utils
import src.prompts as prompts
import src.postgresDB.pgstore as pgstore
from typing import List

Define the embedding model and the LLM

In [None]:
# Pick the compute device at runtime so this cell also works on machines
# without a CUDA GPU; on CUDA hosts the configuration is the same as before.
embed_device = "cuda" if torch.cuda.is_available() else "cpu"

embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
    model_kwargs={
        # Half precision is kept for the GPU path; the CPU fallback uses
        # full precision, since float16 is primarily a GPU optimisation.
        "torch_dtype": "float16" if embed_device == "cuda" else "float32",
    },
    device=embed_device,
    embed_batch_size=16,
    normalize=True,
    parallel_process=True,
    show_progress_bar=True,
)

# Store the embedding model and the LLM on LlamaIndex's `Settings` object so
# they are available to the cells below without being passed explicitly.
Settings.embed_model = embed_model
llm = OpenAI(model="gpt-4.1")
Settings.llm = llm

Connect to Vector Store (PostgreSQL)

In [None]:
# Build the project's PostgreSQL store wrapper first, then ask it for the
# object that is later handed to VectorStoreIndex.from_vector_store.
# NOTE(review): connection_string is empty here -- presumably it has to be
# filled in (or was redacted) before this cell can connect; confirm.
pg_store = pgstore.Vector_Store(
    connection_string="",
    db_name="resume_db",
)
vector_store = pg_store.create_index()

In [None]:
# Create the index object on top of the vector store built above.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
)

In [None]:
from typing import List, Optional
from pydantic import BaseModel, EmailStr, HttpUrl


# One work-experience entry of a resume; used as the element type of
# ResumeContent.experiences.
class Experience(BaseModel):
    title: str
    company: str
    start_date: str  # plain string; no date format is enforced here
    end_date: Optional[str] = None  # optional; presumably None for a current role -- confirm
    description: Optional[str] = None  # optional free-text description


# One education entry of a resume; used as the element type of
# ResumeContent.education.
class Education(BaseModel):
    institution: str
    degree: str
    field_of_study: str
    start_date: str  # plain string; no date format is enforced here
    end_date: Optional[str] = None  # optional; presumably None while still enrolled -- confirm


# One project entry of a resume; used as the element type of
# ResumeContent.projects. All three fields are required.
class Project(BaseModel):
    name: str
    description: str
    technologies: List[str]  # required list of strings (no default is declared)


# One certification entry of a resume; used as the element type of
# ResumeContent.certifications.
class Certification(BaseModel):
    name: str
    issuer: str
    date: Optional[str] = None  # optional plain string; no date format is enforced here


# Top-level structured resume model; it is passed to TreeSummarize as
# `output_cls` further down in this file.
class ResumeContent(BaseModel):
    """A class representing the content of a resume."""

    # Contact details: all optional, defaulting to None.
    name: Optional[str] = None
    email: Optional[EmailStr] = None  # validated as an e-mail address by pydantic's EmailStr
    phone: Optional[str] = None
    linkedin: Optional[HttpUrl] = None  # validated as an HTTP(S) URL by pydantic's HttpUrl
    github: Optional[HttpUrl] = None  # validated as an HTTP(S) URL by pydantic's HttpUrl
    # Resume sections: no defaults are declared, so each of these is required.
    skills: List[str]
    experiences: List[Experience]
    education: List[Education]
    projects: List[Project]
    certifications: List[Certification]
    summary: str  # required free-text summary


In [None]:
# Tree-summarize response synthesizer, configured with ResumeContent as its
# output class and with verbose output enabled.
summarizer = TreeSummarize(
    output_cls=ResumeContent,
    verbose=True,
)

In [None]:
# Only the retrieved source nodes are used from this query, so request the
# "no_text" response mode: retrieval still runs, but the LLM answer synthesis
# (whose text was previously generated and then discarded) is skipped.
query_engine = index.as_query_engine(response_mode="no_text")
retrieval_response = query_engine.query("Summarize all resumes in the database.")

# NOTE(review): the number of chunks is bounded by the retriever's top-k
# setting (left at its default here), so "all resumes" may not actually be
# covered -- confirm and pass similarity_top_k explicitly if needed.
text_chunks = [
    scored_node.node.get_content()
    for scored_node in retrieval_response.source_nodes
]

# Summarize the retrieved chunks with the TreeSummarize instance.
response = summarizer.get_response(
    "Summarize the resumes.",
    text_chunks=text_chunks,
)
print(response)

In [None]:
# Show the raw chunk texts that were passed to the summarizer, for inspection.
print(text_chunks)