# General Resume Summary

In [19]:
import torch
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.types import BaseModel
import src.utils as utils
import src.prompts as prompts
import src.postgresDB.pgstore as pgstore
from typing import List

Define embeddings and llm

In [20]:
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
    model_kwargs={
        "torch_dtype": "float16"
        },
    device="cuda",
    embed_batch_size=16,
    normalize=True,
    parallel_process=True,
    show_progress_bar=True
)

Settings.embed_model = embed_model
llm = OpenAI(model = "gpt-4.1")
Settings.llm = llm

Connect to Vector Store (PostgreSQL)

In [21]:
vector_store = pgstore.Vector_Store(connection_string="postgresql://postgres:123456@localhost:5432", db_name="resume_db").create_index()

In [22]:
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [23]:
from typing import List, Optional
from pydantic import BaseModel, EmailStr, HttpUrl


class Experience(BaseModel):
    title: str
    company: str
    start_date: str
    end_date: Optional[str] = None
    description: Optional[str] = None


class Education(BaseModel):
    institution: str
    degree: str
    field_of_study: str
    start_date: str
    end_date: Optional[str] = None


class Project(BaseModel):
    name: str
    description: str
    technologies: List[str]


class Certification(BaseModel):
    name: str
    issuer: str
    date: Optional[str] = None


class ResumeContent(BaseModel):
    """A class representing the content of a resume."""

    name: Optional[str] = None
    email: Optional[EmailStr] = None
    phone: Optional[str] = None
    linkedin: Optional[HttpUrl] = None
    github: Optional[HttpUrl] = None
    skills: List[str]
    experiences: List[Experience]
    education: List[Education]
    projects: List[Project]
    certifications: List[Certification]
    summary: str


In [24]:
summarizer = TreeSummarize(verbose=True, output_cls=ResumeContent)

In [25]:
query_engine = index.as_query_engine()
retrieval_response = query_engine.query("Summarize all resumes in the database.")

text_chunks = [node.node.get_content() for node in retrieval_response.source_nodes]

# Run the TreeSummarize over the text chunks
response = summarizer.get_response(
    "Summarize the resumes.",
    text_chunks=text_chunks
)
print(response)

Chunks: 100%|██████████| 1/1 [00:00<00:00,  3.10it/s]


1 text chunks after repacking
name=None email=None phone=None linkedin=None github=None skills=['Python', 'MATLAB', 'SQL', 'pandas', 'scikit-learn', 'matplotlib', 'ArcGIS', 'Seismic Unix', 'Git', 'Feature engineering', 'regression modeling', 'cognitive neural networks', 'model evaluation (MSE, R2)', 'ANOVA', 'Statistical analysis', 'preprocessing pipelines', 'exploratory data analysis', 'data visualization', 'quality control', '2D/3D seismic data interpretation', 'petrophysical analysis', 'subsurface modeling', 'reservoir characterization', 'GeoPandas', 'Machine Learning', '3D Spatial Analysis', 'ArcPy', 'QGIS', 'Raster Data', 'NumPy', 'tensorflow', 'PyTorch', 'seaborn', 'PostgreSQL', 'Jupyter Notebooks', 'VS Code', 'Linux', 'computer science'] experiences=[] education=[] projects=[] certifications=[Certification(name='Machine Learning with PyTorch', issuer='Udacity', date='February 2025'), Certification(name='Data Analyst', issuer='Udacity', date='December 2024'), Certification(name='

In [16]:
print(text_chunks)

['Skills \nProgramming and Data Analysis: Python, MATLAB, SQL, pandas, scikit-learn, matplotlib, ArcGIS, Seismic \nUnix, version control (Git) \nMachine Learning: Feature engineering, regression modeling, cognitive neural networks, model evaluation \n(MSE, R2), ANOV A \nData Science: Statistical analysis, preprocessing pipelines, exploratory data analysis, data visualization, quality \ncontrol \nGeophysics: 2D/3D seismic data interpretation, petrophysical analysis, subsurface modeling, reservoir \ncharacterization. \n \nPublications \nConference Presentations \n• Petras, B., Tracy, R., Duguid, A., Gupta, N. (2023). Assessing Wellbore Integrity Risk in the Gulf of \nMexico for Potential CO2 Storage Application in Depleted Fields. SECARB Offshore and GOMCarb \nJoint Meeting, Austin, TX. \n• Petras, B., Tracy, R., Chundur, S., et al. (2023). An Overview of Submitted EP A UIC Class VI Permit \nApplications. MRCI Stakeholder Meeting, Morgantown, WV . \nThesis \n• Tracy, R. (2023). Geophysic