In [2]:
from dotenv import load_dotenv

# Load environment variables from the .env file located one directory up.
load_dotenv('../.env')

True

In [15]:
from pydantic import BaseModel, EmailStr, Field
from typing import Optional

# Contact details of a candidate. Every field is optional and defaults to None.
class PersonalDetails(BaseModel):
    name: Optional[str] = Field(
        default=None,
        description="Full name of the candidate.",
    )
    email: Optional[EmailStr] = Field(
        default=None,
        description="Candidate's primary email address.",
    )
    mobile: Optional[str] = Field(
        default=None,
        description="Candidate's mobile phone number including country code if applicable.",
    )

from pydantic import BaseModel, Field
from typing import List, Optional

class Project(BaseModel):
    """A single project extracted from a candidate's resume.

    The class name was missing from the definition (a syntax error); it is
    restored to ``Project``, the name the rest of the notebook refers to
    (``List[Project]`` in ``WorkExperience`` and ``pydantic_object = Project``
    when parsing the projects section).

    Attributes:
        title: Title or name of the project, or None when absent.
        description: Short overview of the project, or None when absent.
        technologies: Technologies or tools used; defaults to an empty list.
    """

    title: Optional[str] = Field(
        None, description="Title or name of the project."
    )
    description: Optional[str] = Field(
        None, description="Short overview or summary of the project."
    )
    technologies: List[str] = Field(
        default_factory=list, description="List of technologies or tools used in the project."
    )


from pydantic import BaseModel, Field
from typing import Optional

# One employment entry from a resume. The text fields are optional (default None);
# `projects` defaults to an empty list of Project entries.
class WorkExperience(BaseModel):
    company: Optional[str] = Field(
        default=None,
        description="Name of the company where the candidate worked.",
    )
    title: Optional[str] = Field(
        default=None,
        description="Job title held at the company.",
    )
    duration: Optional[str] = Field(
        default=None,
        description="Employment period (e.g., 'Jan 2018 - Dec 2020').",
    )
    responsibilities: Optional[str] = Field(
        default=None,
        description="Brief description of the candidate's responsibilities or achievements in the role.",
    )
    projects: List[Project] = Field(
        description="A list of projects the candidate has worked on",
        default_factory=list,
    )


from pydantic import BaseModel, Field
from typing import List

# Flat list of a candidate's skills; defaults to an empty list.
class Skills(BaseModel):
    skills: List[str] = Field(
        description="List of the candidate's technical and soft skills.",
        default_factory=list,
    )

from pydantic import BaseModel, Field
from typing import Optional

# One education entry from a resume. Every field is optional and defaults to None.
class Education(BaseModel):
    institution: Optional[str] = Field(
        default=None,
        description="Name of the educational institution.",
    )
    degree: Optional[str] = Field(
        default=None,
        description="Degree or certification earned by the candidate.",
    )
    year: Optional[str] = Field(
        default=None,
        description="Time period of attendance or graduation year.",
    )



In [7]:
import json
file_path = 'data/mr.json'

# Read the resume JSON with an explicit UTF-8 encoding: the resume text contains
# non-ASCII characters (e.g. bullet points), and the platform default encoding
# is not guaranteed to be UTF-8.
with open(file_path, 'r', encoding='utf-8') as f:
    resume_info = json.load(f)

# Print every section's text, each followed by a separator line.
for meta, section_text in resume_info.items():
    print(section_text)
    print('=' * 100)

Personal Details:
Name: Mohan Reddy Pallavula
Email: mohanreddy.pmg@gmail.com
Phone: +91-8309913459
LinkedIn: linkedin/mohanreddypallavula
GitHub: github/mohanreddypallavula
Education:
- Bachelor of Technology - Computer Science and Engineering; GPA: 8.0
  JNTUA Engineering College, Kalikiri, AP, India (July 2017 - June 2021)
- Intermediate - MPC; Per: 96.7
  Sri Chaitanya Junior College, Tirupati, AP, India (July 2015 - June 2017)
- AP Residential School, Gyaram Palli, AP, India
  SSC ; GPA: 9.5 (July 2008 - Apr 2015)
Work Experience:
- AI Engineer at Matdun Labs India Pvt. Ltd (Remote, Dec 2021 - Present)
  • Face Recognition system : Developed an advanced face recognition system utilizing SOTA based models for detecting and recognition the face and deployed on edge device (jetson nano) and kubernetes cluster (Nvidia Gpus). Optimized the models using tensorrt to reduce latency and Used Nvidia Triton server for dynamic batching, GPU and CPU optimization, and robust scalability, ensuri

In [8]:
# Show the top-level section names available in the loaded resume JSON.
resume_info.keys()

dict_keys(['personal_details', 'education', 'work_experience:', 'skills', 'projects', 'certifications_awards_achievements', 'publications_research'])

In [10]:
from prompts import *

# Build the work-experience prompt: fill the template with that resume section and
# append the shared instructions after a newline.
# NOTE: the key really is 'work_experience:' (with a trailing colon) in the loaded data.
msg_input = work_experience_prompt.format(resume_content=resume_info['work_experience:']) + '\n' + resume_template_instructions

In [12]:
# Display the rendered work-experience prompt before sending it to the model.
print(msg_input)

You are provided with a resume document in plain text format. Your task is to extract structured candidate details from the resume.
Resume Content:
Work Experience:
- AI Engineer at Matdun Labs India Pvt. Ltd (Remote, Dec 2021 - Present)
  • Face Recognition system : Developed an advanced face recognition system utilizing SOTA based models for detecting and recognition the face and deployed on edge device (jetson nano) and kubernetes cluster (Nvidia Gpus). Optimized the models using tensorrt to reduce latency and Used Nvidia Triton server for dynamic batching, GPU and CPU optimization, and robust scalability, ensuring efficient and high-performance inference for deployed AI models. Tech: Tensorrt , Nvidia Jetson Nano , Kubernetes , Docker , FastAPI , Django , Web sockets , Grpc ,Pytorch , Opencv , Scikit-learn , PostgresSQL , Azure blob , GIT
  • Video Analytics System : Delevoped an advanced AI-powered solution designed for real-time monitoring and analysis of video streams. It offers

In [13]:
from langchain_ollama import ChatOllama

# Chat model served through Ollama; temperature=0 to minimise sampling randomness.
llm_ollama = ChatOllama(temperature=0, model="deepseek-r1:1.5b")
# Smoke test: send a trivial prompt and show the text of the reply.
llm_ollama.invoke("Hello, how are you?").content

"<think>\n\n</think>\n\nHello! I'm just a virtual assistant, so I don't have feelings, but I'm here and ready to help you with whatever you need. How can I assist you today? 😊"

In [16]:
# Send the current prompt (msg_input) to the model and keep the raw reply for parsing.
res_ollama = llm_ollama.invoke(msg_input)

In [17]:
from langchain_core.output_parsers import JsonOutputParser

# Turn the model's reply into plain Python objects; WorkExperience is passed as the
# parser's pydantic_object.
# NOTE(review): the recorded output uses capitalised keys ('Company', 'Title', ...)
# rather than the model's field names, which suggests the result is not validated
# against WorkExperience here.
parser = JsonOutputParser(pydantic_object=WorkExperience)

json_out = parser.invoke(res_ollama)

print(json_out)

[{'Company': 'Matdun Labs India Pvt. Ltd', 'Title': 'AI Engineer', 'Duration': 'Remote, Dec 2021 - Present', 'Responsibilities': ['Developed an advanced face recognition system utilizing SOTA based models for detecting and recognition the face and deployed on edge device (jetson nano) and Kubernetes cluster (Nvidia Gpus). Optimized the models using tensorrt to reduce latency and Used Nvidia Triton server for dynamic batching, GPU and CPU optimization, and robust scalability, ensuring efficient and high-performance inference for deployed AI models.', 'Developed an advanced AI-powered solution designed for real-time monitoring and analysis of video streams. It offers features such as person tracking, which enables precise identification and movement analysis, and person analytics, including metrics like waiting time and behavior patterns. The system incorporates specialized detection capabilities, such as weapon detection for enhanced security and fall detection for safety monitoring in 

In [19]:
# Inspect the second parsed work-experience entry.
json_out[1]

{'Company': 'Capillary Technologies Pvt. Ltd',
 'Title': 'ML Intern',
 'Duration': 'Remote, June 2021 - Nov 2021',
 'Responsibilities': ['Developed an AI to detect person age group and fashion type using person tracking.',
  'Summary: Analyzes customer behavior in the store.'],
 'Projects': [{'Title': 'Smart Store',
   'Description': 'Developed an AI to detect person age group and fashion type. The system uses person tracking to analyze customer behavior in the store.',
   'Technologies': ['Deep Learning',
    'AWS',
    'Computer Vision',
    'Nodejs',
    'Docker',
    'Git']},
  {'Title': 'Fake News Classification',
   'Description': 'Used TF-IDF, Word2Vec, and Transformer embeddings for fake news detection. The system classifies fake news based on text analysis techniques.',
   'Technologies': ['Machine Learning',
    'NLTK',
    'Scikit-learn',
    'Tf-idf',
    'Deep learning',
    'word2vec']}]}

In [21]:
# Build the projects prompt: fill the template with the 'projects' section and
# append the shared instructions after a newline.
msg_input = projects_prompt.format(resume_content=resume_info['projects']) + '\n' + resume_template_instructions

In [23]:
# Display the rendered projects prompt before sending it to the model.
print(msg_input)

You are provided with a resume document in plain text format. Your task is to extract structured candidate details from the resume.
Resume Content:
Projects:
- Implemented Tinyllama from scratch in pytorch : Built a lightweight version of the LLaMA language model from the ground up using PyTorch, focusing on replicating core transformer architecture with an emphasis on model efficiency and size reduction. Loaded the pretrained weights into our architecture and implemented inference pipeline. Implemented client and server using grpc python. Tech: PyTorch, Transformers, Attention Mechanisms, Deep Learning , KV cache , Grpc , LLM, GenAI .
- Image Captioning using Deep Learning (NLP, CV) : Developed an advanced image captioning system using an encoder-decoder architecture to automatically generate coherent and contextually relevant textual descriptions from images. Tech: Python, Tensorflow, Streamlit , and Deep Learning
- Semantic search engine on stackoverflow python data (NLP) : Develope

In [24]:
# Send the current prompt (msg_input) to the model and keep the raw reply for parsing.
res_ollama = llm_ollama.invoke(msg_input)

In [26]:
# Turn the model's reply into plain Python objects; Project is passed as the
# parser's pydantic_object.
parser = JsonOutputParser(pydantic_object=Project)

json_out = parser.invoke(res_ollama)

{'title': 'Implementation of Tinyllama from scratch', 'description': 'Built a lightweight version of the LLaMA language model from the ground up using PyTorch, focusing on replicating core transformer architecture with an emphasis on model efficiency and size reduction.Loaded the pretrained weights into our architecture and implemented inference pipeline.', 'technologies': ['PyTorch', 'Transformers', 'Attention Mechanisms', 'Deep Learning']}
{'title': 'Image Captioning using Deep Learning', 'description': 'Developed an advanced image captioning system using an encoder-decoder architecture to automatically generate coherent and contextually relevant textual descriptions from images.', 'technologies': ['Python', 'TensorFlow', 'Streamlit', 'Deep Learning']}
{'title': 'Semantic search engine on stackoverflow python data', 'description': 'Developed a robust semantic search engine designed to enhance search accuracy and relevance within StackOverflow Python data by leveraging advanced natura

In [27]:
# Inspect the first parsed project entry.
json_out[0]

{'title': 'Implementation of Tinyllama from scratch',
 'description': 'Built a lightweight version of the LLaMA language model from the ground up using PyTorch, focusing on replicating core transformer architecture with an emphasis on model efficiency and size reduction.Loaded the pretrained weights into our architecture and implemented inference pipeline.',
 'technologies': ['PyTorch',
  'Transformers',
  'Attention Mechanisms',
  'Deep Learning']}

In [43]:

from pydantic import BaseModel, Field
from typing import List


class ResumeSkills(BaseModel):
    """Skills extracted from a resume, grouped into five categories.

    Field names match the JSON keys requested by the skills prompt. Each
    category defaults to an empty list, mirroring the prompt's rule that a
    missing category is returned as an empty list, and carries a description
    like the other models in this notebook.
    """

    technical_skills: List[str] = Field(
        default_factory=list,
        description="Programming languages, frameworks, libraries, dev tools, databases, cloud services, etc.",
    )
    soft_skills: List[str] = Field(
        default_factory=list,
        description="Communication, leadership, time management, problem-solving, etc.",
    )
    domain_specific_skills: List[str] = Field(
        default_factory=list,
        description="Subject matter expertise or industry-specific knowledge.",
    )
    tools_and_platforms: List[str] = Field(
        default_factory=list,
        description="IDEs, productivity tools, software platforms.",
    )
    languages: List[str] = Field(
        default_factory=list,
        description="Spoken languages.",
    )


In [44]:
# Build the skills prompt: fill the template with the 'skills' section and
# append the shared instructions after a newline.
msg_input = skills_prompt.format(resume_content=resume_info['skills']) + '\n' + resume_template_instructions

In [45]:
# Display the rendered skills prompt before sending it to the model.
print(msg_input)

You are a professional resume analyzer.
Your task is to extract all **skills** mentioned in the given resume. Group them into the following categories:

1. **Technical Skills** – programming languages, frameworks, libraries, dev tools, databases, cloud services, etc.
2. **Soft Skills** – communication, leadership, time management, problem-solving, etc.
3. **Domain-Specific Skills** – subject matter expertise or industry-specific knowledge (e.g., fintech, robotics, bioinformatics).
4. **Tools & Platforms** – IDEs, productivity tools, software platforms (e.g., Git, JIRA, Figma).
5. **Languages** – spoken languages (e.g., English, Spanish, German).

Return the output as a structured JSON object with each skill category containing a list of strings. Do not include explanations or extra formatting — only valid JSON.
use these names for json keys technical_skills , soft_skills , domain_specific_skills , tools_and_platforms , languages
If a skill category is not present, return an empty list 

In [47]:
# Send the current prompt (msg_input) to the model and keep the raw reply for parsing.
res_ollama = llm_ollama.invoke(msg_input)

In [48]:
# Turn the model's reply into plain Python objects; ResumeSkills is passed as the
# parser's pydantic_object.
parser = JsonOutputParser(pydantic_object=ResumeSkills)

json_out = parser.invoke(res_ollama)

print(json_out)

{'technical_skills': ['Data Science', 'Machine Learning', 'Deep Learning', 'NLP', 'Computer Vision', 'Predictive Modeling', 'Decision Analytics', 'Large Language Models (LLMs)', 'Generative AI', 'Python', 'C', 'C++', 'PyTorch', 'Scikit-learn', 'FastAPI', 'Django', 'Flask', 'Nvidia Deepestream', 'Nvidia Triton Server', 'Langgraph', 'Langchain', 'LitGPT', 'Litdata', 'PyTorch Lightning', 'Huggingface libraries ( Transformers, peft)'], 'soft_skills': ['Leadership', 'Time Management'], 'domain_specific_skills': ['Data Science', 'Machine Learning', 'Deep Learning', 'NLP', 'Computer Vision', 'Predictive Modeling', 'Decision Analytics', 'Large Language Models (LLMs)', 'Generative AI'], 'tools_and_platforms': ['Kubernetes', 'Docker', 'Git', 'PostgreSQL', 'Redis', 'Gitlab CI-CD'], 'languages': ['Python', 'C', 'C++']}


In [50]:
# Display the parsed skills result using the notebook's formatted output.
json_out

{'technical_skills': ['Data Science',
  'Machine Learning',
  'Deep Learning',
  'NLP',
  'Computer Vision',
  'Predictive Modeling',
  'Decision Analytics',
  'Large Language Models (LLMs)',
  'Generative AI',
  'Python',
  'C',
  'C++',
  'PyTorch',
  'Scikit-learn',
  'FastAPI',
  'Django',
  'Flask',
  'Nvidia Deepestream',
  'Nvidia Triton Server',
  'Langgraph',
  'Langchain',
  'LitGPT',
  'Litdata',
  'PyTorch Lightning',
  'Huggingface libraries ( Transformers, peft)'],
 'soft_skills': ['Leadership', 'Time Management'],
 'domain_specific_skills': ['Data Science',
  'Machine Learning',
  'Deep Learning',
  'NLP',
  'Computer Vision',
  'Predictive Modeling',
  'Decision Analytics',
  'Large Language Models (LLMs)',
  'Generative AI'],
 'tools_and_platforms': ['Kubernetes',
  'Docker',
  'Git',
  'PostgreSQL',
  'Redis',
  'Gitlab CI-CD'],
 'languages': ['Python', 'C', 'C++']}

In [51]:
# Build the education prompt: fill the template with the 'education' section and
# append the shared instructions after a newline.
msg_input = education_prompt.format(resume_content=resume_info['education']) + '\n' + resume_template_instructions 

In [53]:
# Display the rendered education prompt before sending it to the model.
print(msg_input)

You are provided with a resume document in plain text format. Your task is to extract structured candidate details from the resume.
Resume Content:
Education:
- Bachelor of Technology - Computer Science and Engineering; GPA: 8.0
  JNTUA Engineering College, Kalikiri, AP, India (July 2017 - June 2021)
- Intermediate - MPC; Per: 96.7
  Sri Chaitanya Junior College, Tirupati, AP, India (July 2015 - June 2017)
- AP Residential School, Gyaram Palli, AP, India
  SSC ; GPA: 9.5 (July 2008 - Apr 2015)
Extract Education Details:
Please extract the candidate’s educational background from the resume text. For each entry, include:
 - Institution: The name of the school, college, or university.
 - Degree: The degree or certification earned.
 - Year: The time period of attendance or the graduation year.
Return these details as a JSON array. If the candidate provides no education details, return an empty array.
Instructions:
Read the resume content provided below.
 - Extract the information for each 

In [54]:
# Send the current prompt (msg_input) to the model and keep the raw reply for parsing.
res_ollama = llm_ollama.invoke(msg_input)

In [57]:
# Turn the model's reply into plain Python objects; Education is passed as the
# parser's pydantic_object.
parser = JsonOutputParser(pydantic_object=Education)

json_out = parser.invoke(res_ollama)

In [56]:
# Display the parsed education result.
# NOTE(review): the execution counts show this cell (56) ran before the parser cell
# above it (57), so the recorded output may come from an earlier parse — re-run in order.
json_out

[{'Institution': 'JNTUA Engineering College',
  'Degree': 'Bachelor of Technology - Computer Science and Engineering',
  'Year': '2021'},
 {'Institution': 'Sri Chaitanya Junior College',
  'Degree': 'Intermediate - MPC',
  'Year': '2017'},
 []]

In [60]:
# Prompt template for extracting publications/research as a JSON array.
# The {resume_content} placeholder is filled in with str.format.
publications_prompt = """You are an expert resume parser. Your task is to extract all **publications and research** mentioned in the resume and return the result as a **JSON array** of structured objects.
Each item should include the following fields (if available):
- **title**: Title of the paper or research work
- **authors**: List of authors (including the resume owner if named)
- **publication_venue**: Journal, conference, or platform where it was published
- **year**: Year of publication (integer)
- **doi_or_link**: DOI or a direct link to the publication (if available)
- **description**: A short summary or abstract (if present)

Return only valid JSON. If no publications or research work is found, return an empty list.
Now extract this information from the resume below:
{resume_content}
"""

# Inline sample "Publications & Research" text (three entries) used as the input
# for the publications prompt.
data_pub = """Publications & Research
"Enhancing Image Captioning Using Transformer Networks"
John Doe, Priya Patel, Ankit Sharma
Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2023
https://doi.org/10.1109/CVPR.2023.00456
Developed a novel transformer-based encoder-decoder architecture that improves caption generation for complex visual scenes.

"Real-Time Object Detection Using YOLOv5 and TensorRT"
John Doe, Rajesh Iyer
International Journal of Computer Applications (IJCA), Vol. 185, No. 12, 2022
Focused on deploying YOLOv5 models optimized with TensorRT for edge devices, achieving inference speeds of up to 100 FPS.

"Survey on Federated Learning Techniques for Privacy-Preserving AI"
John Doe, Sneha Reddy
ArXiv Preprint, 2021
https://arxiv.org/abs/2106.12345
A comprehensive review of federated learning approaches, covering optimization challenges and privacy guarantees.
"""

from pydantic import BaseModel, HttpUrl
from typing import List, Optional


# Schema for one publication / research entry. Field names match the keys
# requested by publications_prompt.
class Publication(BaseModel):
    # Required fields.
    title: str
    authors: List[str]
    # Optional fields; each defaults to None when not provided.
    publication_venue: Optional[str] = None
    year: Optional[int] = None
    doi_or_link: Optional[HttpUrl] = None
    description: Optional[str] = None


In [68]:
# Fill the publications template with the sample text. Unlike the other prompts,
# resume_template_instructions is not appended here (that concatenation was commented out).
msg_input = publications_prompt.format(resume_content=data_pub)

# Display the rendered prompt before sending it to the model.
print(msg_input)

You are an expert resume parser. Your task is to extract all **publications and research** mentioned in the resume and return the result as a **JSON array** of structured objects.
Each item should include the following fields (if available):
- **title**: Title of the paper or research work
- **authors**: List of authors (including the resume owner if named)
- **publication_venue**: Journal, conference, or platform where it was published
- **year**: Year of publication (integer)
- **doi_or_link**: DOI or a direct link to the publication (if available)
- **description**: A short summary or abstract (if present)

Return only valid JSON. If no publications or research work is found, return an empty list.
Now extract this information from the resume below:
Publications & Research
"Enhancing Image Captioning Using Transformer Networks"
John Doe, Priya Patel, Ankit Sharma
Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2023
https://doi.org/10.1109/CVPR.20

In [69]:
# Send the current prompt (msg_input) to the model and keep the raw reply for parsing.
res_ollama = llm_ollama.invoke(msg_input)

In [70]:
# Turn the model's reply into plain Python objects; Publication is passed as the
# parser's pydantic_object.
parser = JsonOutputParser(pydantic_object=Publication)

json_out = parser.invoke(res_ollama)

In [72]:
# Display the parsed publications result.
json_out

[{'title': 'Enhancing Image Captioning Using Transformer Networks',
  'authors': ['John Doe', 'Priya Patel', 'Ankit Sharma'],
  'publication_venue': 'Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)',
  'year': 2023,
  'doi_or_link': 'https://doi.org/10.1109/CVPR.2023.00456',
  'description': 'Improving caption generation for complex visual scenes.'},
 {'title': 'Developed a novel transformer-based encoder-decoder architecture that improves caption generation for complex visual scenes.',
  'authors': ['John Doe', 'Rajesh Iyer'],
  'publication_venue': 'International Journal of Computer Applications (IJCA)',
  'year': 2022,
  'doi_or_link': 'https://doi.org/10.1109/CVPR.2023.00456',
  'description': 'Focusing on deploying YOLOv5 models optimized with TensorRT for edge devices, achieving inference speeds of up to 100 FPS.'},
 {'title': 'Survey on Federated Learning Techniques for Privacy-Preserving AI',
  'authors': ['John Doe', 'Sneha Reddy'],
  'publ