In [69]:
# Install all dependencies using:
!pip install -r requirements.txt

In [215]:
import os
from getpass import getpass

# Provide the OpenAI API key without echoing it into the saved notebook output:
# use the OPENAI_API_KEY environment variable when it is set, otherwise ask
# for the key with a hidden-input prompt.
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API Key: ")

In [84]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain

In [5]:
# Define path to resume
# (absolute path; presumably a Colab upload location - adjust for other environments)
myResume = "/content/aravind-resume-palepu.pdf"
# Open the resume with PyPDF2; its pages are read when the text is extracted below
pdf_resume = PdfReader(myResume)

In [70]:
# Extract the text of each page and concatenate everything into one string
# (pages are joined without any separator)
txt = "".join(page.extract_text() for page in pdf_resume.pages)

print(txt)

ARAVIND RAJ PALEPU   
Richardson, TX – 75082, (314)-386-7056 | aravindrajpalepu@gmail.com  | LinkedIn | GitHub | Tableau Public   
 
EDUCATION   
University of Texas at Dallas                    May 2024  
Master of Science, Business Analytics - Data Science Track         
Symbiosis International University                               May 2021  
Bachelor of Business Administration and Bachelor of Laws 
                               
TECHNICAL SKILLS   
Technical: SkLearn, XGBoost, PyTorch, FastAI, OpenCV2, NLTK, HuggingFace Transformers, Langchain, Streamlit 
Analytical Tools: SQL, Python, R, Tableau, Power B.I , Google Analytics, VBA/Macros, MS Excel 
Data Management:  PySpark, Postgres, PL/pgSQL, MongoDB, Databricks, Apache Airflow, AWS Redshift 
Certifications:  AWS Solutions Architect, OCI Foundations, OCI AI Foundations, Alteryx Micro-Credentials 
 
PROFESSIONAL EXPERIENCE   
Smart Data Solutions, Eagan, Minnesota                January 2024 – May 2024 
Machine Learning Intern 

In [7]:
# Cut the resume text into chunks of at most 700 characters (measured with len),
# with 200 characters of overlap between chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=700,
    chunk_overlap=200,
    length_function=len,
)

resume_chunks = text_splitter.split_text(txt)
print(resume_chunks)

['ARAVIND RAJ PALEPU   \nRichardson, TX – 75082, (314)-386-7056 | aravindrajpalepu@gmail.com  | LinkedIn | GitHub | Tableau Public   \n \nEDUCATION   \nUniversity of Texas at Dallas                    May 2024  \nMaster of Science, Business Analytics - Data Science Track         \nSymbiosis International University                               May 2021  \nBachelor of Business Administration and Bachelor of Laws \n                               \nTECHNICAL SKILLS   \nTechnical: SkLearn, XGBoost, PyTorch, FastAI, OpenCV2, NLTK, HuggingFace Transformers, Langchain, Streamlit \nAnalytical Tools: SQL, Python, R, Tableau, Power B.I , Google Analytics, VBA/Macros, MS Excel', 'Analytical Tools: SQL, Python, R, Tableau, Power B.I , Google Analytics, VBA/Macros, MS Excel \nData Management:  PySpark, Postgres, PL/pgSQL, MongoDB, Databricks, Apache Airflow, AWS Redshift \nCertifications:  AWS Solutions Architect, OCI Foundations, OCI AI Foundations, Alteryx Micro-Credentials \n \nPROFESSIONAL EXP

In [105]:
# Similarity search and QA chat with OpenAI
def openai(openai_api_key, chunks, query):
    """Answer ``query`` with gpt-3.5-turbo from the chunks most similar to it.

    Args:
        openai_api_key: Key handed to both the embedding model and the chat model.
        chunks: Texts that are embedded and indexed with FAISS.
        query: Text used for the similarity search and as the question
            passed to the QA chain.

    Returns:
        The value returned by the QA chain's ``run`` call.
    """
    # Embed the chunks with OpenAI embeddings and index them with FAISS
    index = FAISS.from_texts(
        chunks,
        embedding=OpenAIEmbeddings(openai_api_key=openai_api_key),
    )

    # Keep the 3 chunks closest to the query
    top_matches = index.similarity_search(query=query, k=3)

    # Question-answering chain of type 'stuff' on top of 'gpt-3.5-turbo'
    qa_chain = load_qa_chain(
        llm=ChatOpenAI(model='gpt-3.5-turbo', api_key=openai_api_key),
        chain_type='stuff',
    )

    return qa_chain.run(input_documents=top_matches, question=query)

# Generate Cover Letter

In [36]:
def generate_cover_letter(resume_text):
    """Build the prompt asking for a cover letter.

    ``resume_text`` is formatted into the prompt as-is, on the line that
    follows the instruction. Returns the prompt string.
    """
    prompt_template = '''
    Given the resume, give me a nice cover letter.
    {resume}
              '''
    return prompt_template.format(resume=resume_text)

# Build the prompt from the plain resume text rather than the chunk list:
# formatting the list would put its Python repr (brackets, quotes, escaped
# newlines and the duplicated overlap between chunks) into the prompt.
cover = generate_cover_letter(resume_text=txt)
cover_letter = openai(openai_api_key=openai_api_key, chunks=resume_chunks, query=cover)
print(cover_letter)

Dear Hiring Manager,

I am writing to express my interest in the position at your company. With a strong background in data science and business analytics, along with a passion for machine learning and natural language processing, I believe I would be a valuable asset to your team.

I recently completed my Master of Science in Business Analytics with a Data Science Track at the University of Texas at Dallas. During my studies, I gained hands-on experience with a variety of analytical tools such as SQL, Python, R, Tableau, and Power BI. I also have experience with machine learning libraries such as SkLearn, XGBoost, PyTorch, and FastAI. Additionally, I am proficient in data management tools such as PySpark, Postgres, MongoDB, and AWS Redshift.

In my most recent role as a Machine Learning Intern at Smart Data Solutions, I had the opportunity to fine-tune OCR solutions to extract health-related textual data from scanned documents, improving data accuracy. I also leveraged transformer-bas

# Resume Summary

In [37]:
def generate_resume_summary(resume_text):
    """Build the prompt asking for a summary of the resume.

    ``resume_text`` is formatted into the prompt as-is, on the line that
    follows the instruction. Returns the prompt string.
    """
    prompt_template = '''
    Given the resume, give me a summary of the resume.
    {resume}
              '''
    return prompt_template.format(resume=resume_text)

# Build the prompt from the plain resume text rather than the chunk list:
# formatting the list would put its Python repr (brackets, quotes, escaped
# newlines and the duplicated overlap between chunks) into the prompt.
summary = generate_resume_summary(resume_text=txt)
resume_summary = openai(openai_api_key=openai_api_key, chunks=resume_chunks, query=summary)
print(f"Your resume summary: \n {resume_summary}")

Aravind Raj Palepu is a professional with a strong background in business analytics and data science. He has a Master of Science degree in Business Analytics with a Data Science Track from the University of Texas at Dallas, and a Bachelor of Business Administration and Bachelor of Laws from Symbiosis International University.

Aravind has a range of technical skills, including SkLearn, XGBoost, PyTorch, FastAI, OpenCV2, NLTK, HuggingFace Transformers, Langchain, and Streamlit. He is proficient in analytical tools such as SQL, Python, R, Tableau, Power B.I, Google Analytics, VBA/Macros, and MS Excel. He also has experience in data management tools like PySpark, Postgres, PL/pgSQL, MongoDB, Databricks, Apache Airflow, and AWS Redshift. Aravind holds certifications in AWS Solutions Architect, OCI Foundations, OCI AI Foundations, and Alteryx Micro-Credentials.

In terms of professional experience, Aravind worked as a Machine Learning Intern at Smart Data Solutions, where he fine-tuned OCR 

# Suggest Strengths

In [107]:
def generate_resume_strengths(resume_text):
    """Build the prompt asking for the strengths of the resume.

    ``resume_text`` is formatted into the prompt as-is, on the line that
    follows the instruction. Returns the prompt string.
    """
    prompt_template = '''
    Given the resume, give me the strengths of the resume.
    {resume}
              '''
    return prompt_template.format(resume=resume_text)

# Prompt is built from the full resume text; retrieval runs over the resume chunks
strengths = generate_resume_strengths(txt)
resume_strengths = openai(
    openai_api_key=openai_api_key,
    chunks=resume_chunks,
    query=strengths,
)
print(f"Your resume strengths: \n {resume_strengths}")

Your resume strengths: 
 Strengths of the resume include:

1. Strong educational background: The candidate holds a Bachelor's degree in Business Administration and Bachelor of Laws from Symbiosis International University and is currently pursuing a Master's degree in Business Analytics with a Data Science track from the University of Texas at Dallas.

2. Technical skills: The candidate has a wide range of technical skills, including SkLearn, XGBoost, PyTorch, FastAI, OpenCV2, NLTK, HuggingFace Transformers, Langchain, Streamlit, SQL, Python, R, Tableau, Power B.I, Google Analytics, VBA/Macros, MS Excel, PySpark, Postgres, PL/pgSQL, MongoDB, Databricks, Apache Airflow, and AWS Redshift.

3. Professional experience: The candidate has practical experience as a Machine Learning Intern at Smart Data Solutions, where they fine-tuned OCR solutions and leveraged transformer-based models for NER. They also have experience as a Data Intern at SplashBI, where they developed an employee attrition 

# Suggest Weaknesses

In [40]:
def generate_resume_weaknesses(resume_text):
    """Build the prompt asking for the weaknesses of the resume.

    ``resume_text`` is formatted into the prompt as-is, on the line that
    follows the instruction. Returns the prompt string.
    """
    prompt_template = '''
    Given the resume, give me the weaknesses of the resume.
    {resume}
              '''
    return prompt_template.format(resume=resume_text)

# Build the prompt from the plain resume text rather than the chunk list:
# formatting the list would put its Python repr (brackets, quotes, escaped
# newlines and the duplicated overlap between chunks) into the prompt.
weaknesses = generate_resume_weaknesses(resume_text=txt)
resume_weaknesses = openai(openai_api_key=openai_api_key, chunks=resume_chunks, query=weaknesses)
print(f"Your resume weaknesses: \n {resume_weaknesses}")

Your resume weaknesses: 
 Based on the given resume, it is difficult to identify any specific weaknesses. The resume highlights relevant education, technical skills, and professional experience. However, it is important to note that the resume does not mention any specific achievements or projects in the field of Data Science or Business Analytics. Additionally, there is limited information about the duration and scope of the professional experience. It would be beneficial to provide more specific details and accomplishments in these areas to strengthen the resume.


# Suggest Skills

In [41]:
def generate_skillSuggest(resume_text):
    """Build the prompt asking which skills/courses to work on.

    ``resume_text`` is formatted into the prompt as-is, on the line that
    follows the instruction. Returns the prompt string.
    """
    prompt_template = '''
    Given the resume, suggest me some skills/courses I need to work on.
    {resume}
              '''
    return prompt_template.format(resume=resume_text)

# Build the prompt from the plain resume text rather than the chunk list:
# formatting the list would put its Python repr (brackets, quotes, escaped
# newlines and the duplicated overlap between chunks) into the prompt.
skillSuggest = generate_skillSuggest(resume_text=txt)
skill_suggest = openai(openai_api_key=openai_api_key, chunks=resume_chunks, query=skillSuggest)
print(f"Your skill suggestions: \n {skill_suggest}")

Your skill suggestions: 
 Based on the provided resume, some skills/courses you may want to consider working on are:

1. Deep Learning: Since you already have experience with machine learning models, it would be beneficial to deepen your understanding of deep learning techniques and frameworks such as TensorFlow or Keras.

2. Natural Language Processing (NLP): You have worked on a project related to natural language processing, so further developing your skills in this area would be valuable. Consider exploring advanced NLP techniques, such as sentiment analysis, text generation, or language translation.

3. Data Management: While you have experience with SQL and databases like Postgres, expanding your knowledge in data management tools and technologies like PySpark, MongoDB, and Apache Airflow would be beneficial in handling large-scale data processing and analysis.

4. Cloud Computing: Given your experience with AWS Redshift and the AWS Solutions Architect certification, consider exp

# Skills Extract

In [59]:
import fitz

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, concatenated.

    The document is opened with ``fitz.open`` as a context manager and the
    pages are read in index order, with no separator added between them.
    """
    with fitz.open(pdf_path) as doc:
        page_texts = [doc[index].get_text() for index in range(doc.page_count)]

    return "".join(page_texts)

# Location of the job-description PDF
# (absolute path; presumably a Colab upload location - adjust for other environments)
pdf_path = "/content/jd.pdf"
# Text of all pages of that PDF, concatenated into one string
pdf_text = extract_text_from_pdf(pdf_path)

In [71]:
# Cut the job-description text into chunks of at most 700 characters
# (measured with len), with 200 characters of overlap between chunks
jd_text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=700,
    chunk_overlap=200,
    length_function=len,
)

jd_chunks = jd_text_splitter.split_text(pdf_text)
print(jd_chunks)

['Company Description\nHarsco is recognized for technical leadership and worldwide experience in virtually all major\naspects of railway track maintenance. We enable railroads to operate at peak efficiency over\nsmooth, precisely aligned track that increases railway safety while reducing fuel consumption\nand other key operating costs. Our broad array of equipment and services support every type of\nrailway operator, from major national and international railway systems, to short lines and\nhigh-speed urban transit networks.\nJob Description\nWe are looking for a driven team member to contribute to the development of our technology', 'high-speed urban transit networks.\nJob Description\nWe are looking for a driven team member to contribute to the development of our technology\nproduct serving in area of automating railway track equipment and improving safety of the\nrailroad operation.\nThis presents an exceptional opportunity to enhance professional expertise and contribute to a\nspec

In [136]:
# Similarity search and QA chat with OpenAI
def jd_openai(openai_api_key, chunks, query):
    """Answer ``query`` with gpt-3.5-turbo from the chunks most similar to it.

    This runs exactly the same pipeline as ``openai()`` (OpenAI embeddings,
    FAISS index, top-3 similarity search, 'stuff' QA chain on gpt-3.5-turbo),
    so it delegates to that function instead of duplicating every step.

    Args:
        openai_api_key: Key handed to the embedding model and the chat model.
        chunks: Texts to embed and search.
        query: Text used for the similarity search and as the question.

    Returns:
        Whatever ``openai()`` returns for the same arguments.
    """
    return openai(openai_api_key=openai_api_key, chunks=chunks, query=query)

In [144]:
def recognize_keywords_jd(jd_text):
    """Build the prompt asking for the job description's keywords.

    The prompt asks for the keywords segmented into technical, professional
    and soft skills. ``jd_text`` is formatted into the prompt as-is, on the
    line that follows the instruction. Returns the prompt string.
    """
    prompt_template = '''
    Given the job description, give me a list of all keywords. Segment into technical, professional and soft skills.
    {job_description}
              '''
    return prompt_template.format(job_description=jd_text)

# Prompt is built from the full job-description text; retrieval runs over its chunks
jd_res = recognize_keywords_jd(pdf_text)
jd_keywords = openai(
    openai_api_key=openai_api_key,
    chunks=jd_chunks,
    query=jd_res,
)
print(f"Skill required for the job: \n {jd_keywords}")

Skill required for the job: 
 Technical Skills:
- C
- C++
- Linux platforms
- AI tools
- Machine learning models
- Neural networks
- Big data computation and storage tools
- Prototypes and datasets
- Model training and evaluations
- Integration of solutions
- Bench tests and onsite tests
- Tuning and monitoring
- CNN (Convolutional Neural Network)
- R-CNN type neural network
- Computer vision tasks
- Supervised and Semi-Supervised Learning
- Deep Learning
- Support Vector Machines
- Linear and Logistic Regression
- AI Frameworks (TensorFlow, Café, PyTorch, Keras, Darknet, OpenCV)
- AI edge devices (NVIDIA Jetson / Nano / Orin)
- Linux Operating System
- Statistical computer languages (R, Python, SQL)
- Data manipulation and analysis
- Data architectures
- Machine learning techniques (semantic segmentation, clustering, decision tree learning, artificial neural networks)
- Statistical techniques and concepts
- Edge computing
- Optimizing neural networks for low-power mobile platforms

Pr

In [140]:
def resume_keywords(resume_text):
    """Build the prompt asking for the skills listed in the resume.

    The prompt asks for technical, professional and soft skills.
    ``resume_text`` is formatted into the prompt as-is, on the line that
    follows the instruction. Returns the prompt string.
    """
    prompt_template = '''
    Given the resume, give me a list of all my technical, professional and soft skills.
    {resume}
              '''
    return prompt_template.format(resume=resume_text)

resume_res = resume_keywords(resume_text=txt)
# Store the answer under its own name: assigning it to `resume_keywords`
# would overwrite the prompt-building function of the same name, so any later
# call to that function would fail until its definition is run again.
resume_skill_keywords = openai(openai_api_key=openai_api_key, chunks=resume_chunks, query=resume_res)
print(f"Skills you possess: \n {resume_skill_keywords}")

Your skill suggestions: 
 Technical Skills:
- SkLearn
- XGBoost
- PyTorch
- FastAI
- OpenCV2
- NLTK
- HuggingFace Transformers
- Langchain
- Streamlit
- SQL
- Python
- R
- Tableau
- Power BI
- Google Analytics
- VBA/Macros
- MS Excel
- PySpark
- Postgres
- PL/pgSQL
- MongoDB
- Databricks
- Apache Airflow
- AWS Redshift

Professional Skills:
- Machine Learning
- OCR
- NER (Named Entity Recognition)
- Data Extraction
- Data Manipulation
- Data Analysis
- Employee Attrition Prediction
- Tableau Dashboard Development
- SQL Queries
- AWS Redshift
- Presentation Skills

Soft Skills:
- Communication
- Problem-Solving
- Teamwork
- Time Management
- Attention to Detail
- Analytical Thinking
- Presentation Skills


# Resume Analyzer

In [188]:
# Resume text followed by the job-description text, each section labelled
# and closed by an explicit end marker
combined_text = (
    "Resume: \n"
    + txt
    + "\n \n ------------- end of resume ------------- \n \nJob Description: \n "
    + pdf_text
    + "\n \n ------------- end of job description ------------- \n"
)

In [190]:
# Cut the combined resume + job-description text into chunks of at most
# 700 characters (measured with len) with no overlap between chunks.
# NOTE: this rebinds `text_splitter`, replacing the splitter with a
# 200-character overlap that was created for the resume chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=700,
    chunk_overlap=0,
    length_function=len,
)

combined_text_chunks = text_splitter.split_text(combined_text)
print(combined_text_chunks)

['Resume: \nARAVIND RAJ PALEPU   \nRichardson, TX – 75082, (314)-386-7056 | aravindrajpalepu@gmail.com  | LinkedIn | GitHub | Tableau Public   \n \nEDUCATION   \nUniversity of Texas at Dallas                    May 2024  \nMaster of Science, Business Analytics - Data Science Track         \nSymbiosis International University                               May 2021  \nBachelor of Business Administration and Bachelor of Laws \n                               \nTECHNICAL SKILLS   \nTechnical: SkLearn, XGBoost, PyTorch, FastAI, OpenCV2, NLTK, HuggingFace Transformers, Langchain, Streamlit \nAnalytical Tools: SQL, Python, R, Tableau, Power B.I , Google Analytics, VBA/Macros, MS Excel', 'Data Management:  PySpark, Postgres, PL/pgSQL, MongoDB, Databricks, Apache Airflow, AWS Redshift \nCertifications:  AWS Solutions Architect, OCI Foundations, OCI AI Foundations, Alteryx Micro-Credentials \n \nPROFESSIONAL EXPERIENCE   \nSmart Data Solutions, Eagan, Minnesota                January 2024 – May 2

In [206]:
# Similarity search and QA chat with OpenAI
def resume_scorer(openai_api_key, chunks, query):
    """Answer ``query`` with gpt-3.5-turbo from the chunks most similar to it.

    Args:
        openai_api_key: Key handed to both the embedding model and the chat model.
        chunks: Texts that are embedded and indexed with FAISS.
        query: Text used for the similarity search and as the question
            passed to the QA chain.

    Returns:
        The value returned by the QA chain's ``run`` call.
    """
    # Embed the chunks with OpenAI embeddings and index them with FAISS
    index = FAISS.from_texts(
        chunks,
        embedding=OpenAIEmbeddings(openai_api_key=openai_api_key),
    )

    # No `k` is passed, so the vector store's own default number of matches
    # applies (4 in LangChain's FAISS wrapper - confirm for the installed version)
    matches = index.similarity_search(query=query)

    # Question-answering chain of type 'stuff' on top of 'gpt-3.5-turbo'
    qa_chain = load_qa_chain(
        llm=ChatOpenAI(model='gpt-3.5-turbo', api_key=openai_api_key),
        chain_type='stuff',
    )

    return qa_chain.run(input_documents=matches, question=query)

In [213]:
def resume_analyzer(combined_text):
    """Build the prompt asking for a 0-100 score of the resume against the job description.

    Args:
        combined_text: Text holding both the resume and the job description;
            it is formatted into the prompt as-is, after the instruction.

    Returns:
        The prompt string.
    """
    # The instruction previously ended in the dangling fragment "Both the";
    # the sentence is completed so the model is not handed a broken instruction.
    query = f"""
    Evaluate the resume based on the job description in the text below. Give me a score between 0 and 100. Both the resume and the job description are included in the text below.
    {combined_text}
              """
    return query

# Prompt is built from the combined resume + job-description text;
# retrieval runs over the chunks of that same combined text
query = resume_analyzer(combined_text)
score_and_analysis = resume_scorer(
    openai_api_key=openai_api_key,
    chunks=combined_text_chunks,
    query=query,
)
print(f"Score: \n {score_and_analysis}")

Score: 
 Based on the job description provided, here's an evaluation of the resume:

1. Education: The candidate has a Master of Science degree in Business Analytics with a Data Science Track, which aligns with the job requirements. Additionally, the candidate has a Bachelor's degree in Business Administration and Bachelor of Laws, which may not be directly related to the job but still demonstrates a strong educational background. Score: 85

2. Technical Skills: The candidate has a strong technical skillset, including experience with SkLearn, XGBoost, PyTorch, FastAI, OpenCV2, NLTK, HuggingFace Transformers, Langchain, and Streamlit. They also have experience with SQL, Python, R, Tableau, Power B.I, Google Analytics, VBA/Macros, and MS Excel, which are relevant analytical tools. Additionally, they have experience with data management tools like PySpark, Postgres, PL/pgSQL, MongoDB, Databricks, Apache Airflow, and AWS Redshift. Score: 90

3. Professional Experience: The candidate has re