<a href="https://colab.research.google.com/github/prtkmhn/ATS-Resume-Reviewer/blob/main/ATS_UpdateResumeAi%2BChatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# System packages for the Tesseract OCR engine and its development files.
# The notebook later calls pytesseract.image_to_string (see extract_text),
# which presumably relies on this install — TODO confirm.
! apt install tesseract-ocr
! apt install libtesseract-dev

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 45 not upgraded.
Need to get 4,816 kB of archives.
After this operation, 15.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]
Fetched 4,816 kB in 12s (405 kB/s)
Selecting previously unselected package tesseract-ocr-eng.
(Reading database ... 121885 files and directories currently installed.)
Preparing to unpack .../tesseract-ocr-e

In [None]:
!pip install -q gradio python-dotenv google-generativeai pytesseract

In [42]:
import gradio as gr
import google.generativeai as genai
from google.colab import userdata
from PIL import Image
import pytesseract
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os
import base64
import textwrap
from IPython.display import Markdown

In [45]:
# Authenticate the Gemini client with the key returned by Colab's
# `userdata.get("GOOGLE_API_KEY")`, so the key itself is not written in the notebook.
genai.configure(api_key=userdata.get("GOOGLE_API_KEY"))
# Text-only model: used by update_resume, initialize_chat_history and the
# image-less branch of llm_response.
# NOTE(review): model names may have been retired upstream — confirm they are still served.
txt_model = genai.GenerativeModel('gemini-pro')
# Multimodal model: used by llm_response when an image accompanies the prompt.
vis_model = genai.GenerativeModel('gemini-pro-vision')

In [52]:
def extract_text(image_path):
    """Run OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image to read. A falsy value (``None`` or
            ``""``, e.g. when the upload widget has been cleared) yields ``""``
            instead of raising.

    Returns:
        str: The text produced by ``pytesseract.image_to_string``, or an
        empty string when no path was given.
    """
    if not image_path:
        return ""
    # Open the image as a context manager so its file handle is released as
    # soon as OCR finishes rather than whenever the object is garbage-collected.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image)

def calculate_cosine_similarity(text1, text2):
    """Return the TF-IDF cosine similarity between two texts.

    Args:
        text1: First document.
        text2: Second document.

    Returns:
        float: Cosine similarity of the two TF-IDF vectors. ``0.0`` is
        returned when either text is missing/blank or when no usable token
        can be extracted from the pair.
    """
    # A blank side has no terms to compare, so the similarity is zero; bail
    # out early rather than letting the vectorizer fail on empty input.
    if not (text1 and text1.strip()) or not (text2 and text2.strip()):
        return 0.0

    vectorizer = TfidfVectorizer()
    try:
        tfidf = vectorizer.fit_transform([text1, text2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when the
        # documents contain nothing it can tokenize.
        return 0.0
    return float(cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0])

def update_resume(job_info, old_resume, similarity_score):
    """Ask the text model for an ATS-style evaluation of a resume.

    The prompt embeds the resume, the job description and the similarity
    score (formatted to two decimals) and asks for a reply containing the
    keys "JD Match", "MissingKeywords" and "Profile Summary".

    Args:
        job_info: Job description text.
        old_resume: Current resume text.
        similarity_score: Numeric similarity between the two texts.

    Returns:
        str: The ``text`` attribute of the model's response.
    """
    ats_prompt = f"""Act Like a skilled or very experience ATS(Application Tracking System) with a deep understanding of tech field,software engineering,data science ,data analyst and big data engineer. Your task is to evaluate the resume based on the given job description. You must consider the job market is very competitive and you should provide
best assistance for improving thw resumes. Based on the cosine similarity score of {similarity_score:.2f}, update the resume to better match the job description. Resume: {old_resume} Job Description: {job_info}
I want the response as per below structure{{"JD Match": "%", "MissingKeywords": [], "Profile Summary": ""}}"""
    return txt_model.generate_content(ats_prompt).text

In [53]:
def process_application(image_path, resume_path):
    """OCR the job posting, score it against the resume and request an update.

    Args:
        image_path: Path of the job-description image.
        resume_path: Path of the plain-text resume file.

    Returns:
        tuple: ``(job_text, similarity_as_str, updated_resume)`` — the OCR'd
        job description, the cosine similarity converted with ``str()``, and
        the text returned by ``update_resume``.

    Raises:
        ValueError: If either path is missing (e.g. the button was pressed
            before both uploads were provided).
    """
    if not image_path or not resume_path:
        raise ValueError("Both a job description image and a resume file are required.")

    job_text = extract_text(image_path)
    # Decode explicitly as UTF-8 so the result does not depend on the host's
    # locale; undecodable bytes are replaced instead of aborting the request.
    with open(resume_path, 'r', encoding='utf-8', errors='replace') as file:
        resume_text = file.read()
    similarity_score = calculate_cosine_similarity(job_text, resume_text)
    updated_resume = update_resume(job_text, resume_text, similarity_score)
    return job_text, str(similarity_score), updated_resume

def image_to_base64(image_path):
    """Read a file in binary mode and return its contents as a base64 string.

    Args:
        image_path: Path of the file to encode.

    Returns:
        str: Base64 text of the file's bytes.
    """
    with open(image_path, 'rb') as handle:
        raw_bytes = handle.read()
    return str(base64.b64encode(raw_bytes), 'utf-8')

def query_message(history, txt, img):
    """Append the user's message (and optional image) to the chat history.

    In the UI this function is chained with ``llm_response``, which receives
    the text box's value as its prompt and itself returns ``""`` for the text
    box. The text is therefore passed through unchanged here; clearing it at
    this stage would hand ``llm_response`` an empty prompt.

    Args:
        history: Current list of ``(user, bot)`` chat tuples (``None`` is
            treated as an empty list).
        txt: The user's message.
        img: Optional path of an image to embed in the message.

    Returns:
        tuple: ``(history, txt, history)`` where ``history`` is a new list
        with the user's turn appended.
    """
    user_message = txt
    if img:
        # Named `encoded` (not `base64`) so the imported module is not shadowed.
        encoded = image_to_base64(img)
        user_message = f"{txt} ![](data:image/jpeg;base64,{encoded})"

    # Build a fresh list instead of extending the caller's list in place.
    updated_history = list(history or []) + [(user_message, None)]
    return updated_history, txt, updated_history

In [54]:
def llm_response(history, text, img):
    """Generate the model's reply and append it to the chat history.

    Args:
        history: Current list of ``(user, bot)`` chat tuples (``None`` is
            treated as an empty list).
        text: Prompt text sent to the model.
        img: Optional image path. When given, the prompt and the image are
            sent to ``vis_model``; otherwise only the text goes to ``txt_model``.

    Returns:
        tuple: ``(history, "", history)`` — the history with the bot's turn
        appended, and an empty string that clears the text box.
    """
    if not img:
        response = txt_model.generate_content(text)
    else:
        # Keep the image open only for the duration of the request so its
        # file handle is released afterwards.
        with Image.open(img) as image:
            response = vis_model.generate_content([text, image])

    updated_history = list(history or []) + [(None, response.text)]
    return updated_history, "", updated_history
def initialize_chat_history(job_text, resume_text):
    """Send the job description and resume to the text model as opening context.

    Args:
        job_text: Job description text.
        resume_text: Resume text.

    Returns:
        list: A one-element chat history, ``[(None, <model reply text>)]``.
    """
    context_prompt = (
        f"Here is the job description:\n{job_text}\n\n"
        f"And here is the resume:\n{resume_text}\n\n"
        "Please keep this information in mind as we proceed with our conversation."
    )
    reply = txt_model.generate_content(context_prompt)
    return [(None, reply.text)]


In [55]:

def _read_resume_file(path):
    """Return the text of the uploaded resume, or "" when no file is selected."""
    if not path:
        return ""
    # `with` closes the handle promptly; decode as UTF-8 regardless of locale
    # and replace undecodable bytes rather than failing the event.
    with open(path, 'r', encoding='utf-8', errors='replace') as handle:
        return handle.read()


with gr.Blocks() as demo:
    gr.Markdown("### Resume and Job Application Assistant")
    with gr.Row():
        # Left column: upload the job posting and resume, then show the analysis.
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload Job Description Image")
            resume_input = gr.File(type="filepath", label="Upload Your Resume (txt format)")
            process_button = gr.Button("Process Application")
            outputs = [
                gr.Textbox(label="Extracted Job Info"),
                gr.Number(label="Cosine Similarity Score"),
                gr.Textbox(label="Updated Resume")
            ]
        # Right column: chatbot with optional image attachment.
        with gr.Column():
            image_box = gr.Image(type="filepath", label="Upload Image for Chatbot")
            chatbot = gr.Chatbot(scale=2, height=750)
            text_box = gr.Textbox(placeholder="Enter text and press enter, or upload an image", container=False)
            btn = gr.Button("Submit")

    # Per-session state shared between the event handlers below.
    job_text = gr.State()
    resume_text = gr.State()
    chat_history = gr.State([])

    # Cache the extracted texts whenever an upload changes. Clearing an upload
    # passes None to the handler, so store "" instead of trying to open it.
    image_input.change(lambda path: extract_text(path) if path else "", inputs=[image_input], outputs=[job_text])
    resume_input.change(_read_resume_file, inputs=[resume_input], outputs=[resume_text])

    process_button.click(process_application, inputs=[image_input, resume_input], outputs=outputs).then(
        initialize_chat_history, inputs=[job_text, resume_text], outputs=[chat_history]
    )

    chat_inputs = [chat_history, text_box, image_box]
    chat_outputs = [chat_history, text_box, chatbot]
    # The placeholder tells users to press Enter, so the text box's submit
    # event runs the same two-step chain as the Submit button.
    for trigger in (btn.click, text_box.submit):
        trigger(query_message, chat_inputs, chat_outputs).then(
            llm_response, chat_inputs, chat_outputs
        )

demo.queue()
demo.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://d4b2bc19302d280b7c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://d4b2bc19302d280b7c.gradio.live


