In [4]:
# Install the app's dependencies into the running kernel's environment.
# `%pip` is used explicitly instead of relying on IPython automagic, and the
# versions are pinned to the ones shown in this cell's recorded output so a
# fresh run reproduces the same setup.
%pip install streamlit sentence-transformers==3.2.0 PyPDF2==3.0.1 python-docx==1.1.2 flask

Collecting sentence-transformers
  Downloading sentence_transformers-3.2.0-py3-none-any.whl.metadata (10 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading sentence_transformers-3.2.0-py3-none-any.whl (255 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.2/255.2 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx, PyPDF2, sentence-transformers
Successfully installed PyPDF2-3.0.1 python-docx-1.1.2 sentence-transformers-3.2.0


## Implementing CV matching logic and Streamlit page

In [10]:
%%writefile app.py

import streamlit as st
from sentence_transformers import SentenceTransformer, util
import PyPDF2
from docx import Document

# Load the lightweight MiniLM model once per server process. Streamlit re-runs
# this script from the top on every widget interaction, so the loader is
# wrapped in st.cache_resource to avoid re-instantiating the model each time.
@st.cache_resource
def load_model():
    """Return the shared all-MiniLM-L6-v2 SentenceTransformer instance."""
    return SentenceTransformer('all-MiniLM-L6-v2')


model = load_model()

# Text extraction functions
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts; the app passes the
            object returned by Streamlit's file uploader.

    Returns:
        str: The text of each page, in page order, separated by newlines.
        A page that yields no text contributes an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next; `or ""` keeps the join safe if a page yields
    # nothing.
    return "\n".join(page.extract_text() or "" for page in reader.pages)

def extract_text_from_docx(docx_file):
    """Return the paragraph text of a DOCX file, one paragraph per line.

    Args:
        docx_file: Source handed straight to ``Document``.

    Returns:
        str: The ``text`` of each entry in the document's ``paragraphs``,
        joined with newline characters.
    """
    paragraphs = Document(docx_file).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

# Function to match job description to CVs
def match_cv_to_job_description(cvs, job_description):
    """Score each CV against a job description.

    Args:
        cvs: Iterable of CV texts.
        job_description: Text of the job description.

    Returns:
        list: One float per CV, in input order, taken from
        ``util.pytorch_cos_sim`` between the CV embedding and the job
        description embedding.
    """
    # The job description is embedded once and reused for every comparison.
    target_embedding = model.encode(job_description, convert_to_tensor=True)

    def similarity(cv_text):
        candidate_embedding = model.encode(cv_text, convert_to_tensor=True)
        return util.pytorch_cos_sim(candidate_embedding, target_embedding).item()

    return [similarity(cv_text) for cv_text in cvs]


# Streamlit UI
st.title("CV Matching to Job Description")

# Job description input. The hint is shown as a placeholder rather than passed
# as the initial value, so an untouched box is empty and is rejected by the
# check below instead of being scored as if it were a real job description.
job_description = st.text_area(
    "Job Description",
    placeholder="Enter the job description here",
)

# Upload multiple CVs
uploaded_files = st.file_uploader(
    "Upload CVs (PDF or DOCX)",
    type=['pdf', 'docx'],
    accept_multiple_files=True,
)

if st.button("Match CVs"):
    if job_description.strip() and uploaded_files:
        # cv_names[i] is the file that produced cvs[i]; files that are skipped
        # are left out of both lists so scores stay aligned with their file.
        cv_names = []
        cvs = []
        for uploaded_file in uploaded_files:
            file_name = uploaded_file.name
            # Compare the extension case-insensitively so a file such as
            # "CV.PDF" is not silently dropped.
            lowered_name = file_name.lower()
            if lowered_name.endswith('.pdf'):
                cv_text = extract_text_from_pdf(uploaded_file)
            elif lowered_name.endswith('.docx'):
                cv_text = extract_text_from_docx(uploaded_file)
            else:
                st.warning(f"Skipping {file_name}: unsupported file type.")
                continue

            # A document with no extractable text (e.g. image-only pages)
            # would otherwise be scored on an empty string.
            if not cv_text or not cv_text.strip():
                st.warning(f"Skipping {file_name}: no text could be extracted.")
                continue

            cv_names.append(file_name)
            cvs.append(cv_text)

        if cvs:
            # Get similarity scores
            scores = match_cv_to_job_description(cvs, job_description)

            # Display the scores, labelled with the originating file name
            st.write("Matching Scores:")
            for i, (file_name, score) in enumerate(zip(cv_names, scores)):
                st.write(f"CV {i+1} ({file_name}): {score:.4f}")
        else:
            st.error("None of the uploaded CVs contained readable text.")
    else:
        st.error("Please upload at least one CV and provide a job description.")

Writing app.py


## Hosting the Streamlit page through an ngrok tunnel

In [14]:
# `%pip` installs into the running kernel's environment (a `!pip` shell call
# may resolve to a different interpreter). The version is pinned to the one
# shown in this cell's recorded output.
%pip install pyngrok==7.2.0


Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Register the ngrok authtoken without writing it into the notebook, where it
# would be saved (and shared) with the file. The token is taken from the
# NGROK_AUTHTOKEN environment variable when set; otherwise it is prompted for
# with hidden input.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

In [None]:
from pyngrok import ngrok
import os
import subprocess

# Port shared by the Streamlit server and the ngrok tunnel. It is passed to
# Streamlit explicitly so the tunnel always points at the port the app is on.
STREAMLIT_PORT = 8501

# Run the Streamlit app in the background. Popen returns immediately and keeps
# a handle on the server, so it can be stopped later with
# streamlit_process.terminate() (os.system with a trailing "&" leaves no handle).
streamlit_process = subprocess.Popen(
    [
        "streamlit", "run", "app.py",
        "--server.port", str(STREAMLIT_PORT),
        "--server.headless", "true",
    ]
)

# Open an ngrok tunnel on HTTP protocol
public_url = ngrok.connect(STREAMLIT_PORT, "http")
# ngrok.connect returns a tunnel object; print its URL rather than its repr.
print(f"Public URL: {public_url.public_url}")
