<a href="https://colab.research.google.com/github/surabhi13gupta/ResumeSummarizer/blob/UI/MainUI_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [79]:
!pip -qq install langchain_openai
!pip -qq install PyPDF2
!pip -qq install langchain_community
!pip -qq install faiss-gpu-cu11
!pip -qq install streamlit pyngrok
!pip -qq install streamlit_modal
!pip -qq install PyMuPDF
!pip -qq install python-docx
!pip -qq install docx2pdf

In [80]:
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [81]:
with open(".env", "w") as f:
    from google.colab import userdata
    GITHUB_TOKEN = userdata.get('GITHUB_TOKEN')
    f.write(f"GITHUB_TOKEN={GITHUB_TOKEN}\n")

    ngrok_key = userdata.get('NGROK_KEY')
    f.write(f'ngrok_token={ngrok_key}\n')

In [82]:
from pyngrok import ngrok
from dotenv import load_dotenv
load_dotenv()
import os
ngrok_token = os.getenv("ngrok_token")
!ngrok config add-authtoken {ngrok_token}

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [83]:
%%writefile resume_uploader.py
import zipfile
import os
import tempfile
from PyPDF2 import PdfReader
import docx

import sys
sys.path.append('/content/drive/MyDrive/CDS-B9-Group11/Capstone Project/Codes/')
from masker import redact_personal_information

def extract_text_from_pdf(file):
    reader = PdfReader(file)
    return "\n".join(page.extract_text() for page in reader.pages if page.extract_text())

def extract_text_from_docx(file):
    doc = docx.Document(file)
    return "\n".join([para.text for para in doc.paragraphs])

def extract_from_zip(zip_file):
    extracted_texts = []
    with zipfile.ZipFile(zip_file) as z:
        for name in z.namelist():
            ext = name.rsplit('.', 1)[-1].lower()
            with z.open(name) as f:
                if ext == "pdf":
                    modified_pdf = redact_personal_information(f)
                    extracted_texts.append(modified_pdf(f))
                elif ext == "docx":
                    extracted_texts.append(extract_text_from_docx(f))
    return extracted_texts

Overwriting resume_uploader.py


In [123]:
%%writefile main.py
import sys
sys.path.append('/content/drive/MyDrive/CDS-B9-Group11/Capstone Project/Codes/')

import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from google.colab import userdata
from datetime import datetime
import os
from streamlit_modal import Modal
from io import BytesIO
from resume_uploader import extract_text_from_pdf, extract_text_from_docx, extract_from_zip
from dotenv import load_dotenv
import os
from masker import redact_personal_information
import pandas as pd
from langchain_core.runnables import RunnableSequence

load_dotenv()
github_token = os.getenv("GITHUB_TOKEN")
endpoint = "https://models.github.ai/inference"
model_name = "openai/gpt-4.1-nano"

def get_text_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 10000,
    chunk_overlap = 100
    )
    chunks = text_splitter.split_text(text)
    return chunks

def get_vector_store(text_chunks):
    embeddings = OpenAIEmbeddings(
        model='text-embedding-3-small',
        dimensions=1536,
        base_url = endpoint,
        api_key= github_token)
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")

def get_conversation_chain():
    model = ChatOpenAI(
    base_url = endpoint,
    api_key= github_token,
    model=model_name,
    temperature=0.1
    )
    chat_prompt_template = ChatPromptTemplate([
         ('system', 'You are good resume parser expert and helps in answering questions. Answer the question from the provided resume. Make sure to provide all the details, if the answer is not in the provided resume just reply back, "Answer is not available in provided resume", dont provide the wrong answers or dont hallucinate the answers.\n Resume: \n {resume}?'),
         ('human', 'Question: \n {question}\nAnswer:')
        ])
    chain = chat_prompt_template|model|StrOutputParser()
    return chain

def resume_summarization(resume_text):
    template = '''Generate a Resume Summary Script in 5-6 bullet points for an engaging video presentation aimed at a hiring manager.
            - Tone: Friendly, Confident, Assertive.
            - Base the script strictly on the given resume content ‚Äî do not hallucinate or add unverifiable details.:\n {resume}'''

    prompt = PromptTemplate(input_variables=['resume'],template=template)
    output_parser = StrOutputParser()
    model = ChatOpenAI(
        base_url = endpoint,
        api_key= github_token,
        model=model_name,
        temperature=0.1
        )
    chain = RunnableSequence(prompt, model, output_parser)

    summarised_text = chain.invoke({'resume': resume_text})

    return summarised_text

def notice_period_prompt():
    model = ChatOpenAI(
    base_url = endpoint,
    api_key= github_token,
    model=model_name,
    temperature=0.1
    )
    chat_prompt_template = ChatPromptTemplate([
         ('system', 'You are good and helpful assistant and helps in answering questions. Find out the notice period from public available sources for most recent organization provided in resume. Make sure to provide to provide accurate details and provide the source link at the end of answer, dont provide the wrong answers or dont hallucinate the answers. \n Resume: \n {resume}?'),
         ('human', 'Question: \n Notice Period of the most recent job organization the candidate working for. \nAnswer:')
        ])
    chain = chat_prompt_template|model|StrOutputParser()
    return chain

def handle_notice_period():
    user_question = "Notice Period of the most recent job organization the candidate working for"
    embeddings = OpenAIEmbeddings(
        model='text-embedding-3-small',
        dimensions=1536,
        base_url = endpoint,
        api_key= github_token,)
    new_db = FAISS.load_local("faiss_index", embeddings=embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)
    chain = notice_period_prompt()
    response = chain.invoke({
        "resume": docs
    })
    return response

def handle_user_input(user_question):
    embeddings = OpenAIEmbeddings(
        model='text-embedding-3-small',
        dimensions=1536,
        base_url = endpoint,
        api_key= github_token,)
    new_db = FAISS.load_local("faiss_index", embeddings=embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)
    chain = get_conversation_chain()
    response = chain.invoke({
        "resume": docs,
        "question": user_question
    })
    return response

def build_resume_df():
    df = pd.DataFrame(columns=['Rank', 'Resume', 'Score', 'Content'])
    return df

resume_summary_content = None

st.set_page_config("Pre-Screening Intelligence: TalAI", layout="wide")
resume_modal = Modal("üìÇ Resume Tools", key="resume-tools-modal", max_width="1000px")

section_list = [
    'Work Experience',
    'Education',
    'Skills',
    'Academic Projects',
    'Certification, Award & Recognition',
    'Career Objectives or Summary'
]

# --- Initialize session state ---
if "resume_df" not in st.session_state:
    st.session_state.resume_df = pd.DataFrame(columns=["Rank", "Resume", "Score", "Content"])
if "show_modal" not in st.session_state:
    st.session_state.show_modal = True
if "close_modal" not in st.session_state:
    st.session_state.close_modal = False
if "selected_index" not in st.session_state:
    st.session_state.selected_index = None
if "chat_history" not in st.session_state:
    st.session_state.chat_history


if "resume_df" not in st.session_state:
    st.session_state.resume_df = build_resume_df()
df = st.session_state.resume_df

# Initialize once
if "show_modal" not in st.session_state:
    st.session_state.show_modal = False

# Inject custom CSS to reduce top padding and fix sticky layout
st.markdown("""
    <style>
        /* Reduce top padding for the whole app */
        .block-container {
            padding-top: 0.8rem;
        }

        /* Sticky button container styling */
        .sticky-buttons {
            position: sticky;
            top: 0;
            background-color: white;
            padding: 0.6rem 0.5rem;
            z-index: 999;
            border-bottom: 1px solid #ddd;
        }

        /* Ensure buttons don't get clipped */
        .element-container button {
            margin-bottom: 0 !important;
        }
    </style>
""", unsafe_allow_html=True)

with st.container():
    st.markdown('<div class="sticky-buttons">', unsafe_allow_html=True)
    top_left, top_right = st.columns([1, 1])
    with top_left:
        if st.button("üì§ Upload Resumes"):
            st.session_state.show_modal = True
    with top_right:
        if st.button("üóëÔ∏è Clear Resumes"):
            st.session_state.resume_df = build_resume_df()
    st.markdown('</div>', unsafe_allow_html=True)

if st.session_state.show_modal:
    with st.expander("Upload Modal", expanded=st.session_state.show_modal):
        st.markdown("""
        <div style="display: flex; flex-direction: column; align-items: center; text-align: center;">
        <img src="https://raw.githubusercontent.com/surabhi13gupta/LangChains/main/TalAI.png" width="60" style="border-radius: 50%; margin-bottom: 1rem;">
        <h4 style="margin-bottom: 0;">TalAI ResumeTools</h4>
        <p style="margin-top: 0;">Upload your resume to extract insights, skills, and suggestions.</p>
        </div>""", unsafe_allow_html=True)

        uploaded_files = st.file_uploader(
            "Upload Resumes (PDF, DOCX, ZIP allowed)",
            type=["pdf", "docx", "zip"],
            accept_multiple_files=True
        )

        stream = st.selectbox(
            "Select your Domain:",
            ["Information Technology", "Finance", "Human Resources", "Sales", "Legal/Advocate", "Engineering"]
        )

        if st.button("Submit and Process") and uploaded_files:
            with st.spinner("Processing..."):
                rank = 1
                for uploaded_file in uploaded_files:
                    all_texts = []
                    ext = uploaded_file.name.rsplit('.', 1)[-1].lower()
                    if ext == "pdf":
                        modified_pdf = redact_personal_information(uploaded_file)
                        all_texts.append(extract_text_from_pdf(modified_pdf))
                    elif ext == "docx":
                        all_texts.append(extract_text_from_docx(uploaded_file))
                    elif ext == "zip":
                        zip_bytes = BytesIO(uploaded_file.read())
                        all_texts.extend(extract_from_zip(zip_bytes))
                    st.session_state.resume_df.loc[len(st.session_state.resume_df)] = [rank, uploaded_file.name, 10, all_texts]
                    rank += 1

                st.success("Resumes uploaded and processed!")
                st.session_state.chat_history = []

    # Close button outside expander
    if st.button("‚úÖ Close"):
        st.session_state.close_modal = True

        #         if st.button("‚úÖ Close"):
        #             st.session_state.show_modal = False
        #             st.experimental_rerun()
        # else:
        #     st.info("Awaiting file upload...")

# Handle modal close early
if st.session_state.get("close_modal", False):
    st.session_state.show_modal = False
    st.session_state.close_modal = False
    st.rerun()

left_col, middle_col, right_col = st.columns([1, 1, 1])

with left_col:
    st.subheader("üßë‚Äçüíº Recruiters Input Dashboard")

    # Job description input
    job_description = st.text_area(
        "üìù Job Description",
        height=200,
        placeholder="Paste the job role or requirements here..."
    )

    for section in section_list:
        with st.expander(f"üìÇ {section}"):
            st.text_area(f"‚úèÔ∏è Edit or review: {section}", height=150, placeholder=f"Enter details for {section}...")

    # Submit button
    if st.button("üöÄ Match Resumes") and job_description:
        with st.spinner("Matching resumes..."):
            # Placeholder for vector search logic
            top_matches = [
                "**1. Jane Doe** ‚Äî ML Engineer, 5 yrs exp, NLP-heavy projects",
                "**2. Ravi Kumar** ‚Äî Time Series Specialist, fintech background",
                "**3. Aisha Rahman** ‚Äî GenAI pipeline builder, LangChain expert",
                "**4. Leo Zhang** ‚Äî Resume parsing wizard, UX-focused",
                "**5. Sara Ali** ‚Äî Dashboard designer, Streamlit + LLM integration"
            ]

            # Display results
            st.markdown("### üèÜ Top Resume Matches")
            for match in top_matches:
                st.markdown(f"- {match}")

with middle_col:
    # First Section: Table Placeholder
    st.subheader("üìä Top 5 Resumes Rank Table")
    # st.markdown("### üìã Resume Rank Table")
    st.write("Table content goes here...")
    # Track selected rows
    selected_rows = []

    # Initialize session state variable
    if "selected_index" not in st.session_state:
        st.session_state.selected_index = None

    # Display each row with a checkbox
    for i in range(len(df)):
        cols = st.columns([1, 1, 1, 1])  # Adjust column widths
        cols[0].write(df.loc[i, "Rank"])
        cols[1].write(df.loc[i, "Resume"])
        cols[2].write(df.loc[i, "Score"])

        # Checkbox reflects whether this row is selected
        selected = cols[3].checkbox(
            "Review",
            key=f"select_{i}",
            value=(st.session_state.selected_index == i),
            on_change=lambda idx=i: st.session_state.update({"selected_index": idx})
        )

        # Append only the selected row
        if st.session_state.selected_index == i:
            selected_rows.append(df.loc[i])

    # Display each row with a checkbox

    # for i in range(len(df)):
    #     cols = st.columns([1, 1, 1, 1])  # Adjust column widths
    #     cols[0].write(df.loc[i, "Rank"])
    #     cols[1].write(df.loc[i, "Resume"])
    #     cols[2].write(df.loc[i, "Score"])
    #     selected = cols[3].checkbox("Review", key=f"select_{i}")
    #     if selected:
    #         selected_rows.append(df.loc[i])

    # selected_df = pd.DataFrame(selected_rows, columns=["Rank", "Resume", "Score", "Review"])

    # Show selected rows
    if selected_rows:
        st.markdown("### Selected Resume for Review:")
        resume_id = selected_rows[0][1]
        resume_content = df.loc[df.Resume == resume_id, 'Content'].values[0]
        st.write(resume_id)
        resume_summary_content = resume_summarization(resume_content)
        combined_text = "\n\n".join(resume_content)
        text_chunks = get_text_chunks(combined_text)
        get_vector_store(text_chunks)
    else:
        resume_summary_content = None

    # Divider (optional)
    st.markdown("---")

    # Second Section: Text Summarization
    st.subheader("üìù Summarization Dashboard")
    if resume_summary_content is None:
        st.markdown("Summary section - This is your selected resume summary...")
    else:
        st.markdown(resume_summary_content)

with right_col:
    st.markdown("### üé• Video Summary")
    st.button("Summarize Video")
    st.write("Video summary will appear here.")

    # Divider (optional)
    st.markdown("---")

    # Chat popover inside right column
    with st.popover("üí¨ Chat with TalAI", icon=":material/chat:", width="content"):
        st.header("üí¨ Chat with TalAI")

        # # Initialize chat history and flag
        # if "chat_history" not in st.session_state:
        #     st.session_state.chat_history = []

        # if "first_question_sent" not in st.session_state:
        #     st.session_state.first_question_sent = False

        # # Send first question only once
        # if not st.session_state.first_question_sent:
        #     first_question = "Candidate Name, total years of experience and recent job organisation"
        #     st.session_state.chat_history.append({
        #         "role": "user",
        #         "content": first_question
        #     })
        #     recent_job = handle_user_input(first_question)
        #     st.session_state.chat_history.append({
        #         "role": "assistant",
        #         "content": recent_job
        #     })
        #     st.session_state.first_question_sent = True  # Prevent future duplication

        # # Handle user input
        # user_question = st.chat_input("Ask TalAI about the selected resume...")
        # if user_question:
        #     st.session_state.chat_history.append({
        #         "role": "user",
        #         "content": user_question
        #     })
        #     response = handle_user_input(user_question)
        #     st.session_state.chat_history.append({
        #         "role": "assistant",
        #         "content": response
        #     })
        # chat_container = st.container()
        # # Render all messages inside chat_container
        # with chat_container:
        #     for msg in st.session_state.chat_history:
        #         role = msg["role"]
        #         prefix = "You: " if role == "user" else "TalAI: "
        #         st.chat_message(role).write(prefix + msg["content"])




        if "chat_history" not in st.session_state:
            st.session_state.chat_history = []

        for msg in st.session_state.chat_history:
            role = "user" if msg["role"] == "user" else "assistant"
            st.chat_message(role).write(msg["content"])

        chat_container = st.container()

        if resume_summary_content:
            if "first_question_sent" not in st.session_state:
                st.session_state.first_question_sent = False

            if not st.session_state.first_question_sent:
                # Handle input first
                first_question = "Candidate Name, total years of experience and recent job organisation"
                st.session_state.chat_history.append({
                        "role": "user",
                        "content": first_question
                    })
                recent_job = handle_user_input(first_question)
                st.session_state.chat_history.append({
                    "role": "assistant",
                    "content": recent_job
                })
                st.session_state.first_question_sent = True

            user_question = st.chat_input("Ask TalAI about the selected resume...")
            if user_question:
                st.session_state.chat_history.append({
                    "role": "user",
                    "content": user_question
                })
                response = handle_user_input(user_question)
                st.session_state.chat_history.append({
                    "role": "assistant",
                    "content": response
                })

            # Render all messages inside chat_container
            with chat_container:
                for msg in (st.session_state.chat_history):  # Latest at top
                    role = "user" if msg["role"] == "user" else "assistant"
                    prefix = "You: " if role == "user" else "TalAI: "
                    st.chat_message(role).write(prefix + msg["content"])


# with right_col:
#     st.header("üí¨ Chat with TalAI")
#     if "chat_history" not in st.session_state:
#         st.session_state.chat_history = []

#     for msg in st.session_state.chat_history:
#         role = "user" if msg["role"] == "user" else "assistant"
#         st.chat_message(role).write(msg["content"])

#     chat_container = st.container()

    # # Handle input first
    # first_question = "Candidate Name, total years of experience and recent job organisation"
    # st.session_state.chat_history.append({
    #         "role": "user",
    #         "content": first_question
    #     })
    # recent_job = handle_user_input(first_question)
    # st.session_state.chat_history.append({
    #     "role": "assistant",
    #     "content": recent_job
    # })

    # st.session_state.chat_history.append({
    #         "role": "user",
    #         "content": "Notice Period information for recent company"
    #     })
    # notice_response = handle_notice_period()
    # st.session_state.chat_history.append({
    #     "role": "assistant",
    #     "content": notice_response
    # })

    # user_question = st.chat_input("Ask TalAI about the selected resume...")
    # if user_question:
    #     st.session_state.chat_history.append({
    #         "role": "user",
    #         "content": user_question
    #     })
    #     response = handle_user_input(user_question)
    #     st.session_state.chat_history.append({
    #         "role": "assistant",
    #         "content": response
    #     })

    # # Render all messages inside chat_container
    # with chat_container:
    #     for msg in (st.session_state.chat_history):  # Latest at top
    #         role = "user" if msg["role"] == "user" else "assistant"
    #         prefix = "You: " if role == "user" else "TalAI: "
    #         st.chat_message(role).write(prefix + msg["content"])

Overwriting main.py


In [124]:
public_url = ngrok.connect(addr=8501)

print(f"Streamlit URL: {public_url}")

Streamlit URL: NgrokTunnel: "https://c3bd28c5eaaf.ngrok-free.app" -> "http://localhost:8501"


In [125]:
!streamlit run main.py&


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.73.172.30:8501[0m
[0m
Enter
Enter
  resume_id = selected_rows[0][1]
  resume_id = selected_rows[0][1]
  resume_id = selected_rows[0][1]
  resume_id = selected_rows[0][1]
[34m  Stopping...[0m
