In [25]:
!pip install langchain-groq langchain-community huggingface_hub PyPDF2 langchain_huggingface faiss-cpu dotenv gradio



In [26]:
from langchain_groq import ChatGroq
import gradio as gr
from PyPDF2 import PdfReader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from huggingface_hub import login
from dotenv import load_dotenv
import os
import io

In [27]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Only authenticate when a token is actually configured: calling login() with
# None falls back to an interactive prompt, which stalls a Run-All execution.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token:
    login(hf_token)
else:
    print("HUGGINGFACEHUB_API_TOKEN is not set; skipping Hugging Face login.")

In [28]:
 # Functions are defined here
# PDF Text Extraction
def get_pdf_text(pdf_file):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        pdf_file: The PDF source; it is passed unchanged to ``PdfReader``.

    Returns:
        str: The page texts joined in page order with no separator. Pages
        whose ``extract_text()`` result is empty or ``None`` are skipped.
    """
    reader = PdfReader(pdf_file)
    page_texts = []
    for page in reader.pages:
        # Extract once per page: extraction is the expensive step, and calling
        # it a second time just to append the result doubles the work.
        text = page.extract_text()
        if text:
            page_texts.append(text)
    return "".join(page_texts)

# -------------------------------
# Text Chunking
def get_text_chunks(raw_text):
    """Split raw text into overlapping chunks.

    The splitter breaks on newlines and is configured for chunks of up to
    1000 characters (measured with ``len``) with a 200-character overlap.

    Args:
        raw_text: The full text to split.

    Returns:
        The result of ``CharacterTextSplitter.split_text`` for ``raw_text``.
    """
    return CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    ).split_text(raw_text)

# -------------------------------
# Vector Embedding
# Embedding models keyed by model name, so the model is built once per kernel
# session instead of once per request.
_EMBEDDINGS_CACHE = {}


def get_vectorstore(chunks):
    """Embed text chunks and index them in a FAISS vector store.

    Args:
        chunks: Non-empty list of text chunks to embed.

    Returns:
        The FAISS vector store built from ``chunks``.

    Raises:
        ValueError: If ``chunks`` is empty, since there is nothing to index.
    """
    if not chunks:
        raise ValueError("Cannot build a vector store from an empty list of text chunks.")

    model_name = "hkunlp/instructor-xl"
    embeddings = _EMBEDDINGS_CACHE.get(model_name)
    if embeddings is None:
        # Constructing the embedding model is costly; reuse it across calls.
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
        _EMBEDDINGS_CACHE[model_name] = embeddings

    return FAISS.from_texts(chunks, embedding=embeddings)

# -------------------------------
# LLM Setup (API key is read from the environment, never hardcoded)
# SECURITY: a literal Groq key was previously committed on this line. Treat
# that key as leaked and revoke it; supply the replacement via GROQ_API_KEY
# (for example in the .env file loaded by load_dotenv()).
_groq_api_key = os.getenv("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your environment or .env file "
        "before creating the Groq chat model."
    )

llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    temperature=0.7,
    request_timeout=30,
    groq_api_key=_groq_api_key,
)

# -------------------------------
# RAG Chain
def create_rag_chain(vectorstore):
    """Assemble the retrieval-augmented cover-letter chain for a vector store.

    The chain retrieves context from ``vectorstore`` (``k`` = 3), fills the
    cover-letter prompt with that context and the incoming query, sends it to
    the module-level ``llm``, and parses the reply with ``StrOutputParser``.

    Args:
        vectorstore: Store exposing ``as_retriever``; used as the context source.

    Returns:
        The composed runnable chain; invoke it with the query string.
    """

    def format_docs(docs):
        # Merge the retrieved documents into one context string.
        return "\n\n".join(doc.page_content for doc in docs)

    rag_template = """
    You are a professional cover letter writer. Using the following student profile information, generate a concise and tailored cover letter for the specified job position and company. Ensure the tone is professional, enthusiastic, and highlights relevant skills, education, and experiences from the context. If specific details (e.g., company name, job title) are not provided in the query, use placeholders and note they can be customized.

    <context>
    {context}
    </context>

    Query: {question}

    Cover Letter:
    """

    # Limit retrieval to three chunks per query.
    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # The same incoming query feeds both the retriever and the prompt's question slot.
    chain_inputs = {
        "context": top_k_retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    cover_letter_prompt = ChatPromptTemplate.from_template(rag_template)

    return chain_inputs | cover_letter_prompt | llm | StrOutputParser()

# -------------------------------
# Gradio Inference Function

def ensure_list(input_files):
    """Normalize an upload value to a list.

    Args:
        input_files: ``None``, a single item, or a list of items.

    Returns:
        list: ``[]`` for ``None`` (nothing uploaded), the same list object
        when a list is given, otherwise a one-element list wrapping the value.
    """
    if input_files is None:
        # No upload must not become [None]; callers read attributes of each item.
        return []
    if isinstance(input_files, list):
        return input_files
    return [input_files]


def generate_cover_letter_ui(pdf_files, company_name, student_name_input):
    """Generate a cover letter from the uploaded PDF that matches a student name.

    The first uploaded ``.pdf`` whose file name (not its directory) contains
    the student name, case-insensitively, is selected. Its text is extracted,
    chunked, embedded, and used as context for the RAG chain.

    Args:
        pdf_files: Uploaded file(s): ``None``, a single upload, or a list of
            uploads. Each upload is either an object with a ``name`` path
            attribute or a plain path string.
        company_name: Company to address the cover letter to.
        student_name_input: Student name used to pick the matching PDF.

    Returns:
        str: The generated cover letter, or a message starting with "❌" when
        the name is missing, no PDF matches, or the PDF has no extractable text.
    """
    # Drop missing entries so an empty upload never reaches the attribute access.
    uploads = [f for f in ensure_list(pdf_files) if f is not None]

    student_name = (student_name_input or "").strip()
    if not student_name:
        # An empty needle is a substring of every filename and would silently
        # select the first upload.
        return "❌ Please enter the student name so the matching PDF can be selected."
    student_name_lower = student_name.lower()

    def _file_name(upload):
        # Compare against the basename only; the upload's temp directory must
        # not influence the match.
        return os.path.basename(str(getattr(upload, "name", upload))).lower()

    matches = [
        f for f in uploads
        if _file_name(f).endswith(".pdf") and student_name_lower in _file_name(f)
    ]
    if not matches:
        return f"❌ No PDF found that contains the name '{student_name_input}'. Please check the filename."
    selected_pdf = matches[0]

    # Extract raw text from the PDF file object
    raw_text = get_pdf_text(selected_pdf)
    chunks = get_text_chunks(raw_text)
    if not chunks:
        # Nothing to index, e.g. an image-only PDF.
        return "❌ No extractable text was found in the selected PDF."

    vectorstore = get_vectorstore(chunks)
    rag_chain = create_rag_chain(vectorstore)

    query = f"Generate a cover letter for a software engineering internship at {company_name}."
    return rag_chain.invoke(query)



# Gradio UI: three inputs in one row, a button, and a text area for the result.
with gr.Blocks() as demo:
    gr.Markdown("## 🧑‍💼 Student Cover Letter Generator")

    with gr.Row():
        # file_count="multiple" lets several PDFs be uploaded at once.
        pdf_input = gr.File(label="Upload Student PDFs", file_types=[".pdf"], file_count="multiple")
        company_input = gr.Textbox(label="Enter Company Name")
        student_name_input = gr.Textbox(label="Enter Student Name")


    generate_button = gr.Button("Generate Cover Letter")
    output = gr.Textbox(label="Generated Cover Letter", lines=20)

    # The order of `inputs` matches the positional parameters of
    # generate_cover_letter_ui(pdf_files, company_name, student_name_input).
    generate_button.click(
        fn=generate_cover_letter_ui,
        inputs=[pdf_input, company_input, student_name_input],
        outputs=output
    )

# Start the app. Per the recorded cell output, on a hosted notebook Gradio
# enables a public share link automatically unless share=False is passed.
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ce8558c8161b5c1d49.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


