In [1]:
from langchain_groq import ChatGroq
import os

In [2]:
# extract api key
# The key is read from a .txt file in the current working directory.
# Candidates are sorted because os.listdir() returns names in arbitrary order;
# sorting makes the choice deterministic when several .txt files are present.
files = os.listdir('.')
key_file = sorted(name for name in files if name.endswith('.txt'))
if not key_file:
    # Fail with an explicit message instead of an IndexError on key_file[0].
    raise FileNotFoundError(
        "No .txt file containing the Groq API key was found in the current directory."
    )
with open(key_file[0], 'r') as file:
    # Strip surrounding whitespace: a trailing newline saved by an editor
    # would otherwise become part of the key passed to the client.
    key = file.read().strip()

In [36]:
# Chat model used by the chains in this notebook. It is configured with the
# API key loaded above and a temperature of 0.
llm = ChatGroq(
    model_name="gemma2-9b-it",
    groq_api_key=key,
    temperature=0,
)

In [None]:
# Load the job-posting page and keep the text of the last document returned
# by the loader, then print it for inspection.
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    "https://www.accenture.com/in-en/careers/jobdetails?id=ATCI-4643529-S1792693_en&title=Responsible%20AI%20Engineer"
)
page_data = loader.load()[-1].page_content
print(page_data)

In [29]:
from langchain_core.prompts import PromptTemplate

# Prompt that asks the model to turn scraped careers-page text into JSON.
# Every requested key is written lower-case and in backticks so the model
# returns a consistent key set from run to run.
prompt_extract = PromptTemplate.from_template(
        """
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: `role`, `experience`, `skills`, `qualification` and `description`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):    
        """
)

# Pipe the prompt into the model and run it on the scraped page text.
# `res` is the model's reply; its `.content` is parsed in the next cell.
chain_extract = prompt_extract | llm 
res = chain_extract.invoke(input={'page_data':page_data})


In [30]:
from langchain_core.output_parsers import JsonOutputParser
# The model reply (res.content) is text, so JsonOutputParser is used to parse it into Python objects.
json_parser = JsonOutputParser()
json_res = json_parser.parse(res.content)
# Bare expression so the notebook displays the parsed result.
json_res

[{'role': 'Responsible AI Engineer',
  'experience': 'Minimum 3 year(s)',
  'skills': 'Machine Learning',
  'Qualification': '15 years full time education',
  'description': 'As a Responsible AI Engineer, you will assess AI systems for adherence to predefined thresholds and benchmarks related to responsible, ethical, and sustainable practices. Design and implement technology mitigation strategies for systems to ensure ethical and responsible standards are achieved.\n\nRoles & Responsibilities:\n- Expected to perform independently and become an SME.\n- Required active participation/contribution in team discussions.\n- Contribute in providing solutions to work-related problems.\n- Develop and implement AI system assessment frameworks.\n- Collaborate with cross-functional teams to ensure ethical AI practices.\n- Research and stay updated on responsible AI trends and best practices.\n- Provide guidance on ethical decision-making in AI implementations.\n- Conduct audits to evaluate AI syste

In [31]:
# Ask the user for a job description and echo it back.
# NOTE: this cell is interactive; a full re-run needs the text typed in again.
description = input("Enter the description: ")
print(description)

# extracting content from description
# The chain built here is the one that is invoked, so this cell does not
# depend on a differently named chain from another cell.
chain_transform = prompt_extract | llm 
res = chain_transform.invoke(input={'page_data':description})

# parsing content to json
json_res_trans = json_parser.parse(res.content)
json_res_trans

{'role': 'Responsible AI Engineer',  'experience': 'Minimum 3 years',  'skills': {'mustHave': ['Machine Learning'], 'goodToHave': ['NLP']},  'qualification': '15 years full-time education',  'description': 'Assess AI systems for adherence to predefined thresholds and benchmarks related to responsible, ethical, and sustainable practices. Design and implement technology mitigation strategies for systems to ensure ethical and responsible standards are achieved.'}


{'role': 'Responsible AI Engineer',
 'experience': 'Minimum 3 years',
 'skills': {'mustHave': ['Machine Learning'], 'goodToHave': ['NLP']},
 'qualification': '15 years full-time education',
 'description': 'Assess AI systems for adherence to predefined thresholds and benchmarks related to responsible, ethical, and sustainable practices. Design and implement technology mitigation strategies for systems to ensure ethical and responsible standards are achieved.'}

In [37]:
# Prompt that asks the model to draft a short cold email for the job
# description entered above. The template text is sent to the model verbatim,
# so section headers and the instruction carry no stray quote characters.
email_prompt = PromptTemplate.from_template(
    """
    ### JOB DESCRIPTION:
    {description}
    ### INSTRUCTION:
    The text is from the career's page of a website. Your job is to write a professional yet conversational cold email to a recruiter. The email should feel natural and engaging while maintaining a polished tone.
    Start with a warm introduction, briefly mention how the sender came across the recruiter, and highlight relevant AI and machine learning experience.
    Keep the focus on key skills like responsible AI, NLP, and ethical AI practices without sounding overly formal or templated.
    The email should flow smoothly, avoiding bullet points, and include a clear call to action—such as requesting a quick chat about potential opportunities.
    The length should be concise, ideally under 150 words, while still making a strong impression.
    (NO PREAMBLE)
    """
    )

# Pipe the prompt into the model, run it on the description and print the
# generated email text.
chain_email = email_prompt | llm
email_text = chain_email.invoke(input={'description':description})
print(email_text.content)

Subject: Responsible AI Engineer - [Your Name]

Hi [Recruiter Name],

I came across your company's career page while researching opportunities in responsible AI, and I was immediately drawn to the "Responsible AI Engineer" role. With over 3 years of experience in machine learning and a strong passion for developing ethical and sustainable AI systems, I believe my skills align perfectly with what you're looking for.

I've been particularly focused on incorporating responsible AI principles into my work, including designing mitigation strategies to ensure fairness and transparency. My experience with NLP further strengthens my ability to contribute to projects that prioritize ethical considerations in AI development.

Would you be open to a quick chat to discuss potential opportunities at [Company Name]? I'm eager to learn more about your team and how my expertise can contribute to your mission.

Best regards,

[Your Name] 





Extracting text from a resume (PDF or DOCX)

In [None]:
# Location of the resume file that the extraction code below reads.
file_path = r"D:\Kalyani_datascience.docx"

import fitz  # PyMuPDF for PDF
from docx import Document  # python-docx for DOCX
import os

def extract_text(file_path):
    """Return the text of a PDF or DOCX file, chosen by its extension.

    The extension is compared case-insensitively. ``.pdf`` paths are passed
    to ``extract_text_from_pdf`` and ``.docx`` paths to
    ``extract_text_from_docx``; the result of that call is returned as is.

    Args:
        file_path: Path of the file to read.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    # Reject anything that is not one of the two supported formats up front.
    if suffix not in (".pdf", ".docx"):
        raise ValueError("Unsupported file format. Please use a PDF or DOCX file.")

    if suffix == ".pdf":
        return extract_text_from_pdf(file_path)
    return extract_text_from_docx(file_path)

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        A single string made of each page's ``get_text()`` result, with
        pages separated by ``"\\n"``.
    """
    # Open the document as a context manager so it is closed once the text
    # has been collected, including when reading a page raises.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)

def extract_text_from_docx(docx_path):
    """Return the paragraph and table text of a DOCX file as one string.

    The text of every entry in ``doc.paragraphs`` comes first, one line per
    paragraph. After that, each table row contributes one line in which the
    stripped cell texts are joined with tabs. All lines are joined with
    ``"\\n"``. Table lines always follow the paragraph lines, whatever the
    tables' position in the document.

    Args:
        docx_path: Path of the DOCX file to open with python-docx.
    """
    doc = Document(docx_path)

    # Paragraph text, in the order python-docx yields it.
    lines = [para.text for para in doc.paragraphs]

    # One tab-separated line per table row, cells stripped of whitespace.
    lines.extend(
        "\t".join(cell.text.strip() for cell in row.cells)
        for table in doc.tables
        for row in table.rows
    )

    return "\n".join(lines)

# Extract the text of the file at file_path and print it for inspection.
# `text` is used by the prompt cells that follow.
text = extract_text(file_path)
print(text)


In [None]:
# Prompt that asks the model to pull structured fields out of the resume text.
# NOTE: this rebinds `prompt_extract`, which an earlier cell uses for the
# job-posting prompt; cells that build chains from it depend on run order.
prompt_extract = PromptTemplate.from_template(
    """
    ### DOCUMENT TEXT:
    {text}
    ### INSTRUCTION:
    The text is extracted from a resume. Your job is to extract the key information from the document and return the following details in JSON format:
    1. Name
    2. Email
    3. Phone Number
    4. Skills
    5. Experience
    6. Certifications
    7. Projects
    8. Summary
    Only return the valid JSON.
    ### VALID JSON (NO PREAMBLE):    
    """
)

# Build the chain, run it on the resume text, then parse the reply.
# `res` first holds the model reply and is then rebound to the parsed JSON.
res_text = prompt_extract | llm
res = res_text.invoke({'text': text})
res = json_parser.parse(res.content)
res

In [None]:
# Prompt that combines the resume text with the job description and asks the
# model for a short cold email. NOTE: this rebinds `email_prompt` from the
# earlier job-description-only email cell.
email_prompt = PromptTemplate.from_template(
    """
    ### RESUME TEXT:
    "{text}"
    ### JOB DESCRIPTION:
    {description}
    ### INSTRUCTION:
    The text is extracted from a resume. Your job is to write a professional yet conversational cold email to a recruiter. The email should feel natural and engaging while maintaining a polished tone.
    Start with a warm introduction, briefly mention the details, and highlight relevant experience and project details.
    Keep the focus on key skills from the resume text without sounding overly formal or templated.
    The email should flow smoothly, avoiding bullet points, and include a clear call to action—such as requesting a quick chat about potential opportunities.
    The length should be concise, ideally under 200 words, while still making a strong impression.Mention the relevant details like name and everything in the email.
    (NO PREAMBLE)
    """
)

# Fill both template variables, run the chain and print the generated email.
c_email = email_prompt | llm
e_text = c_email.invoke({'text': text, 'description': description})
print(e_text.content)