In [None]:
import os
import uuid

import chromadb
import pandas as pd
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq

In [None]:
# Read the Groq API key from the process environment so the secret never lives in the notebook.
# A missing variable raises KeyError right here instead of failing later on the first request.
GROQ_API_KEY = os.environ["GROQ_API_KEY"]

# Chat model shared by every chain below.
# "llama-3.1-70b-versatile" has been retired by Groq; "llama-3.3-70b-versatile" is its
# documented replacement.
llm = ChatGroq(
    temperature=0,  # lowest sampling temperature, for repeatable extraction output
    groq_api_key=GROQ_API_KEY,
    model_name="llama-3.3-70b-versatile",
)

In [None]:
# Job posting to analyse; swap this URL to target a different listing.
job_url = "https://www.naukri.com/job-listings-cognizant-hiring-for-net-react-developer-cognizant-technology-solutions-india-ltd-kolkata-6-to-11-years-211124502745?src=jobsearchDesk&sid=17354079545416691&xp=1&px=1"

# Download the page and keep the text of the last document the loader returns.
loader = WebBaseLoader(job_url)
scraped_docs = loader.load()
data = scraped_docs[-1].page_content
print(data)

In [None]:
# Prompt text asking the model to turn raw scraped page text into JSON with the
# keys role / experience / skills / description. `{job_data}` is the only placeholder.
extract_template = """
        ### SCRAPED TEXT FROM WEBSITE:
        {job_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: `role`, `experience`, `skills` and `description`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):    
        """
prompt_extract = PromptTemplate.from_template(extract_template)

# Feed the filled-in prompt to the chat model and run it on the scraped page text.
chain_extract = prompt_extract | llm
res = chain_extract.invoke({'job_data': data})

In [None]:
# Convert the model's textual reply into Python objects so later cells can index into it.
json_parser = JsonOutputParser()
reply_text = res.content
json_res = json_parser.parse(reply_text)
print(json_res)

In [None]:
# Portfolio table; the vector-store cell reads its "Techstack" and "Links" columns.
portfolio_csv = "my_portfolio.csv"
df = pd.read_csv(portfolio_csv)

In [None]:
# Persistent Chroma client rooted at the 'vectorstore' path, holding one "portfolio" collection.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Seed only an empty collection so re-running this cell does not duplicate entries.
# All rows go in with a single batched add instead of one add (and one embedding
# pass) per row; an empty frame is skipped because there is nothing to insert.
if not collection.count() and len(df):
    collection.add(
        documents=df["Techstack"].tolist(),
        metadatas=[{"links": link} for link in df["Links"]],
        ids=[str(uuid.uuid4()) for _ in range(len(df))],  # fresh random id per row
    )

In [None]:
# The model may answer with a single JSON object instead of an array. Wrap a lone
# posting in a list so that indexing here and the per-job loop later work in both cases.
jobs = json_res if isinstance(json_res, list) else [json_res]
jobs[0]['skills']

In [None]:
# Cold-email prompt. Placeholders: `{job_description}` (the extracted posting) and
# `{link_list}` (portfolio links retrieved from the vector store).
email_template = """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are Mohan, a business development executive at AtliQ. AtliQ is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of AtliQ 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase Atliq's portfolio: {link_list}
        Remember you are Mohan, BDE at AtliQ. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
prompt_email = PromptTemplate.from_template(email_template)

links = []
chain_email = prompt_email | llm

# Accept either a list of postings or a single posting dict.
job_list = jobs if isinstance(jobs, list) else [jobs]

# Write one cold email per extracted job posting.
for job in job_list:
    skills = job.get('skills')
    # Chroma rejects an empty query, so only look up portfolio links when the
    # posting actually lists skills; otherwise the email is written without links.
    if skills:
        links = collection.query(query_texts=skills, n_results=2).get('metadatas', [])
    else:
        links = []
    # Use a dedicated name: reusing `res` would overwrite the extraction response
    # and break the JSON-parsing cell if it is re-run afterwards.
    email_res = chain_email.invoke({"job_description": str(job), "link_list": links})
    print(email_res.content)
    print('--------------------------------------------------------------------------')