In [50]:
# Build the Groq-hosted chat model that the later cells call as `llm`.
import os
from getpass import getpass

from langchain_groq import ChatGroq

llm = ChatGroq(
    temperature=0,
    # Never hardcode the key in the notebook. Create an API key in your Groq
    # account, export it as GROQ_API_KEY, or type it at the (non-echoing)
    # prompt that appears when the variable is not set.
    groq_api_key=os.environ.get("GROQ_API_KEY") or getpass("Groq API key: "),
    # NOTE(review): hosted model ids get retired over time - confirm this one
    # is still served by Groq before running.
    model="llama-3.1-70b-versatile",
)

# Optional smoke test: sends one question to Groq and prints the model's reply.
# response = llm.invoke("Who is father of computer")
# print(response.content)

In [None]:
# Scrape the job posting page; its text is what the LLM extracts the job from.
from langchain_community.document_loaders import WebBaseLoader

# WebBaseLoader fetches the given link and returns the page as a list of documents.
loader = WebBaseLoader("https://www.amazon.jobs/en-gb/jobs/2774960/software-development-manager-shopping-design-tech")
page_contents = loader.load()
if not page_contents:
    # Stop here instead of only printing: otherwise `page_data` stays undefined
    # (or keeps a stale value from an earlier run) and the extraction cell that
    # reads it fails later or silently works on the wrong text.
    raise RuntimeError("No data loaded.")

# Keep the text of the last loaded document and show it for inspection.
page_data = page_contents[-1].page_content
print(page_data)


In [52]:
# Prompt that hands the scraped page text to the LLM and asks for the job
# posting back as JSON with the keys role, experience, skills and description.
from langchain_core.prompts import PromptTemplate

prompt_extract = PromptTemplate.from_template(
        """
        ### THIS IS THE SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website link provided.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: `role`, `experience`, `skills` and `description`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):    
        """
)

# Pipe the prompt into the model, then run the chain on the scraped text.
extraction_chain = prompt_extract | llm
result = extraction_chain.invoke({"page_data": page_data})
# Type of the reply payload, kept around for inspection.
content_type = type(result.content)



In [None]:
# Parse the model's text reply into Python data so later cells can index it.
from langchain_core.output_parsers import JsonOutputParser

json_parser = JsonOutputParser()
json_data = json_parser.parse(text=result.content)
# Last expression of the cell: displays the parsed result.
json_data

In [None]:
# Load the portfolio table (tech stacks and their links) that is later indexed
# in the vector store and used to pick links for the email.
import pandas as pd

df = pd.read_csv(filepath_or_buffer="my_portfolio.csv")
# Last expression of the cell: displays the loaded table.
df

In [55]:
# Index the portfolio rows in ChromaDB so links can be looked up by tech stack.
import uuid
import chromadb

# PersistentClient stores the database on disk; the 'vectorstore' folder is
# created when this cell runs.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Only fill the collection when it is still empty, so re-running the cell does
# not add the same portfolio rows again.
if collection.count() == 0:
    for techstack, link in zip(df["Techstack"], df["Links"]):
        collection.add(
            documents=techstack,
            metadatas={"links": link},
            ids=[str(uuid.uuid4())],  # fresh random id for every row
        )

In [None]:
links = collection.query(query_texts=job['skills'], n_results=2).get('metadatas', [])
links

In [None]:
job = json_data
#extracting only skills
job['skills']

In [None]:
# Prompt that turns the extracted job description plus the matching portfolio
# links into a short email written as Ryan.
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are Ryan, a student at The University of Texas at Dallas pursuing master of science in information technology and Management.
        Remember you are Ryan, Graduate Student at University of Texas at Dallas. 
        Also add the most relevant ones from the following links to showcase Ryan's portfolio: {link_list}
        Do not provide a preamble and make it short.
        ### EMAIL (NO PREAMBLE):
        
        """
        )

# Pipe the prompt into the model, fill in both template variables, and print
# the generated email text.
chain_email = prompt_email | llm
response = chain_email.invoke(dict(job_description=str(job), link_list=links))
print(response.content)