In [39]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_groq import ChatGroq

import pandas as pd
import uuid
import chromadb
from dotenv import load_dotenv
import os

load_dotenv()

True

In [30]:
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')

In [31]:
# Define Model
llm = ChatGroq(
    temperature= 0,
    groq_api_key=GROQ_API_KEY,
    model_name="llama-3.1-70b-versatile"
)

In [17]:
prompt_extract = PromptTemplate.from_template(
        """
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: `role`, `experience`, `skills` and `description`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):    
        """
)

In [None]:
loader = WebBaseLoader("https://job-boards.greenhouse.io/omadahealth/jobs/6162470")
page_data = loader.load().pop().page_content
print(page_data)

In [26]:
# Create Chain
chain_extract = prompt_extract | llm
response = chain_extract.invoke(input={'page_data':page_data})
print(response.content)

```
{
  "role": "Senior Data Analyst, Client Analytics",
  "experience": "At least 7+ years of rigorous BI solution design and programming experience",
  "skills": [
    "Data Analysis and Visualization",
    "Tableau",
    "SQL",
    "ETL",
    "Databases",
    "Data model",
    "Excel"
  ],
  "description": "The client analytics team is responsible for developing our portfolio of reports and data insights that communicates the value of Omada brings across employer and payer populations. Using data spanning app engagement, clinical outcomes, behavioral health indicators, and claims, the team partners with our customer success teams to provide focus and effective data-driven narratives across over 1.5k customers and 600k members."
}
```


In [27]:
# Change the result from STR to JSON
json_parser = JsonOutputParser()
json_response = json_parser.parse(response.content)

In [33]:
json_response, type(json_response)

({'role': 'Senior Data Analyst, Client Analytics',
  'experience': 'At least 7+ years of rigorous BI solution design and programming experience',
  'skills': ['Data Analysis and Visualization',
   'Tableau',
   'SQL',
   'ETL',
   'Databases',
   'Data model',
   'Excel'],
  'description': 'The client analytics team is responsible for developing our portfolio of reports and data insights that communicates the value of Omada brings across employer and payer populations. Using data spanning app engagement, clinical outcomes, behavioral health indicators, and claims, the team partners with our customer success teams to provide focus and effective data-driven narratives across over 1.5k customers and 600k members.'},
 dict)

In [38]:
# import csv
df = pd.read_csv('./data/my_portfolio.csv')

In [40]:
# Setup VectorDB (ChromaDB)
# Create a persistent client to store records in a folder within the directory
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name='portfolio')


if not collection.count():
    for _, row in df.iterrows():
        collection.add(documents=row["Techstack"],
                       metadatas={"links": row["Links"]},
                       ids=[str(uuid.uuid4())])

C:\Users\PJ\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:02<00:00, 37.6MiB/s]


In [41]:
collection.query(query_texts="Experience in Python", n_results=2).get('metadatas',[])

[[{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}]]

In [42]:
job = json_response
job['skills']

['Data Analysis and Visualization',
 'Tableau',
 'SQL',
 'ETL',
 'Databases',
 'Data model',
 'Excel']

In [44]:
links = collection.query(query_texts=job['skills'], n_results=2).get('metadatas', [])

In [45]:
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are Mohan, a business development executive at AtliQ. AtliQ is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of AtliQ 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase Atliq's portfolio: {link_list}
        Remember you are Mohan, BDE at AtliQ. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
)

In [46]:
chain_email = prompt_email | llm
res = chain_email.invoke({"job_description": str(job), "link_list": links})
print(res.content)

Subject: Unlock Data-Driven Insights with AtliQ's Expertise

Dear Hiring Manager,

I came across the Senior Data Analyst, Client Analytics role at Omada, and I was impressed by the team's focus on developing a portfolio of reports and data insights that communicate the value of Omada's services. As a Business Development Executive at AtliQ, I'd like to introduce you to our company's capabilities in empowering businesses like yours with tailored solutions that foster scalability, process optimization, cost reduction, and heightened overall efficiency.

AtliQ has extensive experience in designing and implementing Business Intelligence (BI) solutions that cater to the needs of organizations like Omada. Our team of experts has a proven track record of delivering data analysis and visualization solutions using tools like Tableau, SQL, ETL, and Excel. We've worked with various databases and data models, ensuring seamless integration and effective data-driven narratives.

Our portfolio showca