In [1]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment, then
# read the Groq API key (None when the variable is not set).
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [2]:
from langchain_groq import ChatGroq

# Chat model used by every chain in this notebook.
# "llama-3.1-70b-versatile" has been decommissioned by Groq, so requests that
# name it are rejected; "llama-3.3-70b-versatile" is the replacement listed on
# Groq's deprecations page.
llm = ChatGroq(api_key=GROQ_API_KEY, model="llama-3.3-70b-versatile")

In [3]:
## loading the job posting data from website

from langchain_community.document_loaders import WebBaseLoader

# Fetch the job posting page; load() gives back a list, and the text of its
# last element is what the rest of the notebook works with.
loader = WebBaseLoader("https://jobs.nike.com/job/R-39044?from=job%20search%20funnel")
job_documents = loader.load()
page_data = job_documents[-1].page_content
print(page_data)

USER_AGENT environment variable not set, consider setting it to identify your requests.


Apply for Data Engineer, ITC

Search JobsSkip navigationSearch JobsNIKE, INC. JOBSContract JobsJoin The Talent CommunityLife @ NikeOverviewBenefitsBrandsOverviewJordanConverseTeamsOverviewAdministrative SupportAdvanced InnovationAir Manufacturing InnovationAviationCommunicationsCustomer ServiceDesignDigitalFacilitiesFinance & AccountingGovernment & Public AffairsHuman ResourcesInsights & AnalyticsLegalManufacturing & EngineeringMarketingMerchandisingPlanningPrivacyProcurementProduct Creation, Development & ManagementRetail CorporateRetail StoresSalesSocial & Community ImpactSports MarketingStrategic PlanningSupply Chain, Distribution & LogisticsSustainabilityTechnologyLocationsOverviewNike WHQNike New York HQEHQ: Hilversum, The NetherlandsELC: Laakdal, BelgiumGreater China HQDiversity, Equity & InclusionOverviewMilitary InclusionDisability InclusionIndigenous InclusionInternshipsData & AnalyticsData Engineer, ITCKarnataka, IndiaBecome a Part of the NIKE, Inc. Team
NIKE, Inc. does more 

In [8]:
# getting required fields 

from langchain_core.prompts import PromptTemplate

# Extraction prompt: the template text lives in its own variable, separate
# from the chain wiring below. `{page_data}` is the only placeholder.
extract_template = """
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: `role`, `experience`, `skills` and `description`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):    
        """
prompt_extract = PromptTemplate.from_template(extract_template)

# Pipe the filled-in prompt into the model and run it on the scraped page text.
chain_extract = prompt_extract | llm
response = chain_extract.invoke({"page_data": page_data})
type(response.content)

str

In [9]:
# convert required fields into JSON format

from langchain_core.output_parsers import JsonOutputParser

# Turn the model's raw text reply into Python data via the JSON output parser.
json_parser = JsonOutputParser()
raw_reply = response.content
json_response = json_parser.parse(raw_reply)
json_response

{'role': 'Data Engineer, ITC',
 'experience': '2+ years of hands-on experience in Data Engineering field',
 'skills': ['In depth Big Data tech stack knowledge',
  'Expertise in PySpark and SQL',
  'Expertise in Databricks, Snowflake, Airflow',
  'Excellent written and verbal communication skills'],
 'description': "As a data engineer you'll be a key pillar of the data engineering team. You will collaborate closely with other engineers to deliver key changes to data pipelines that drive Nike's data strategy. On a day-to-day basis, you'll focus on building, enhancing, and troubleshooting complex data pipelines, collaborating with product managers, engineers, analysts to build, enhance and troubleshoot data pipelines, collaborating with senior, lead and principal engineers to define and implement quality standards across data pipelines, and contributing towards the design and architecture of data pipelines and implementing data quality and reliability measures across data pipelines."}

In [10]:
# Inspect the type of the parsed reply; later cells index it by key
# (e.g. job['skills']), so a mapping is expected here.
type(json_response)

dict

In [13]:
import pandas as pd

# Load the portfolio table (later cells read its "Techstack" and "Links"
# columns) and preview the first rows.
portfolio_csv = "sample_portfolio.csv"
df = pd.read_csv(portfolio_csv)
df.head()

Unnamed: 0,Techstack,Links
0,"React, Node.js, MongoDB",https://example.com/react-portfolio
1,"Angular,.NET, SQL Server",https://example.com/angular-portfolio
2,"Vue.js, Ruby on Rails, PostgreSQL",https://example.com/vue-portfolio
3,"Python, Django, MySQL",https://example.com/python-portfolio
4,"Java, Spring Boot, Oracle",https://example.com/java-portfolio


In [14]:
# Save the portfolio rows into a persistent Chroma vector store.

import uuid
import chromadb

# On-disk store under ./vectorstore; the collection is reused when it exists.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Populate only an empty collection, so re-running the cell does not insert
# the same rows again under fresh random ids. The len(df) check keeps the
# original "nothing to add" behaviour for an empty frame.
if not collection.count() and len(df):
    # One batched add instead of one call per row: each tech stack is a
    # document and carries its portfolio link as metadata.
    # NOTE(review): fine for a small sample file; a very large frame would
    # need to be added in chunks because Chroma caps the batch size.
    collection.add(
        documents=df["Techstack"].tolist(),
        metadatas=[{"links": link} for link in df["Links"].tolist()],
        ids=[str(uuid.uuid4()) for _ in range(len(df))],
    )

In [15]:
# Alias the parsed posting and show the skills it lists.
job = json_response
job["skills"]

['In depth Big Data tech stack knowledge',
 'Expertise in PySpark and SQL',
 'Expertise in Databricks, Snowflake, Airflow',
 'Excellent written and verbal communication skills']

In [16]:
# Query the vector store with the posting's skills, asking for two results
# per query text, and keep only the metadata entries (the portfolio links).
# Falls back to an empty list when the result has no 'metadatas' key.
query_result = collection.query(query_texts=job["skills"], n_results=2)
links = query_result.get("metadatas", [])
links

[[{'links': 'https://example.com/ios-portfolio'},
  {'links': 'https://example.com/flutter-portfolio'}],
 [{'links': 'https://example.com/python-portfolio'},
  {'links': 'https://example.com/ml-python-portfolio'}],
 [{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/kotlin-android-portfolio'}],
 [{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}]]

In [18]:
# Cold-email prompt: the template text lives in its own variable, separate
# from the chain wiring below. Placeholders: {job_description}, {link_list}.
email_template = """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are Renga Rajan K, a business development executive at AtliQ. AtliQ is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of AtliQ 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase Atliq's portfolio: {link_list}
        Remember you are Renga Rajan K, BDE at AtliQ. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
prompt_email = PromptTemplate.from_template(email_template)

# Fill the prompt with the parsed job (stringified) and the retrieved links,
# send it to the model, and print the generated email.
chain_email = prompt_email | llm
email_inputs = {"job_description": str(job), "link_list": links}
res = chain_email.invoke(email_inputs)
print(res.content)

Subject: Expert Data Engineering Solutions for Nike's Data Strategy

Dear Hiring Manager,

I hope this email finds you well. I came across the Data Engineer, ITC role at Nike and was impressed by the company's commitment to leveraging data-driven insights to drive business growth. As a Business Development Executive at AtliQ, I'd like to introduce our team's expertise in data engineering and how we can support Nike's data strategy.

At AtliQ, we have a proven track record of delivering high-quality data engineering solutions that drive business success. Our team of experts possesses in-depth knowledge of the Big Data tech stack, including PySpark, SQL, Databricks, Snowflake, and Airflow. We have a strong portfolio of projects that demonstrate our capabilities in building, enhancing, and troubleshooting complex data pipelines.

Some of our notable projects include:

* [Python-based Data Engineering Solutions](https://example.com/python-portfolio)
* [Machine Learning and Python-based Dat