In [28]:
from langchain_groq import ChatGroq

import os

# Build the Groq chat client used by every chain in this notebook.
# The key is read from the GROQ_API_KEY environment variable rather than being
# written into the cell, so it cannot leak through the saved notebook or version
# control. A missing variable fails immediately with a KeyError naming it.
# NOTE(review): confirm the model id below is still served by Groq before re-running.
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0,  # 0 keeps generations as repeatable as the backend allows
    api_key=os.environ["GROQ_API_KEY"],
)

# Smoke test: one round trip to confirm the credentials and model id work.
response = llm.invoke("Name of the first supercomputer is ...")

print(response.content)

The name of the first supercomputer is UNIVAC I.


In [29]:
from langchain_community.document_loaders import WebBaseLoader

# Download the job posting and keep the text of the last Document the loader returns.
job_posting_url = "https://www.databricks.com/company/careers/university-recruiting/data-scientist---new-grad-2025-start-6866554002"
loader = WebBaseLoader(job_posting_url)
loaded_documents = loader.load()
page_data = loaded_documents[-1].page_content
print(page_data)

Skip to main contentLoginWhy Databricks DiscoverFor ExecutivesFor Startups Lakehouse Architecture DatabricksIQMosaic ResearchCustomersFeatured StoriesSee All CustomersPartnersCloud ProvidersDatabricks on AWS, Azure, and GCPConsulting & System IntegratorsExperts to build, deploy and migrate to DatabricksTechnology PartnersConnect your existing tools to your LakehouseC&SI Partner ProgramBuild, deploy or migrate to the LakehouseData PartnersAccess the ecosystem of data consumersPartner SolutionsFind custom industry and migration solutionsBuilt on DatabricksBuild, market and grow your businessProduct Databricks PlatformPlatform OverviewA unified platform for data, analytics and AIData ManagementData reliability, security and performanceSharingAn open, secure, zero-copy sharing for all dataData WarehousingServerless data warehouse for SQL analyticsGovernanceUnified governance for all data, analytics and AI assetsReal-Time AnalyticsReal-time analytics, AI and applications made simpleArtifici

In [30]:
from langchain_core.prompts import PromptTemplate

# Prompt text: embeds the scraped page under {page_data} and asks the model for a
# single JSON object with the keys 'role', 'experience', 'skills' and 'description'.
extract_template = """
    ### SCRAPED TEXT FROM WEBSITE:
    {page_data}
    ### INSTRUCTION:
    The scraped text is from the career's page of website.
    Your job is to extract the job postings and return them in JSON format containing the
    following keys: 'role', 'experience', 'skills' and 'description'.
    Only return the valid JSON(No list).
    ### VALID JSON(NO PREAMBLE):
    """
prompt_extract = PromptTemplate.from_template(extract_template)

# Pipe the filled-in prompt into the chat model, then run it on the scraped text.
chain_extract = prompt_extract | llm
extract_inputs = {'page_data': page_data}
response = chain_extract.invoke(extract_inputs)
print(response.content)

```json
{
    "role": "Data Scientist - New Grad (2025 Start)",
    "experience": "Master's or PhD degree in a quantitative field (e.g., Statistics, Math, Computer Science, Physics, Economics, Operational Research or Engineering)",
    "skills": "SQL, Python, statistical data analysis, machine learning methods, generalized linear regression, regression and classification trees, unsupervised learning methods, causal inference, stochastic processes, time series forecasting",
    "description": "Work with the Data team and cross-functional stakeholders to use data to solve problems, apply expertise in data science methodologies to real data to deliver insights and/or deploy algorithms to the Databricks platform, manage own project end-to-end from requirements gathering, data exploration to presenting insights to stakeholders and/or deployment an algorithm in a production environment."
}
```


In [31]:
from langchain_core.output_parsers import JsonOutputParser

# Turn the model's textual reply into a Python object and display it.
json_parser = JsonOutputParser()
raw_reply = response.content
json_res = json_parser.parse(raw_reply)
json_res

{'role': 'Data Scientist - New Grad (2025 Start)',
 'experience': "Master's or PhD degree in a quantitative field (e.g., Statistics, Math, Computer Science, Physics, Economics, Operational Research or Engineering)",
 'skills': 'SQL, Python, statistical data analysis, machine learning methods, generalized linear regression, regression and classification trees, unsupervised learning methods, causal inference, stochastic processes, time series forecasting',
 'description': 'Work with the Data team and cross-functional stakeholders to use data to solve problems, apply expertise in data science methodologies to real data to deliver insights and/or deploy algorithms to the Databricks platform, manage own project end-to-end from requirements gathering, data exploration to presenting insights to stakeholders and/or deployment an algorithm in a production environment.'}

In [32]:
# Show the Python type of the parsed result.
type(json_res)

dict

In [33]:
import pandas as pd

# Read the portfolio table (tech stacks and their showcase links) and display it.
portfolio_csv = "my_portfolio.csv"
df = pd.read_csv(portfolio_csv)
df

Unnamed: 0,Techstack,Links
0,"React, Node.js, MongoDB",https://example.com/react-portfolio
1,"Angular,.NET, SQL Server",https://example.com/angular-portfolio
2,"Vue.js, Ruby on Rails, PostgreSQL",https://example.com/vue-portfolio
3,"Python, Django, MySQL",https://example.com/python-portfolio
4,"Java, Spring Boot, Oracle",https://example.com/java-portfolio
5,"Flutter, Firebase, GraphQL",https://example.com/flutter-portfolio
6,"WordPress, PHP, MySQL",https://example.com/wordpress-portfolio
7,"Magento, PHP, MySQL",https://example.com/magento-portfolio
8,"React Native, Node.js, MongoDB",https://example.com/react-native-portfolio
9,"iOS, Swift, Core Data",https://example.com/ios-portfolio


In [37]:
import uuid
import chromadb

# Open (or create) the persistent Chroma store at the 'vectorstore' path and the
# "portfolio" collection inside it.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Seed the collection only when it is empty, so re-running the cell does not
# insert duplicates. All rows go in through a single add() call instead of one
# call per row. The df.empty guard keeps an empty portfolio a no-op, as the
# row-by-row loop was.
# NOTE(review): Chroma enforces a maximum batch size; chunk the lists if the
# portfolio ever grows to thousands of rows.
if not collection.count() and not df.empty:
    techstacks = df["Techstack"].tolist()
    collection.add(
        documents=techstacks,
        metadatas=[{"links": link} for link in df["Links"]],
        ids=[str(uuid.uuid4()) for _ in techstacks],  # one fresh random id per row
    )

In [39]:
# Bind the parsed posting to the name `job` and display it.
job = json_res
job

{'role': 'Data Scientist - New Grad (2025 Start)',
 'experience': "Master's or PhD degree in a quantitative field (e.g., Statistics, Math, Computer Science, Physics, Economics, Operational Research or Engineering)",
 'skills': 'SQL, Python, statistical data analysis, machine learning methods, generalized linear regression, regression and classification trees, unsupervised learning methods, causal inference, stochastic processes, time series forecasting',
 'description': 'Work with the Data team and cross-functional stakeholders to use data to solve problems, apply expertise in data science methodologies to real data to deliver insights and/or deploy algorithms to the Databricks platform, manage own project end-to-end from requirements gathering, data exploration to presenting insights to stakeholders and/or deployment an algorithm in a production environment.'}

In [40]:
# Display the posting's 'skills' entry.
job['skills']

'SQL, Python, statistical data analysis, machine learning methods, generalized linear regression, regression and classification trees, unsupervised learning methods, causal inference, stochastic processes, time series forecasting'

In [36]:
# Look up the two portfolio entries closest to the posting's skills and keep only
# the metadata (which carries the links) from the query result.
query_result = collection.query(query_texts=job['skills'], n_results=2)
links = query_result.get('metadatas', [])
links

[[{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}]]

In [41]:
# Cold-email prompt. It takes two variables:
#   {job_description} - the extracted posting, passed below as str(job)
#   {link_list}       - the portfolio metadata returned by the collection query
# The company name is spelled "Soham.ai" consistently; one line previously read
# "Soham.aI", which the model could echo into the generated email.
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are Soham, a business development executive at Soham.ai. Soham.ai is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of Soham.ai 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase Soham.ai's portfolio: {link_list}
        Remember you are Soham, BDE at Soham.ai. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
        )

# Pipe the filled-in prompt into the chat model and print the generated email.
chain_email = prompt_email | llm
res = chain_email.invoke({"job_description": str(job), "link_list": links})
print(res.content)

Subject: Expert Data Science Solutions for Seamless Business Integration

Dear Hiring Manager,

I came across the job description for a Data Scientist - New Grad (2025 Start) at your esteemed organization, and I am excited to introduce Soham.ai as a trusted partner in fulfilling your data science needs. Our team of experts specializes in leveraging AI and software consulting to drive business growth, scalability, and efficiency.

At Soham.ai, we have a proven track record of delivering tailored solutions that cater to the unique requirements of our clients. Our expertise in data science methodologies, including statistical data analysis, machine learning methods, and time series forecasting, aligns perfectly with the skills required for the Data Scientist role.

Our team is well-versed in managing projects end-to-end, from requirements gathering to deployment, and has a strong background in working with cross-functional stakeholders to deliver insights and deploy algorithms in producti