In [104]:
import os
import uuid

import chromadb
import pandas as pd
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import OllamaLLM

In [158]:
# Job posting page (PUMA careers site) that the rest of the notebook scrapes.
url = "https://about.puma.com/en/jobs/it-coordinator-r33467"

In [177]:
# Extraction prompt: takes job-posting text in `page_data` and asks the model
# for a single JSON object with the keys role, experience, skills and description.
prompt2 = PromptTemplate.from_template(
    """
    ### SCRAPED TEXT:
    {page_data}

    ### INSTRUCTION:
    Extract job postings from the About Job section. Return a single JSON with these keys: `role (with companys name example: AT xyz company)`, `experience`, `skills`, and `description`.
    - Respond with valid JSON only, no extra text.
    """
)


In [178]:
# Clean-up prompt: takes the raw scraped page text in `page_data` and asks the
# model to rewrite it in a human-readable form.
prompt1 = PromptTemplate.from_template(
    """
    {page_data}  this is scraped data i want you to find and make this data human readable 
    """
)

In [179]:
# Fetch the job page and keep the text of the last document the loader returns.
loader = WebBaseLoader(url)
page_documents = loader.load()
extracted_content = page_documents[-1].page_content

In [180]:
# Read the Gemini API key from the environment; secrets must never be committed
# in a notebook. The key that was previously hardcoded in this cell should be
# treated as compromised and rotated.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this cell."
    )

# Gemini model shared by every chain in this notebook.
llm = GoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key)

In [181]:
# Clean-up chain: the clean-up prompt (prompt1) piped into the LLM.
chain = prompt1 | llm

In [182]:
# Run the clean-up chain on the raw scraped page text.
cleanup_inputs = {"page_data": extracted_content}
response1 = chain.invoke(cleanup_inputs)


In [183]:
# Inspect the raw scraped text (includes the site's navigation and whitespace noise).
extracted_content

"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIT Coordinator\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\t\t\t\tSkip to main content\n\t\t\t\n\n\n\n\n\n\n\n\n\n\n\n\n\nMenu\n\n\n\n\n\n\t\t\t\t\t\t\tBack\n\t\t\t\t\t\t\n\n\nClose\n\n\n\n\n\nThis is PUMA\n\n\n\nAbout Puma\n\n\nOUR STRATEGY\n\n\nOur Sports\n\n\n\nFootball and Other Teamsports\n\n\nTrack and Field\n\n\nMotorsport\n\n\nGolf\n\n\nBasketball\n\n\n\n\n\nOur Management\n\n\nHistory\n\n\nArchive Stories\n\n\n\n\n\nNewsroom\n\n\n\nNews\n\n\nImages and Footage\n\n\nMedia Contacts\n\n\nNews Calendar\n\n\nEmployee Magazine\n\n\n\n\n\nInvestor Relations\n\n\n\nShare\n\n\n\nShare Price\n\n\nAnalyst Coverage\n\n\nShare Buyback 2024 - 2025\n\n\nShareholder Structure\n\n\nVoting Rights Notifications\n\n\nDirectors' Dealings\n\n\n\n\n\nInvestor News\n\n\nFinancial Publications\n\n\nCalendar\n\n\nCapital Markets Day\n\n\nAnnual General Meeting\n\n\nOur Management\n\n\nCorporate Governance\n\n\nSustai

In [184]:
# Show the model's human-readable rewrite of the scraped page.
print(response1)

Okay, here's a human-readable version of the PUMA IT Coordinator job posting, extracted and organized for clarity:

**Job Title:** IT Coordinator

**Company:** PUMA

**Location:** District 2, Bucharest, Romania

**About PUMA:**

*   PUMA is a global sports brand embodying "SPEED & SPIRIT," driven by values of being driven, vibrant, together, and authentic.
*   PUMA supports over 21,000 employees across 51 countries.
*   The PUMA Group owns the brands PUMA, Cobra Golf and stichd, and is headquartered in Herzogenaurach, Germany.
*   PUMA is committed to equal opportunities, diversity, and does not tolerate harassment or discrimination.

**Job Overview:**

This position is responsible for managing and maintaining the IT infrastructure for PUMA's office and retail stores in Bucharest. This includes server administration, network management, user support, and ensuring the smooth operation of IT systems. The IT Coordinator will also be involved in IT strategy, budgeting, and project implemen

In [185]:
# Extraction chain: the JSON-extraction prompt (prompt2) piped into the same LLM.
chain2 = prompt2 | llm

In [186]:
# Run the extraction chain on the cleaned-up text (response1), not on the raw scrape.
extraction_inputs = {"page_data": response1}
final_response = chain2.invoke(extraction_inputs)

In [187]:
# Show the raw model reply; in the saved run it is JSON wrapped in a Markdown code fence.
print(final_response)

```json
{
  "role": "IT Coordinator at PUMA",
  "experience": "Minimum 5 years of experience in IT systems administration.",
  "skills": [
    "Microsoft Windows Server",
    "Active Directory",
    "DNS",
    "Windows 10",
    "Office 365",
    "Internet",
    "Network technologies",
    "Technical documentation creation",
    "Vendor Management",
    "Budget Management",
    "Strategic Planning",
    "Procurement",
    "System Administration",
    "IT Support",
    "Equipment Management",
    "Backup & Recovery",
    "User Support"
  ],
  "description": "This position is responsible for managing and maintaining the IT infrastructure for PUMA's office and retail stores in Bucharest. This includes server administration, network management, user support, and ensuring the smooth operation of IT systems. The IT Coordinator will also be involved in IT strategy, budgeting, and project implementation."
}
```


In [188]:
# Parse the model's reply text into a Python object.
json_parser = JsonOutputParser()
json_response = json_parser.parse(final_response)

In [189]:
# Sanity check: the parsed reply should be a dict so it can be indexed by key below.
print(type(json_response))

<class 'dict'>


In [190]:
# Load the portfolio table (columns used later: "Techstack" and "Links").
# The first CSV column is a previously saved index, so read it as the index
# instead of letting pandas surface it as a stray "Unnamed: 0" column.
portfolio_data = pd.read_csv("my_portfolio.csv", index_col=0)
portfolio_data

Unnamed: 0,Techstack,Links
0,"React, Node.js, MongoDB",https://example.com/react-portfolio
1,"Angular,.NET, SQL Server",https://example.com/angular-portfolio
2,"Vue.js, Ruby on Rails, PostgreSQL",https://example.com/vue-portfolio
3,"Python, Django, MySQL",https://example.com/python-portfolio
4,"Java, Spring Boot, Oracle",https://example.com/java-portfolio
5,"Flutter, Firebase, GraphQL",https://example.com/flutter-portfolio
6,"WordPress, PHP, MySQL",https://example.com/wordpress-portfolio
7,"Magento, PHP, MySQL",https://example.com/magento-portfolio
8,"React Native, Node.js, MongoDB",https://example.com/react-native-portfolio
9,"iOS, Swift, Core Data",https://example.com/ios-portfolio


In [191]:
# Open (or create) the on-disk Chroma store in ./vectorstore and its "portfolio" collection.
client = chromadb.PersistentClient(path="vectorstore")
collection = client.get_or_create_collection(name="portfolio")

# Populate the collection only when it is empty so that re-running this cell
# does not insert duplicates (ids are random UUIDs).
# NOTE(review): because of this guard, later changes to the CSV are NOT picked up
# while the persisted collection is non-empty; clear the "vectorstore" directory
# (or the collection) to rebuild it from the current CSV.
if not collection.count() and not portfolio_data.empty:
    techstacks = portfolio_data["Techstack"].tolist()
    # One batched add instead of one call per row: each tech stack is a document,
    # with its portfolio link stored as metadata under the "links" key.
    collection.add(
        documents=techstacks,
        metadatas=[{"links": link} for link in portfolio_data["Links"]],
        ids=[str(uuid.uuid4()) for _ in techstacks],
    )

In [192]:
# Query the portfolio collection once per extracted skill, asking for two results
# each, and keep only the metadata (the portfolio links) of those results.
skill_matches = collection.query(
    query_texts=json_response['skills'],
    n_results=2,
)
links = skill_matches.get('metadatas')
links

[[{' links ': 'https://example.com/angular-portfolio'},
  {' links ': 'https://example.com/wordpress-portfolio'}],
 [{' links ': 'https://example.com/angular-portfolio'},
  {' links ': 'https://example.com/magento-portfolio'}],
 [{' links ': 'https://example.com/magento-portfolio'},
  {' links ': 'https://example.com/full-stack-js-portfolio'}],
 [{' links ': 'https://example.com/java-portfolio'},
  {' links ': 'https://example.com/xamarin-portfolio'}],
 [{' links ': 'https://example.com/xamarin-portfolio'},
  {' links ': 'https://example.com/android-portfolio'}],
 [{' links ': 'https://example.com/android-tv-portfolio'},
  {' links ': 'https://example.com/ml-python-portfolio'}],
 [{' links ': 'https://example.com/ml-python-portfolio'},
  {' links ': 'https://example.com/xamarin-portfolio'}],
 [{' links ': 'https://example.com/kotlin-backend-portfolio'},
  {' links ': 'https://example.com/wordpress-portfolio'}],
 [{' links ': 'https://example.com/typescript-frontend-portfolio'},
  {' li

In [194]:
# Number of result lists returned by the query above.
len(links)

17

In [197]:
# Same per-skill query, but with a single result each and returning the matched
# tech-stack documents instead of the link metadata.
top_match_per_skill = collection.query(
    query_texts=json_response['skills'],
    n_results=1,
)
collection_query = top_match_per_skill.get('documents')
collection_query

[['Angular,.NET, SQL Server'],
 ['Angular,.NET, SQL Server'],
 ['Magento, PHP, MySQL'],
 ['Java, Spring Boot, Oracle'],
 ['Cross-platform, Xamarin, Azure'],
 ['Android TV, Kotlin, Android NDK'],
 ['Machine Learning, Python, TensorFlow'],
 ['Backend, Kotlin, Spring Boot'],
 ['Frontend, TypeScript, Angular'],
 ['Frontend, TypeScript, Angular'],
 ['Kotlin, Android, Firebase'],
 ['Machine Learning, Python, TensorFlow'],
 ['Java, Spring Boot, Oracle'],
 ['iOS, Swift, ARKit'],
 ['Machine Learning, Python, TensorFlow'],
 ['Backend, Kotlin, Spring Boot'],
 ['Magento, PHP, MySQL']]

In [198]:
# Cold-email prompt: takes the extracted job description (`job_description`) and
# the portfolio links (`link_list`) and asks the model to write the email as
# "SHIVAM", a business development executive, with no preamble.
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are SHIVAM, a business development executive at gambler. gambler is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a structure cold EMAIL to the client regarding the job mentioned above describing the capability of gambler 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase gambler's portfolio: {link_list}
        Remember you are shivam, BDE at gambler. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
        )


In [199]:
# Email chain: the cold-email prompt piped into the same LLM.
chain_email = prompt_email | llm

In [200]:
# Generate the email from the parsed job data (stringified) and the per-skill
# portfolio link metadata gathered above.
email_inputs = {
    "job_description": str(json_response),
    "link_list": links,
}
email = chain_email.invoke(email_inputs)

In [201]:
# Show the generated cold email.
print(email)

Subject: Optimizing IT Infrastructure & Automation for PUMA in Bucharest

Dear Hiring Manager,

My name is Shivam, and I'm a Business Development Executive at gambler, an AI & Software Consulting company. I came across the IT Coordinator opening at PUMA in Bucharest, and I was immediately struck by how gambler's expertise aligns with your needs.

Specifically, your requirement for experience in system administration (Microsoft Windows Server, Active Directory, DNS, Windows 10, Office 365), network technologies, user support, and vendor management resonates deeply with our capabilities. We have a proven track record of helping companies like yours streamline their IT operations through automation and strategic implementation.

gambler specializes in facilitating the seamless integration of business processes through automated tools. We've helped numerous enterprises achieve scalability, process optimization, cost reduction, and increased efficiency. We can assist PUMA in Bucharest with: