In [39]:
from typing import Optional

from core import CompanyProduct
from search import search, SearchResult

def find_indeed_jobs(target: CompanyProduct) -> Optional[SearchResult]:
    """Search for the Indeed company page of ``target.company``.

    The query is restricted to ``www.indeed.com/cmp`` and quotes the company
    name so it is matched as a phrase. ``search`` is called with ``num=1``
    (presumably limiting the search to one hit -- confirm in ``search``).

    Args:
        target: Company/product pair; only ``target.company`` is read here.

    Returns:
        The first search result, or ``None`` when the search yields nothing.
    """
    query = f'site:www.indeed.com/cmp "{target.company}"'
    results = list(search(query, num=1, debug=False))
    if not results:
        # Make the "no hit" outcome explicit instead of falling off the end
        # of the function, so the Optional return type is visible to callers.
        return None
    return results[0]

# Look up the Indeed company page for 'Rad AI' (arguments are presumably
# (company, product) -- confirm against core.CompanyProduct).
# NOTE: despite the plural name this holds a single SearchResult, or None
# when find_indeed_jobs found nothing.
company_search_results = find_indeed_jobs(CompanyProduct('Rad AI', 'Omni'))
company_search_results

SearchResult(title='RAD AI Jobs and Careers | Indeed.com', link='https://www.indeed.com/cmp/Rad-Ai/jobs', snippet='14 RAD AI jobs. Apply to the latest jobs near you. Learn about salary, employee reviews, interviews, benefits, and work-life balance.', formattedUrl='https://www.indeed.com/cmp/Rad-Ai/jobs')

In [40]:
from pprint import pprint
import indeed
indeed.BASE_CONFIG["cache"] = True

company_result = await indeed.scrape_search(company_search_results.link)
# pprint(result)


[32m2024-08-05 18:37:46.673[0m | [1mINFO    [0m | [36mindeed[0m:[36mscrape_search[0m:[36m49[0m - [1mscraping search: https://www.indeed.com/cmp/Rad-Ai/jobs[0m
[32m2024-08-05 18:37:47.540[0m | [1mINFO    [0m | [36mindeed[0m:[36mscrape_search[0m:[36m64[0m - [1mfound total pages 0 search pages[0m


scraping remaining -1.0 pages


In [42]:
from pydantic import BaseModel, model_validator
from typing import List, Dict, Optional

class Salary(BaseModel):
    """Salary snippet attached to a job overview.

    ``source`` and ``text`` may be absent from the incoming data, in which
    case they are ``None``.
    """
    currency: str
    salaryTextFormatted: bool
    # Plain defaults replace the earlier "before" validator, which injected the
    # missing keys into the caller's dict (mutating the scraped data in place)
    # and raised TypeError on input that does not support `in`.
    source: Optional[str] = None
    text: Optional[str] = None

class Attribute(BaseModel):
    """A single labelled attribute of a job posting.

    In the sample output below, values look like label='Full-time',
    suid='CF3CP'.
    """
    label: str  # human-readable name
    suid: str   # short identifier string supplied with the label

class Attributes(BaseModel):
    """A labelled group of Attribute entries.

    In the sample output the group labels include 'job-types', 'shifts' and
    'remote'; a group may carry an empty list.
    """
    attributes: List[Attribute]  # members of this group (possibly empty)
    label: str                   # name of the group

class JobOverview(BaseModel):
    """One job entry from the list returned by ``indeed.scrape_search``.

    Every field is required; keys of the scraped entry that are not declared
    here are not part of the model.
    """
    # createDate / pubDate: the sample output shows 13-digit integers, which
    # look like epoch milliseconds -- TODO confirm.
    createDate: int
    displayTitle: str
    expired: bool
    formattedLocation: str
    formattedRelativeTime: str  # e.g. '30+ days ago' in the sample output
    jobLocationCity: str
    jobkey: str  # key later passed to indeed.scrape_jobs to fetch details
    pubDate: int
    remoteLocation: bool
    title: str
    salarySnippet: Salary
    truncatedCompany: str
    taxonomyAttributes: List[Attributes]

# Turn each raw entry scraped from the company's jobs page into a typed
# JobOverview, then display the resulting list.
job_overviews = [JobOverview(**raw_hit) for raw_hit in company_result]
job_overviews

[JobOverview(createDate=1705091739000, displayTitle='[ Choose Your Own Role ]', expired=False, formattedLocation='Remote', formattedRelativeTime='30+ days ago', jobLocationCity='Remote', jobkey='3b224a1fc510b30f', pubDate=1705039200000, remoteLocation=True, title='[ Choose Your Own Role ]', salarySnippet=Salary(currency='', salaryTextFormatted=False, source=None, text=None), truncatedCompany='Rad AI', taxonomyAttributes=[Attributes(attributes=[Attribute(label='Full-time', suid='CF3CP')], label='job-types'), Attributes(attributes=[], label='shifts'), Attributes(attributes=[Attribute(label='Remote', suid='DSQF7')], label='remote'), Attributes(attributes=[Attribute(label='Health savings account', suid='7KV6C'), Attribute(label='Health insurance', suid='EY33Q'), Attribute(label='Dental insurance', suid='FQJ2X'), Attribute(label='Flexible spending account', suid='G85UP'), Attribute(label='Paid time off', suid='HW4J4'), Attribute(label='Vision insurance', suid='RZAT2'), Attribute(label='401(

In [43]:
jobKeys = [job.jobkey for job in job_overviews]
jobKeys

jobs = jobKeys[:2]
job_detail_results = await indeed.scrape_jobs(jobs)


[32m2024-08-05 18:38:20.724[0m | [1mINFO    [0m | [36mindeed[0m:[36mscrape_jobs[0m:[36m89[0m - [1mscraping 2 job listings[0m


In [44]:
class JobDetails(BaseModel):
    """Detail record of one posting from the list returned by ``indeed.scrape_jobs``.

    Fields typed ``Optional`` default to ``None``. In pydantic v2 an
    ``Optional[...]`` annotation without a default is still a required key,
    so without the defaults a posting that omits one of them would fail
    validation.
    """
    companyName: str
    companyOverviewLink: str
    companyReviewLink: str
    description: str # html formatted
    formattedLocation: str
    jobNormTitle: Optional[str] = None
    jobTitle: str
    jobType: str
    jobTypes: Optional[List[str]] = None
    location: Optional[str] = None
    remoteLocation: bool
    remoteWorkModel: Dict
    salaryCurrency: Optional[str] = None
    salaryMax: Optional[int] = None
    salaryMin: Optional[int] = None
    salaryType: Optional[str] = None
    subtitle: str


# Validate each scraped detail record into a typed JobDetails model.
job_details = [JobDetails(**raw_detail) for raw_detail in job_detail_results]


from markdownify import markdownify as md

# Print a short preview of every job: title, company, location and job type,
# followed by the description converted from HTML with markdownify.
# The [:300] slice is applied after the conversion, so the preview is cut at
# 300 characters and may end in the middle of a sentence or markup.
for job_detail in job_details:
    print(f"""
# {job_detail.jobTitle} at {job_detail.companyName}
- {job_detail.formattedLocation}
- {job_detail.jobType}

{md(job_detail.description)[:300]}
""")


# Staff Software Engineer, Backend at Rad AI
- Remote
- Full-time


**About Rad AI**
----------------


 We have raised $80\+ million to date from venture funds and just closed on our series B financing with investors Khosla Ventures, Gradient (Google’s AI fund) and ARTIS. We’ve also formed a partnership with Google to collaborate on the future of generative AI to 


# [ Choose Your Own Role ] at Rad AI
- Remote
- Full-time


**About Rad AI**
----------------


 We have raised $80\+ million to date from venture funds and just closed on our series B financing with investors Khosla Ventures, Gradient (Google’s AI fund) and ARTIS. We’ve also formed a partnership with Google to collaborate on the future of generative AI to 

