In [1]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment before anything else runs
# (presumably this is where the API key for the ChatOpenAI call below comes from — confirm).
load_dotenv()

True

In [59]:
from core import CompanyProduct

# Company under study; target.company feeds both the Glassdoor search query and the LLM prompt below.
# The second argument is presumably the product name — confirm against core.CompanyProduct.
target = CompanyProduct("Rad AI", "Omni")

In [60]:
# search for a company
from googlesearch import search
from core import CompanyProduct
import re

def find_review_urls(
    target: CompanyProduct, pause_seconds=2
):
    """Search Google for Glassdoor review pages about ``target.company``.

    Args:
        target: Company/product being researched; only ``target.company`` is read.
        pause_seconds: Forwarded to ``search`` as its ``pause`` argument.

    Returns:
        A list of the result URLs that contain ``-Reviews-``, in the order
        ``search`` yielded them. The list is empty when nothing matched.
    """
    # The company name sits inside exactly one balanced pair of double quotes so
    # it is searched as an exact phrase; an unbalanced trailing quote would
    # leave the phrase quoting malformed.
    query = f'site:www.glassdoor.com/Reviews/ "{target.company}"'

    return [
        url
        for url in search(
            query, num=10, stop=10, pause=pause_seconds
        )
        # Keep only results whose URL contains the "-Reviews-" marker.
        if re.match(r".*-Reviews-.*", url)
    ]

urls = find_review_urls(target)

# Fail with an actionable message instead of a bare IndexError when the search
# produced no review pages.
if not urls:
    raise ValueError(f"No Glassdoor review URLs found for {target.company!r}")

# The first hit is the page that gets scraped in the next cell.
url = urls[0]

urls

['https://www.glassdoor.com/Reviews/Rad-AI-Reviews-E3543079.htm',
 'https://www.glassdoor.com/Reviews/Rad-AI-team-Reviews-EI_IE3543079.0,6_KH7,11.htm',
 'https://www.glassdoor.com/Reviews/Rad-AI-Berkeley-Reviews-EI_IE3543079.0,6_IL.7,15_IC1147330.htm',
 'https://www.glassdoor.com/Reviews/Rad-AI-Head-of-Research-Reviews-EI_IE3543079.0,6_KO7,23.htm']

In [61]:
import glassdoor

# Set the scraper module's "cache" flag before any request is made; presumably this lets
# repeated runs reuse already-fetched pages — confirm in the glassdoor module.
glassdoor.BASE_CONFIG["cache"] = True

async def fetch_reviews(url, max_pages=3):
    """Scrape Glassdoor reviews starting from ``url``.

    Args:
        url: Reviews page URL handed to ``glassdoor.scrape_reviews``.
        max_pages: Forwarded to ``glassdoor.scrape_reviews`` as ``max_pages``.
            Defaults to 3, so calls that pass only ``url`` behave as before.

    Returns:
        Whatever ``glassdoor.scrape_reviews`` returns; a later cell reads its
        ``"reviews"`` entry.
    """
    return await glassdoor.scrape_reviews(url, max_pages=max_pages)

# Top-level await: this relies on the notebook kernel running cells inside an event loop.
glassdoor_results = await fetch_reviews(url)

# Uncomment to inspect the raw scrape result:
# from pprint import pprint
# pprint(glassdoor_results)

[32m2024-07-29 16:39:58.734[0m | [1mINFO    [0m | [36mglassdoor[0m:[36mscrape_reviews[0m:[36m107[0m - [1mscraping reviews from https://www.glassdoor.com/Reviews/Rad-AI-Reviews-E3543079.htm[0m
[32m2024-07-29 16:39:59.905[0m | [1mINFO    [0m | [36mglassdoor[0m:[36mscrape_reviews[0m:[36m115[0m - [1mscraped first page of reviews of https://www.glassdoor.com/Reviews/Rad-AI-Reviews-E3543079.htm, scraping remaining 1 pages[0m
[32m2024-07-29 16:40:02.423[0m | [1mINFO    [0m | [36mglassdoor[0m:[36mscrape_reviews[0m:[36m125[0m - [1mscraped 13 reviews from https://www.glassdoor.com/Reviews/Rad-AI-Reviews-E3543079.htm in 2 pages[0m


In [62]:
from datetime import datetime
from typing import NamedTuple, Optional

class GlassdoorReview(NamedTuple):
    """Typed, immutable view of a single Glassdoor review, handy for editor autocomplete."""

    # Fields copied unchanged from the raw review record
    advice: Optional[str]
    cons: Optional[str]
    lengthOfEmployment: int
    pros: Optional[str]
    ratingOverall: int
    reviewId: int
    summary: str

    # Fields derived by from_dict
    jobTitle: Optional[str]
    dateTime: datetime

    @classmethod
    def from_dict(cls, advice, cons, lengthOfEmployment, pros, ratingOverall, reviewId, summary, jobTitle, reviewDateTime, **_kwargs):
        """Build a review from the keys of a raw review record.

        ``jobTitle`` is reduced to its ``"text"`` entry (``None`` when the value
        is falsy), ``reviewDateTime`` is parsed with the format
        ``%Y-%m-%dT%H:%M:%S.%f``, and any additional keys are accepted and
        ignored.

        Raises:
            ValueError: if ``reviewDateTime`` does not match the expected format.
        """
        if jobTitle:
            title_text = jobTitle["text"]
        else:
            title_text = None

        return cls(
            advice=advice,
            cons=cons,
            lengthOfEmployment=lengthOfEmployment,
            pros=pros,
            ratingOverall=ratingOverall,
            reviewId=reviewId,
            summary=summary,
            jobTitle=title_text,
            dateTime=datetime.strptime(reviewDateTime, "%Y-%m-%dT%H:%M:%S.%f"),
        )


In [63]:
import jinja2

# Jinja environment that resolves template names against the "templates" directory (relative path).
templates = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"))

# Convert every raw review record into a GlassdoorReview, ordered by ascending dateTime.
parsed_reviews = sorted(
    (GlassdoorReview.from_dict(**raw_review) for raw_review in glassdoor_results["reviews"]),
    key=lambda parsed: parsed.dateTime,
)

# for review in parsed_reviews:
#     print(templates.get_template("glassdoor_review.md").render(review=review))


In [66]:
# Render each parsed review through the Markdown template, then join the
# pieces with a blank line between them to form the prompt context.
rendered_reviews = [
    templates.get_template("glassdoor_review.md").render(review=review)
    for review in parsed_reviews
]
concat_reviews = "\n\n".join(rendered_reviews)

# Report the context size before sending it to the model.
print(f"The prompt context has {len(concat_reviews):,} characters in {len(parsed_reviews)} reviews")

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Two-message chat prompt: the system message lists what the summary must cover,
# and the human message carries the {company} and {text} placeholders that are
# filled in by the invoke() call later in this cell.
# Note: the triple-quoted strings keep their source indentation, so every prompt
# line is sent with its leading spaces.
review_summary_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Please read the following Glassdoor reviews and write a summary of the key pros, cons, and quotations relating to the following aspects of the company:
            - Leadership
            - Compensation and benefits
            - Diversity, equity, and inclusion
            - Work-life balance
            - Growth opportunities
            - Company culture

            Please also include a section summarizing how the company has changed over time, if applicable.

            Please also summarize the relationship between job functions and employee satisfaction.

            Finish the summary with a list of questions that you would ask the company's leadership both following up on the reviews and also asking about topics that were not mentioned in the reviews.

            Provide a clear and concise summary of the key points, avoiding unnecessary details.
            Format the response as Markdown.

            In quotations please format like: "quote" (job title on date)
            """,
        ),
        (
            "human",
            """
            Company: {company}
            
            Glassdoor reviews: 
            {text}
            """,
        ),
    ]
)

# Summarization model, requested with temperature=0.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Compose prompt and model so a single invoke() call fills the prompt and queries the model.
runnable = review_summary_prompt | llm
summary_result = runnable.invoke(
    {
        # Keys correspond to the {text} and {company} placeholders in the prompt.
        "text": concat_reviews,
        "company": target.company,
    }
)

# The prompt asks for Markdown, so the printed content is Markdown source.
print(summary_result.content)

The prompt context has 12,927 characters in 13 reviews
# Summary of Glassdoor Reviews for Rad AI

## Key Aspects

### Leadership
**Pros:**
- Some reviews highlight strong leadership from co-founders, particularly Jeff Chang, who is described as caring and visionary.
- Open and supportive leadership is noted in several positive reviews.

**Cons:**
- Many reviews criticize the current leadership as egotistical, inexperienced, and incompetent, leading to a toxic work culture.
- There are reports of animosity among co-founders and a lack of trust in employees, with meetings being recorded due to distrust.

**Quotations:**
- "Egotistical, inexperienced, incompetent leadership" (Senior Software Engineer on 2021-04-13)
- "Co-Founders are wonderful. Jeff Chang is one of a human's smartest, most humble gems." (Anonymous on 2023-02-17)

### Compensation and Benefits
**Pros:**
- Good pay and decent benefits are mentioned positively in several reviews.
- Equity is noted as likely to be valuable.

