In [1]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): later cells presumably rely on this for API credentials
# (e.g. the OpenAI key used by ChatOpenAI) — confirm which keys are required.
load_dotenv()

True

In [2]:
# search for a company
from googlesearch import search
from core import CompanyProduct

def find_review_urls(
    target: CompanyProduct, pause_seconds=2
):
    """Search Google for Glassdoor review pages that mention a company.

    Args:
        target: Company descriptor; only ``target.company`` is read here.
        pause_seconds: Passed through to ``search`` as its ``pause`` argument
            (presumably the delay between HTTP requests — confirm against the
            installed ``googlesearch`` version).

    Returns:
        A list of the result URLs yielded by ``search`` (the call asks for
        ``num=10`` / ``stop=10``), restricted to ``www.glassdoor.com/Reviews/``.
    """
    # The company name is wrapped in exactly one pair of double quotes so it is
    # searched as a phrase; the previous version emitted a stray trailing quote.
    query = f'site:www.glassdoor.com/Reviews/ "{target.company}"'

    return list(search(query, num=10, stop=10, pause=pause_seconds))

# Try the search on a sample company; as the last expression in the cell, the
# returned URL list is echoed below.
# NOTE(review): CompanyProduct.same presumably builds a target whose company and
# product names are identical — confirm in core.
find_review_urls(CompanyProduct.same('98point6'))

['https://www.glassdoor.com/Reviews/98point6-Reviews-E1181484.htm',
 'https://www.glassdoor.com/Reviews/98point6-Seattle-Reviews-EI_IE1181484.0,8_IL.9,16_IM781.htm',
 'https://www.glassdoor.com/Reviews/Employee-Review-98point6-RVW83458845.htm',
 'https://www.glassdoor.com/Reviews/Employee-Review-98point6-RVW71300373.htm',
 'https://www.glassdoor.com/Reviews/Employee-Review-98point6-RVW54659972.htm',
 'https://www.glassdoor.com/Reviews/98point6-Reviews-E1181484.htm?filter.jobTitleExact=Software+Engineer(Internship)',
 'https://www.glassdoor.com/Reviews/98point6-layoff-Reviews-EI_IE1181484.0,8_KH9,15.htm',
 'https://www.glassdoor.com/Reviews/98point6-Physician-Reviews-EI_IE1181484.0,8_KO9,18.htm',
 'https://www.glassdoor.com/Reviews/98point6-98point6-Reviews-EI_IE1181484.0,8_KH9,17.htm',
 'https://www.glassdoor.com/Reviews/Employee-Review-98point6-RVW71251745.htm']

In [5]:
import glassdoor
import asyncio

# Set the "cache" flag in the scraper module's shared config before scraping.
# NOTE(review): presumably this makes the glassdoor module cache fetched pages —
# confirm how BASE_CONFIG["cache"] is consumed.
glassdoor.BASE_CONFIG["cache"] = True

async def fetch_reviews(url, max_pages=3):
    """Scrape Glassdoor reviews for one company reviews URL.

    Args:
        url: Glassdoor reviews page URL handed to ``glassdoor.scrape_reviews``.
        max_pages: Forwarded as ``max_pages`` to the scraper. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        Whatever ``glassdoor.scrape_reviews`` returns (later cells read its
        ``"reviews"`` key).
    """
    return await glassdoor.scrape_reviews(url, max_pages=max_pages)

# Top-level `await` is not valid in a plain Python script; this cell relies on
# the notebook running it in an async-aware context.
results = await fetch_reviews('https://www.glassdoor.com/Reviews/98point6-Reviews-E1181484.htm')

from pprint import pprint
# Dump the entire scrape result to inspect its structure (the output is long).
pprint(results)

[32m2024-07-29 13:00:52.581[0m | [1mINFO    [0m | [36mglassdoor[0m:[36mscrape_reviews[0m:[36m107[0m - [1mscraping reviews from https://www.glassdoor.com/Reviews/98point6-Reviews-E1181484.htm[0m
[32m2024-07-29 13:00:54.648[0m | [1mINFO    [0m | [36mglassdoor[0m:[36mscrape_reviews[0m:[36m115[0m - [1mscraped first page of reviews of https://www.glassdoor.com/Reviews/98point6-Reviews-E1181484.htm, scraping remaining 2 pages[0m
[32m2024-07-29 13:01:14.212[0m | [1mINFO    [0m | [36mglassdoor[0m:[36mscrape_reviews[0m:[36m125[0m - [1mscraped 30 reviews from https://www.glassdoor.com/Reviews/98point6-Reviews-E1181484.htm in 3 pages[0m


{'__typename': 'EmployerReviewsRG',
 'allReviewsCount': 178,
 'currentPage': 1,
 'filteredReviewsCount': 167,
 'lastReviewDateTime': '2024-06-27T00:39:17.030',
 'numberOfPages': 17,
 'queryJobTitle': None,
 'queryLocation': None,
 'ratedReviewsCount': 167,
 'ratingCountDistribution': {'__typename': 'RatingCountDistribution',
                             'careerOpportunities': {'_1': 10,
                                                     '_2': 9,
                                                     '_3': 12,
                                                     '_4': 25,
                                                     '_5': 92,
                                                     '__typename': 'FiveStarRatingCountDistribution'},
                             'compensationAndBenefits': {'_1': 4,
                                                         '_2': 7,
                                                         '_3': 15,
                                                         

  results = await fetch_reviews('https://www.glassdoor.com/Reviews/98point6-Reviews-E1181484.htm')


In [35]:
reviews = results["reviews"]

# sample a random review
import random
review = random.choice(reviews)

# A review may lack a usable job title: the "jobTitle" key (or its "text" key)
# can be missing (KeyError) or the value can be None (TypeError on subscript).
# Catch only those cases rather than using a bare `except`, which would also
# swallow unrelated errors such as KeyboardInterrupt.
try:
    job_title = review["jobTitle"]["text"]
except (KeyError, TypeError):
    job_title = "Anonymous"

# parse 'reviewDateTime' (e.g. '2024-06-27T00:39:17.030') to a datetime
from datetime import datetime
review_date = datetime.strptime(review["reviewDateTime"], "%Y-%m-%dT%H:%M:%S.%f")

# Human-readable preview of the sampled review.
print(f"""
{review['ratingOverall']} stars by {job_title} on {review_date.strftime('%B %d, %Y')}
{review['summary']}

Pros:
{review['pros']}

Cons:
{review['cons']}

Advice to Management:
{review['advice']}

      """)

# NamedTuple for a review
from typing import NamedTuple, Optional

class GlassdoorReview(NamedTuple):
    """Wrapper around a Glassdoor review to make autocomplete easier.

    Instances are normally built with :meth:`from_dict` from one raw review
    dict; the field order below is also the positional tuple order.
    """
    # raw fields (copied unchanged from the raw review dict)
    advice: Optional[str]
    cons: Optional[str]
    lengthOfEmployment: int
    pros: Optional[str]
    ratingOverall: int
    reviewId: int
    summary: str

    # processed fields
    jobTitle: Optional[str]  # raw ``jobTitle["text"]``, or None when no title is given
    dateTime: datetime  # parsed from the raw ``reviewDateTime`` string


    @classmethod
    def from_dict(cls, advice, cons, lengthOfEmployment, pros, ratingOverall, reviewId, summary, jobTitle, reviewDateTime, **_kwargs):
        """Build a review from the keys of a raw review dict (``from_dict(**raw)``).

        Args:
            advice, cons, lengthOfEmployment, pros, ratingOverall, reviewId,
            summary: Stored unchanged.
            jobTitle: Mapping with a ``"text"`` entry, or a falsy value
                (e.g. ``None``) when the review has no job title.
            reviewDateTime: Timestamp string such as
                ``'2024-06-27T00:39:17.030'``; a value without fractional
                seconds (``'2024-06-27T00:39:17'``) is accepted as well.
            **_kwargs: Any other raw keys; ignored.

        Returns:
            A ``GlassdoorReview`` instance.

        Raises:
            ValueError: If ``reviewDateTime`` matches neither accepted format.
        """
        job_title = jobTitle["text"] if jobTitle else None

        try:
            date_time = datetime.strptime(reviewDateTime, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            # "%f" requires a fractional part; fall back for whole-second
            # timestamps instead of failing the whole conversion.
            date_time = datetime.strptime(reviewDateTime, "%Y-%m-%dT%H:%M:%S")

        return cls(
            advice=advice,
            cons=cons,
            lengthOfEmployment=lengthOfEmployment,
            pros=pros,
            ratingOverall=ratingOverall,
            reviewId=reviewId,
            summary=summary,
            jobTitle=job_title,
            dateTime=date_time,
        )

from pprint import pprint
# Sanity check: convert the sampled raw review dict and pretty-print the result.
pprint(GlassdoorReview.from_dict(**review))


4 stars by Senior Operations Manager on August 23, 2023
Total Rewards

Pros:
Values-oriented culture, flexible work arrangements, solid health benefits

Cons:
Tough industry, and thus, hard to land on a winning business model

Advice to Management:
Provide clarity and open communication channels

      
GlassdoorReview(advice='Provide clarity and open communication channels', cons='Tough industry, and thus, hard to land on a winning business model', lengthOfEmployment=2, pros='Values-oriented culture, flexible work arrangements, solid health benefits', ratingOverall=4, reviewId=79428508, summary='Total Rewards', jobTitle='Senior Operations Manager', dateTime=datetime.datetime(2023, 8, 23, 20, 29, 46, 583000))


In [38]:
import jinja2

# Jinja environment that loads templates from the local "templates" directory.
templates = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"))

# Convert every raw review dict and order the results by timestamp, ascending
# (oldest review first).
parsed_reviews = sorted(
    (GlassdoorReview.from_dict(**raw_review) for raw_review in reviews),
    key=lambda parsed: parsed.dateTime,
)

# Render each parsed review through the Markdown template and print it.
for review in parsed_reviews:
    rendered = templates.get_template("glassdoor_review.md").render(review=review)
    print(rendered)


# 1 stars by Anonymous on 2022-01-11

A lot of this has been said

## Pros

Great goals - to improve access and quality of healthcare for all. A desire for diversity.

## Cons

The interview process is your first indication: this is a technical company trying to do everything measurable and data-based. But with the usual problem of humans not being able to understand their biases and produce valid measures. Having interviewed and been interviewed, I've seen both signs.
Then once you get on board, there's a lot to be celebrated (along with celebrations of people and the 'fun' environment). 
However, you soon realize that this is basically another Amazon in disguise. It makes sense when you consider that the CTO and CPO (along with CMO and many others in leadership roles) are ex-Amazonians. There's a complete lack of understanding of how human beings work, which leads to politics, bullying, exclusion and a lack of pyschological safety. If you're part of the in crowd (and you have enough 

In [47]:
# Render every parsed review to Markdown, then join the pieces with newlines
# into the single text blob that is sent to the model.
rendered_reviews = (
    templates.get_template("glassdoor_review.md").render(review=parsed_review)
    for parsed_review in parsed_reviews
)
concat_reviews = "\n".join(rendered_reviews)

# Report the payload size as a rough check on prompt length.
print(f"{len(concat_reviews):,} characters in {len(parsed_reviews)} reviews")

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Two-message chat prompt: a system message describing the summary to write,
# and a human message carrying the {company} name and the review {text}.
review_summary_prompt = ChatPromptTemplate.from_messages(
    [
        # Instructions: aspects to cover, extra sections, and output format.
        (
            "system",
            """
            Please read the following Glassdoor reviews and write a summary of the key pros, cons, and quotations relating to the following aspects of the company:
            - Leadership
            - Compensation and benefits
            - Diversity, equity, and inclusion
            - Work-life balance
            - Growth opportunities
            - Company culture

            Please also include a section summarizing how the company has changed over time, if applicable.

            Please also summarize the relationship between job functions and employee satisfaction.

            Finish the summary with a list of questions that you would ask the company's leadership both following up on the reviews and also asking about topics that were not mentioned in the reviews.

            Provide a clear and concise summary of the key points, avoiding unnecessary details.
            Format the response as Markdown.
            """,
        ),
        # Input: filled in at invoke() time via the "company" and "text" keys.
        (
            "human",
            """
            Company: {company}
            
            Glassdoor reviews: 
            {text}
            """,
        ),
    ]
)

# Chat model used for the summary: gpt-4o-mini at temperature 0.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Pipe the prompt into the model so a single invoke() fills the prompt and
# calls the LLM.
runnable = review_summary_prompt | llm
# Supply the prompt's {text} and {company} placeholders.
# NOTE(review): the company name is hard-coded; keep it in sync with the URL
# that was scraped earlier in the notebook.
summary_result = runnable.invoke(
    {
        "text": concat_reviews,
        "company": "98point6",
    }
)

# Show the text content of the model's reply.
print(summary_result.content)

13,887 characters in 30 reviews
('# Summary of 98point6 Glassdoor Reviews\n'
 '\n'
 '## Key Aspects\n'
 '\n'
 '### Leadership\n'
 '- **Pros**: Some reviews mention strong leadership that cares about '
 'employees and takes action on feedback.\n'
 '- **Cons**: Many reviews criticize leadership for being tone-deaf, '
 'inexperienced, and lacking emotional intelligence. There is a perception of '
 'a "boys\' club" mentality and poor communication, especially after '
 'significant leadership changes.\n'
 '- **Quotations**: "Terrible managers and extremely weak pay." "The '
 'management isn’t great- their inexperience in such roles definitely show."\n'
 '\n'
 '### Compensation and Benefits\n'
 '- **Pros**: Generous PTO and flexible work arrangements are frequently '
 'highlighted as positives.\n'
 '- **Cons**: Many employees feel that compensation is low, especially for the '
 'workload expected. There are complaints about a lack of merit increases and '
 'bonuses.\n'
 '- **Quotations**: "T

In [48]:
# Re-print the model's Markdown summary as plain text.
print(summary_result.content)

# Summary of 98point6 Glassdoor Reviews

## Key Aspects

### Leadership
- **Pros**: Some reviews mention strong leadership that cares about employees and takes action on feedback.
- **Cons**: Many reviews criticize leadership for being tone-deaf, inexperienced, and lacking emotional intelligence. There is a perception of a "boys' club" mentality and poor communication, especially after significant leadership changes.
- **Quotations**: "Terrible managers and extremely weak pay." "The management isn’t great- their inexperience in such roles definitely show."

### Compensation and Benefits
- **Pros**: Generous PTO and flexible work arrangements are frequently highlighted as positives.
- **Cons**: Many employees feel that compensation is low, especially for the workload expected. There are complaints about a lack of merit increases and bonuses.
- **Quotations**: "The PTO and time-off are better than most companies." "People across all departments leave 98point6 for ridiculous compensation 