In [39]:
from core import CompanyProduct

# Company/product pair that the later cells pass to process_reddit,
# process_glassdoor and eval_filename.
target = CompanyProduct("Rad AI", "Omni")

In [40]:
import reddit.summarizer
import reddit.search
import reddit.fetch

from core import CompanyProduct
from search import SearchResult

from collections import ChainMap
from dataclasses import dataclass
from typing import List, Mapping, Optional

@dataclass
class RedditSummary:
    """Reddit search results bundled with the summaries derived from them.

    This is the value ``process_reddit`` returns when at least one thread
    summary survives its filters.
    """
    # Search results exactly as returned by reddit.search.find_submissions
    # (not narrowed to the threads that were actually summarized).
    sources: List[SearchResult]

    # Summary aggregated over the per-thread summaries below.
    overall_summary: reddit.summarizer.AggregatedSummaryResult
    # One entry per thread that was summarized and kept.
    summaries: List[reddit.summarizer.ThreadSummaryResult]
    # Merged output of reddit.fetch.index_permalinks for each kept thread;
    # presumably comment id -> permalink URL (TODO confirm against reddit.fetch).
    permalinks: Mapping[str, str]

def process_reddit(target: CompanyProduct, num_threads=2, min_comments=2) -> Optional[RedditSummary]:
    """Search Reddit for ``target``, summarize the top threads, and aggregate them.

    Args:
        target: Company/product to search for and summarize.
        num_threads: Maximum number of threads to summarize.
        min_comments: Threads with fewer comments than this are discarded.

    Returns:
        A ``RedditSummary``, or ``None`` (after printing a message) when no
        thread has enough comments or no per-thread summary passes the
        hallucination check.
    """
    client = reddit.fetch.init()

    # Stage 1: look up candidate thread URLs
    hits = reddit.search.find_submissions(target, num_results=10)

    # Stage 2: turn every hit into a Submission object
    candidates = []
    for hit in hits:
        candidates.append(client.submission(url=hit.link))

    # Stage 3: drop threads that have too little discussion
    discussed = []
    for post in candidates:
        if post.num_comments >= min_comments:
            discussed.append(post)

    if not discussed:
        print(f"No posts with enough comments found for {target}")
        return None

    # Stage 4: cap how many threads get summarized
    selected = discussed[:num_threads]

    # Stage 5: summarize every selected thread before any of them is vetted
    thread_summaries = []
    for post in selected:
        thread_summaries.append(reddit.summarizer.summarize_submission(target, post))

    # Stage 6: keep only summaries with at most 1 hallucinated comment id
    trusted = []
    for thread_summary in thread_summaries:
        if thread_summary.is_under_max_hallucinations(1, debug=True):
            trusted.append(thread_summary)

    if not trusted:
        print(f"No valid summaries found for {target}")
        return None

    # Stage 7: merge the per-thread permalink indexes into a single lookup
    permalink_maps = [reddit.fetch.index_permalinks(kept.submission) for kept in trusted]
    merged_permalinks = ChainMap(*permalink_maps)

    # Stage 8: one summary across all kept threads
    overall = reddit.summarizer.summarize_summaries(target, trusted)

    return RedditSummary(
        sources=hits,
        overall_summary=overall,
        summaries=trusted,
        permalinks=merged_permalinks,
    )

# Run the Reddit pipeline for the target. The bare name on the last line
# displays the result; it is None when process_reddit found nothing usable.
reddit_result = process_reddit(target)
reddit_result

RedditSummary(sources=[SearchResult(title='Powerscribe One -- are you for real? : r/Radiology', link='https://www.reddit.com/r/Radiology/comments/18bymqt/powerscribe_one_are_you_for_real/', snippet='Dec 6, 2023 ... Rad AI is mostly known for automatically generating Impressions and this September launched Omni Reporting which was designed by\xa0...', formattedUrl='https://www.reddit.com/r/Radiology/.../powerscribe_one_are_you_for_real/'), SearchResult(title='DROP DOWN MENU FOR INCIDENTAL FINDINGS IN RADIOLOGY ...', link='https://www.reddit.com/r/Radiology/comments/1eg7775/drop_down_menu_for_incidental_findings_in/', snippet='2 days ago ... Rad AI Omni, Fluency, things like that. There are some tools that can help but are not really built out, like Rad AI Impressions, or require\xa0...', formattedUrl='https://www.reddit.com/r/.../drop_down_menu_for_incidental_findings_in/'), SearchResult(title='Is the Grey Matter DNA Asmuths? : r/Ben10', link='https://www.reddit.com/r/Ben10/comments/18o

In [41]:
from dataclasses import dataclass
from typing import List

from core import CompanyProduct
from search import SearchResult

import markdown

from glassdoor.search import find_review
from glassdoor.scraper import scrape_reviews, scrape_jobs, GlassdoorJob, GlassdoorReview, Url
from glassdoor.summarizer import summarize

@dataclass
class GlassdoorResult:
    """Inputs, scraped data, and generated summary for one company's Glassdoor page.

    The field order is part of the interface: callers may construct this
    positionally.
    """
    # inputs
    company: CompanyProduct

    # intermediate data
    review_page: SearchResult  # its .link is parsed to build per-review URLs
    raw_reviews: dict  # raw scrape payload; 'allReviewsCount' is read from it
    reviews: List[GlassdoorReview]

    # outputs
    jobs: List[GlassdoorJob]
    summary_markdown: str

    def __add_links(self, html):
        """Wrap review citations found in ``html`` with links to the cited review.

        A citation ("gloss") is either ``"<job title> on <YYYY-MM-DD>"`` or
        ``"<job title>, <YYYY-MM-DD>"``, with "Anonymous" standing in for a
        missing job title.

        All glosses are substituted in a single pass, longest first. Replacing
        them one after another would let a short gloss such as
        "Engineer on 2021-04-13" match inside "Senior Engineer on 2021-04-13",
        linking the citation to the wrong review and preventing the longer
        gloss from ever matching.

        Args:
            html: HTML text to scan for citations.

        Returns:
            ``html`` with every recognized citation wrapped in an ``<a>`` tag;
            unchanged when there are no reviews.
        """
        # Local import so this class does not rely on a later cell importing re.
        import re

        employer, _ = Url.parse_review_url(self.review_page.link)
        gloss_to_link = {}
        for review in self.reviews:
            title = review.jobTitle or "Anonymous"
            date = review.dateTime.strftime('%Y-%m-%d')
            url = Url.review(employer, review.reviewId)

            gloss_to_link[f"{title} on {date}"] = url
            gloss_to_link[f"{title}, {date}"] = url

        # An empty alternation would match the empty string everywhere.
        if not gloss_to_link:
            return html

        # Longest first so that, at any given position, the most specific
        # citation wins over a gloss that is merely its prefix.
        longest_first = sorted(gloss_to_link, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(gloss) for gloss in longest_first))

        def linkify(match):
            gloss = match.group(0)
            return f'<a href="{gloss_to_link[gloss]}">{gloss}</a>'

        return pattern.sub(linkify, html)

    @property
    def summary_html(self):
        """The markdown summary converted to HTML, with review citations linked."""
        html_out = markdown.markdown(self.summary_markdown)

        html_out = self.__add_links(html_out)

        return html_out

    @property
    def num_parsed_reviews(self):
        """Number of reviews that were parsed into ``reviews``."""
        return len(self.reviews)

    @property
    def num_raw_reviews(self):
        """Value of ``'allReviewsCount'`` in the raw payload, or 0 if the key is absent."""
        return self.raw_reviews.get('allReviewsCount', 0)


async def process_glassdoor(target: CompanyProduct, max_pages=1) -> GlassdoorResult:
    """Scrape Glassdoor jobs and reviews for ``target`` and summarize the reviews.

    Args:
        target: Company/product whose Glassdoor review page is looked up.
        max_pages: Number of review pages to scrape. Job listings are always
            limited to one page.

    Returns:
        A ``GlassdoorResult`` holding the review-page search hit, the raw and
        parsed reviews, the job listings sorted by title, and the summary text.
    """
    review_hit = find_review(target)
    employer, employer_id = Url.parse_review_url(review_hit.link)

    # job results, not 100% used yet
    jobs_url = Url.jobs(employer, employer_id)
    raw_jobs = await scrape_jobs(jobs_url, max_pages=1)
    job_listings = []
    for raw_job in raw_jobs:
        job_listings.append(GlassdoorJob.from_dict(raw_job))
    job_listings.sort(key=lambda listing: listing.jobTitleText)

    raw_review_payload = await scrape_reviews(review_hit.link, max_pages=max_pages)
    parsed_reviews = GlassdoorReview.parse_reviews(raw_review_payload)

    summary = summarize(target, parsed_reviews)

    # TODO: Pull out allReviewsCount from glassdoor_results
    return GlassdoorResult(
        company=target,
        review_page=review_hit,
        raw_reviews=raw_review_payload,
        reviews=parsed_reviews,
        jobs=job_listings,
        summary_markdown=summary.content,
    )

# Run the Glassdoor pipeline for the target; this relies on the notebook's
# support for top-level `await`. The bare name on the last line displays the result.
glassdoor_result = await process_glassdoor(target)
glassdoor_result

[32m2024-08-01 16:31:34.199[0m | [1mINFO    [0m | [36mglassdoor.scraper[0m:[36mscrape_jobs[0m:[36m82[0m - [1mscraping job listings from https://www.glassdoor.com/Jobs/Rad-AI-Jobs-E3543079.htm?[0m
[32m2024-08-01 16:31:35.267[0m | [1mINFO    [0m | [36mglassdoor.scraper[0m:[36mscrape_jobs[0m:[36m90[0m - [1mscraped first page of jobs of https://www.glassdoor.com/Jobs/Rad-AI-Jobs-E3543079.htm?, scraping remaining 0 pages[0m
[32m2024-08-01 16:31:35.268[0m | [1mINFO    [0m | [36mglassdoor.scraper[0m:[36mscrape_jobs[0m:[36m97[0m - [1mscraped 15 jobs from https://www.glassdoor.com/Jobs/Rad-AI-Jobs-E3543079.htm? in 1 pages[0m
[32m2024-08-01 16:31:35.268[0m | [1mINFO    [0m | [36mglassdoor.scraper[0m:[36mscrape_reviews[0m:[36m110[0m - [1mscraping reviews from https://www.glassdoor.com/Reviews/Rad-AI-Reviews-E3543079.htm[0m
[32m2024-08-01 16:31:35.725[0m | [1mINFO    [0m | [36mglassdoor.scraper[0m:[36mscrape_reviews[0m:[36m118[0m - [1mscra

The prompt context has 11,859 characters in 10 reviews


GlassdoorResult(company=CompanyProduct(company='Rad AI', product='Omni'), review_page=SearchResult(title='Rad AI Reviews: What Is It Like to Work At Rad AI? | Glassdoor', link='https://www.glassdoor.com/Reviews/Rad-AI-Reviews-E3543079.htm', snippet='13 Rad AI reviews. A free inside look at company reviews and salaries posted anonymously by employees.', formattedUrl='https://www.glassdoor.com/Reviews/Rad-AI-Reviews-E3543079.htm'), raw_reviews={'__typename': 'EmployerReviewsRG', 'allReviewsCount': 13, 'currentPage': 1, 'filteredReviewsCount': 13, 'lastReviewDateTime': '2024-07-23T14:56:31.267', 'numberOfPages': 2, 'queryJobTitle': None, 'queryLocation': None, 'ratedReviewsCount': 13, 'ratings': {'__typename': 'EmployerRatings', 'businessOutlookRating': 0.91, 'careerOpportunitiesRating': 4.6, 'ceoRating': 1, 'compensationAndBenefitsRating': 4.3, 'cultureAndValuesRating': 3.9, 'diversityAndInclusionRating': 3.7, 'overallRating': 4.5, 'ratedCeo': {'__typename': 'Ceo', 'id': 982642, 'photoUr

In [42]:
# Print the HTML source (rather than rendering it) so the injected review links can be inspected.
print(glassdoor_result.summary_html)

<h1>Summary of Glassdoor Reviews for Rad AI</h1>
<h2>Key Aspects</h2>
<h3>Leadership</h3>
<p><strong>Pros:</strong>
- Some reviews highlight strong leadership from co-founders, particularly Jeff Chang, who is described as caring and visionary.
- Open and supportive leadership is noted in some positive reviews.</p>
<p><strong>Cons:</strong>
- Many reviews criticize the leadership as egotistical, inexperienced, and incompetent, leading to a toxic work culture.
- There are reports of a lack of trust in employees, with meetings being recorded and a culture of blame.</p>
<p><strong>Quotations:</strong>
- "Egotistical, inexperienced, incompetent leadership" (<a href="https://www.glassdoor.com/Reviews/Employee-Review-Rad-AI-RVW45470196.htm">Senior Software Engineer on 2021-04-13</a>)
- "Co-Founders are wonderful. Jeff Chang is one of a human's smartest, most humble gems." (<a href="https://www.glassdoor.com/Reviews/Employee-Review-Rad-AI-RVW73743628.htm">Anonymous on 2023-02-17</a>)</p>
<h3>C

In [43]:
import re
from datetime import datetime
import os

def eval_filename(target: CompanyProduct, create_folder=True) -> str:
    """Return a timestamped ``.html`` path under ``evaluation/`` for ``target``.

    The folder is named after the company and product, with every character
    that is not an ASCII letter or digit replaced by an underscore.

    Args:
        target: Supplies the ``company`` and ``product`` used for the folder name.
        create_folder: When true, create the folder (and any missing parents)
            if it does not already exist.

    Returns:
        ``evaluation/<folder>/<YYYYMMDD_HHMMSS>.html`` using the current local time.
    """
    # Sanitize "<company> <product>" into a folder name
    label = f"{target.company} {target.product}"
    safe_label = re.sub(r"[^a-zA-Z0-9]", "_", label)
    out_dir = f"evaluation/{safe_label}"

    if create_folder:
        os.makedirs(out_dir, exist_ok=True)

    # The file name is the moment of the call, to one-second resolution
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"{out_dir}/{stamp}.html"

In [48]:
import jinja2

templates = jinja2.Environment(
    loader=jinja2.FileSystemLoader("templates"),
)

# Render before opening the output file, so a template error does not leave
# an empty timestamped report behind.
template = templates.get_template("overall.html")
html = template.render(
    reddit_summary=reddit_result,
    glassdoor_summary=glassdoor_result
)

# Write as UTF-8 explicitly: the report contains scraped text with non-ASCII
# characters, and the default encoding of open() depends on the platform locale.
with open(eval_filename(target), "w", encoding="utf-8") as f:
    f.write(html)
    print(f"Written to {f.name}")

Written to evaluation/Rad_AI_Omni/20240801_163440.html


In [45]:
# Inspect the scraped job listings (process_glassdoor sorts them by jobTitleText).
glassdoor_result.jobs

[GlassdoorJob(ageInDays=23, goc='accounting manager', jobTitleText='Accounting Manager', locationName='Remote', payCurrency='USD', payPercentile10=100000, payPercentile50=115000, payPercentile90=130000, payPeriod='ANNUAL', salarySource='EMPLOYER_PROVIDED', seoJobLink='https://www.glassdoor.com/job-listing/accounting-manager-rad-ai-JV_KO0,18_KE19,25.htm?jl=1009357190600'),
 GlassdoorJob(ageInDays=135, goc='engineer', jobTitleText='Director of Engineering', locationName='Remote', payCurrency='USD', payPercentile10=195000, payPercentile50=222500, payPercentile90=250000, payPeriod='ANNUAL', salarySource='EMPLOYER_PROVIDED', seoJobLink='https://www.glassdoor.com/job-listing/director-of-engineering-rad-ai-JV_KO0,23_KE24,30.htm?jl=1009193118873'),
 GlassdoorJob(ageInDays=41, goc='platform engineer', jobTitleText='Director of Engineering, Platform', locationName='Remote', payCurrency='USD', payPercentile10=195000, payPercentile50=222500, payPercentile90=250000, payPeriod='ANNUAL', salarySource