In [1]:
import reddit
from reddit_summarizer import summarize_submission
from core import CompanyProduct

def test_single_summary(target: CompanyProduct):
    """Summarize the first Reddit thread found for ``target``.

    Creates a Reddit client, searches for up to 10 submission URLs, fetches
    only the first hit and hands it to ``summarize_submission``.

    Args:
        target: Company/product pair to search for and summarize.

    Returns:
        Whatever ``summarize_submission`` returns for the first thread.

    Raises:
        IndexError: Presumably, when the search comes back empty (the first
            element is taken unconditionally).
    """
    client = reddit.init()

    # Ten results are requested, but only the top hit is used.
    candidate_urls = reddit.find_submission_urls(target, num_results=10)
    first_url = candidate_urls[0]

    # Resolve the URL into a submission object, then summarize that one thread.
    thread = client.submission(url=first_url)
    return summarize_submission(target, thread)

# Smoke-test the single-thread path, using "98point6" as both company and product.
summary = test_single_summary(CompanyProduct.same("98point6"))

# Print the Markdown rendering of the summary.
print(summary.to_markdown())

# Summary: Internet medicine is awesome, 98point6 was so so helpful for me (thread id: bg7ip2)

The Reddit thread discusses user experiences with 98point6, an online medical service that provides affordable consultations and prescriptions. Users share positive experiences regarding accessibility and affordability, while some express concerns about limitations in prescribing certain medications.

## User Experience

### Strengths

- "It was amazing. ... I would 100% use 98point6 again, especially since my $20 gets me unlimited visits for a year." (source: bg7ip2)
- "This amazing awesome doctor ... was actively prepared to address the needs of people who have limited funds." (source: bg7ip2)
- "Helped me get paxlovid for covid more quickly than I otherwise would have been able to." (source: idl52u3)


### Weaknesses

- "My husband and I have used 98.6 three times. All three times they did not prescribe the needed antibiotic to get better." (source: hrmpl3t)
- "It's not the right place to

In [1]:
import reddit
from reddit_summarizer import summarize_submission, summarize_summaries
from core import CompanyProduct

def short_test(target: CompanyProduct, num_posts: int = 10, max_threads: int = 2):
    """Summarize a handful of Reddit threads about ``target`` and aggregate them.

    Args:
        target: Company/product pair to search for and summarize.
        num_posts: Number of search results to request from
            ``reddit.find_submission_urls``.
        max_threads: Maximum number of the returned URLs to actually fetch
            and summarize. Defaults to 2, which was previously hard-coded.

    Returns:
        The result of ``summarize_summaries`` over the per-thread summaries.

    Raises:
        ValueError: If ``max_threads`` is negative (a negative slice bound
            would silently drop threads from the end instead of limiting).
    """
    if max_threads < 0:
        raise ValueError(f"max_threads must be >= 0, got {max_threads}")

    reddit_client = reddit.init()

    # Search for URLs, then keep only the first max_threads to keep the run short
    post_urls = reddit.find_submission_urls(target, num_results=num_posts)[:max_threads]

    # Fetch the Submissions from Reddit
    post_submissions = [reddit_client.submission(url=url) for url in post_urls]

    # Summarize each
    post_summaries = [summarize_submission(target, submission) for submission in post_submissions]

    # Aggregate the summaries
    return summarize_summaries(target, post_summaries)

# Run the short map-then-aggregate test, using "98point6" as both company and product.
aggregate_summary = short_test(CompanyProduct.same("98point6"))

# Print the Markdown rendering of the aggregated summary.
print(aggregate_summary.to_markdown())

# 98point6 / 98point6

The Reddit threads discuss user experiences with 98point6, an online medical service that provides affordable consultations and prescriptions. Users generally appreciate the accessibility and cost-effectiveness of the service, but some express concerns about limitations in prescribing certain medications and the appropriateness of the service for more serious medical issues. There is no information available regarding employee experiences or investor perspectives.

## User Experience

### Strengths


- "It was amazing. I chatted with some weird robot for 10 minutes or so and answered questions about what my problem was, then I got connected to this nice doctor lady." (source: bg7ip2)
- "This amazing awesome doctor... was actively prepared to address the needs of people who have limited funds." (source: bg7ip2)
- "I would 100% use 98point6 again, especially since my $20 gets me unlimited visits for a year." (source: bg7ip2)
- "I really appreciate the accessibility

In [5]:
from pprint import pprint
# Dump the whole aggregated result object for inspection.
# NOTE(review): the recorded output embeds full thread text, so it is very large;
# consider clearing it before sharing the notebook.
pprint(aggregate_summary)

AggregatedSummaryResult(target=CompanyProduct(company='98point6', product='98point6'), summaries=[ThreadSummaryResult(submission=Submission(id='bg7ip2'), text='\n# Post bg7ip2:  Internet medicine is awesome, 98point6 was so so helpful for me by FrugalChef13 on 2019-04-22 [+55 votes]\n**TL;DR- $20 got me an awesome appointment with a nice doctor and a prescription for a medication I could afford that solved my issue.**\n\n*Disclaimer: This particular thing worked well for me so I\'m going to tell you about it. Everyone is different, so it might not work as well (or at all) for you.  Take what you find useful from this post and ignore the rest.  I\'m not compensated or connected to the website I\'m discussing.*\n\nSo like a lot of people on here I\'m usually either uninsured or underinsured.  Right now it\'s underinsured with a high deductible, so when I messed my back up badly enough that I could barely move I freaked.  I\'ve got scoliosis, a fucked up spine, bad knees, and muscles that

In [15]:
from core import CompanyProduct
import reddit
from reddit_summarizer import thread_summary_prompt, json_instructions, ThreadSummary, summary_to_markdown, llm

# Module-level Reddit client; summarize_thread reads this global.
reddit_client = reddit.init()

def summarize_thread(target: CompanyProduct, url: str, text_max_chars=40000) -> ThreadSummary:
    """Fetch one Reddit thread and summarize it with the LLM.

    The thread is converted to Markdown, truncated to ``text_max_chars``
    characters if it is longer, and sent through ``thread_summary_prompt``
    with JSON-mode structured output.

    Args:
        target: Company/product pair; its ``company`` and ``product`` fields
            are passed to the prompt.
        url: URL of the Reddit submission to summarize.
        text_max_chars: Upper bound on the characters of thread Markdown sent
            to the model.

    Returns:
        A ``ThreadSummary`` built from the submission, the (possibly
        truncated) text and the model's structured result.

    NOTE(review): ``ThreadSummary`` is used both as the structured-output
    schema and as the wrapper returned here — confirm that is intended.
    """
    submission = reddit_client.submission(url=url)
    text = reddit.submission_to_markdown(submission)

    # Hard character cut-off; anything past the limit is dropped.
    if len(text) > text_max_chars:
        print(f"Text too long: {len(text)} > {text_max_chars}. Truncating.")
        text = text[:text_max_chars]

    structured_llm = llm.with_structured_output(schema=ThreadSummary, method="json_mode")
    chain = thread_summary_prompt | structured_llm

    prompt_inputs = {
        "text": text,
        "company": target.company,
        "product": target.product,
        "json_instructions": json_instructions,
    }
    summary_result = chain.invoke(prompt_inputs)

    return ThreadSummary(submission=submission, text=text, summary_result=summary_result)


# Summarize one specific r/MMORPG thread about Singularity 6 / Palia.
# NOTE: this rebinds the global `summary`, which the first cell also assigns.
summary = summarize_thread(CompanyProduct("Singularity 6", "Palia"), "https://www.reddit.com/r/MMORPG/comments/1bz2e0z/palia_developers_singularity_6_axes_35_of_staff/")

# Print the summary rendered through summary_to_markdown.
print(summary_to_markdown(summary))



# Summary: Palia developers, Singularity 6, axes 35% of staff just after Steam launch (thread id: 1bz2e0z)

The Reddit thread discusses the recent layoffs at Singularity 6, the developers of Palia, following a lukewarm reception after the game's Steam launch. Users express mixed feelings about the game, highlighting both its strengths and weaknesses, while also commenting on the company's situation and future prospects.

## User Experience

### Strengths

- "I enjoy the characters and setting." (source: kyppu7o)
- "The story and characters are great, if you like those kind of stories." (source: SvenWollinger)
- "The foundations are 100% amazing. The mining, the hunting, the way you can place furniture and items, even the character designs are great." (source: payne6)

### Weaknesses

- "Palia felt restrictive... you had to have had more freedom for players to build solo." (source: kyn4d7g)
- "The gameplay loop was so insanely boring, only two zones and max. 24 other Players with you o

In [23]:
import jinja2

# Templates are loaded from the relative "templates" directory, so this cell
# depends on the kernel's working directory.
env = jinja2.Environment(
    loader=jinja2.FileSystemLoader("templates"),
    # Autoescape is not passed, so Jinja2's default applies; the template loaded
    # below is Markdown (thread_summary.md), not HTML.
    # autoescape=select_autoescape()
)
template = env.get_template("thread_summary.md")

In [25]:
# Render the thread summary through the Jinja2 template. Relies on the global
# `summary` exposing .submission and .summary_result (as returned by summarize_thread).
print(template.render(submission=summary.submission, summary_result=summary.summary_result))

# Summary: Palia developers, Singularity 6, axes 35% of staff just after Steam launch (thread id: 1bz2e0z)

The Reddit thread discusses the recent layoffs at Singularity 6, the developers of Palia, following a lukewarm reception after the game's Steam launch. Users express mixed feelings about the game, highlighting both its strengths and weaknesses, while also commenting on the company's situation and future prospects.

## User Experience

### Strengths

- "I enjoy the characters and setting." (source: kyppu7o)
- "The story and characters are great, if you like those kind of stories." (source: SvenWollinger)
- "The foundations are 100% amazing. The mining, the hunting, the way you can place furniture and items, even the character designs are great." (source: payne6)


### Weaknesses

- "Palia felt restrictive... you had to have had more freedom for players to build solo." (source: kyn4d7g)
- "The gameplay loop was so insanely boring, only two zones and max. 24 other Players with you o

In [99]:
import hashlib
import os


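# TODO: Fix this broken evaluation function. It relies on names that no visible cell imports
# (`markdown`, `wrap_html`, `summarize_prompt`, `aggregation_prompt`), and a recorded run
# below ended in a ThreadSummary ValidationError.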
def short_evaluation(target: CompanyProduct, num_threads=2):
    """Run a small end-to-end evaluation and save it as one HTML report.

    Summarizes the first ``num_threads`` threads found for ``target``,
    aggregates the summaries, and writes a debugging page containing the
    aggregate, its evaluation, both prompts and the individual summaries to
    ``evaluation/<company>_<product>/<test_id>/<timestamp>.html``.

    Args:
        target: Company/product pair to evaluate.
        num_threads: Number of threads to summarize.

    Returns:
        None. The report path is printed.

    NOTE(review): ``markdown``, ``wrap_html``, ``summarize_prompt`` and
    ``aggregation_prompt`` are not imported in any visible cell — confirm
    they are in scope before running on a fresh kernel.
    """
    # Imported locally: this cell only imports hashlib and os.
    from datetime import datetime

    # This is cached so it should be quick. At least num_threads results are
    # requested so that asking for more than 10 threads is not silently capped.
    thread_urls = reddit.find_submission_urls(target, num_results=max(10, num_threads))[:num_threads]

    # The ID of the test is the last 4 chars of the sha of the url list
    test_id = hashlib.sha256("".join(thread_urls).encode()).hexdigest()[-4:]

    folder = f"evaluation/{target.company}_{target.product}/{test_id}"
    os.makedirs(folder, exist_ok=True)

    # individual thread results
    results = [summarize_thread(target, url) for url in thread_urls]

    # aggregation result
    aggregation_result = summarize_summaries(target, results)

    # make a unified page
    # TODO: Migrate all of this to Jinja2
    result_htmls = "\n".join(r.to_html() for r in results)
    html_result = wrap_html(f"""
{aggregation_result.to_html()}

<hr/>

<h1>Debugging the aggregation</h1>

<h2>Hallucination evaluation</h2>
Note: This only evaluates the aggregation stage, not the mapping stage.
{aggregation_result.evaluate()}

<h2>Aggregation prompt</h2>
{summarize_prompt(aggregation_prompt)}

<h2>Aggregation input (converted markdown to HTML)</h2>
{markdown.markdown(aggregation_result.aggregation_prompt_context)}

<hr/>

<h1>Debugging the mapping</h1>

<h2>Mapping prompt</h2>

{summarize_prompt(thread_summary_prompt)}


<h2>Individual summaries</h2>
{result_htmls}
""")

    # Create the filename using the current timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{folder}/{timestamp}.html"

    # Explicit UTF-8: the report embeds Reddit text, which the platform's
    # default encoding may not be able to represent.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(html_result)

    print(f"Results for {target} saved to {filename}")

# Evaluate 98point6 on 5 threads.
# NOTE(review): the recorded output path (evaluation/test_9a15/...) does not match the
# folder format built in short_evaluation, so the output looks stale — re-run.
short_evaluation(CompanyProduct.same("98point6"), 5)


Results for CompanyProduct(company='98point6', product='98point6') saved to evaluation/test_9a15/20240728_203856.html


In [42]:
# Evaluate three more targets on 5 threads each.
# NOTE(review): the recorded output names 98point6, which none of these calls target,
# and ends in a ValidationError — the output looks stale; re-run on a fresh kernel.
short_evaluation(CompanyProduct("Rad AI", "Omni"), 5)
short_evaluation(CompanyProduct("Singularity 6", "Palia"), 5)
short_evaluation(CompanyProduct.same("Instacart"), 5)


Results for CompanyProduct(company='98point6', product='98point6') saved to evaluation/test_a64d/20240728_173337.html


ValidationError: 1 validation error for ThreadSummary
investor_perspective -> 0
  value is not a valid dict (type=type_error.dict)