# Find, fetch, and summarize a single Reddit thread

In [4]:
from core import CompanyProduct
import reddit.summarizer
import reddit.search
import reddit.fetch

def test_single_summary(target: CompanyProduct):
    """Find one Reddit submission about ``target`` and summarize it.

    Args:
        target: The company/product pair to search Reddit for.

    Returns:
        The result of ``reddit.summarizer.summarize_submission`` for the first
        search hit.

    Raises:
        IndexError: If the search returns no submission URLs.
    """
    reddit_client = reddit.fetch.init()

    # Search for URLs; only the first hit is used below
    post_urls = reddit.search.find_submission_urls(target, num_results=10)
    if len(post_urls) == 0:
        # Same exception type a bare [0] would raise, but with a message naming the target
        raise IndexError(f"No Reddit submissions found for {target}")
    post_url = post_urls[0]

    # Fetch the Submission from Reddit
    post_submission = reddit_client.submission(url=post_url)

    # Summarize it
    return reddit.summarizer.summarize_submission(target, post_submission)

# Run the single-submission pipeline for 98point6.
summary = test_single_summary(CompanyProduct.same("98point6"))

# Show the summary in its Markdown rendering.
print(summary.to_markdown())



# Internet medicine is awesome, 98point6 was so so helpful for me (thread id: bg7ip2)

The Reddit thread discusses user experiences with 98point6, an online medical service that provides affordable consultations and prescriptions. Users share both positive and negative experiences, highlighting the service's accessibility and limitations.

## User Experience

### Strengths


- "It was amazing. ... I would 100% use 98point6 again, especially since my $20 gets me unlimited visits for a year." (Comment ID: bg7ip2)
- "She was so kind, and didn't make me feel dumb or worthless for not being able to afford a full price doctor's appointment." (Comment ID: bg7ip2)
- "This amazing awesome doctor picked up her iPhone and said 'I have an app for a website called GoodRx...'" (Comment ID: bg7ip2)
- "Helped me get paxlovid for covid more quickly than I otherwise would have been able to." (Comment ID: idl52u3)




### Weaknesses


- "My husband and I have used 98.6 three times. All three times they

In [3]:
# Show the same summary in its HTML rendering.
# Relies on `summary` having been assigned by the single-summary cell earlier in the notebook.
print(summary.to_html())





<html>
<head>
    <title></title>
</head>
<body>

<h1>
Internet medicine is awesome, 98point6 was so so helpful for me (thread id: bg7ip2)
</h1>

<p>
The Reddit thread discusses user experiences with 98point6, an online medical service that provides affordable consultations and prescriptions. Users share both positive and negative experiences, highlighting the service's accessibility and limitations.
</p>

<h2>User Experience</h2>

<h3>Strengths</h3>


    <ul>
        
        <li>"It was amazing. ... I would 100% use 98point6 again, especially since my $20 gets me unlimited visits for a year." (source: 

bg7ip2

)</li>
        
        <li>"This amazing awesome doctor ... was actively prepared to address the needs of people who have limited funds." (source: 

bg7ip2

)</li>
        
        <li>"Helped me get paxlovid for covid more quickly than I otherwise would have been able to." (source: 

idl52u3

)</li>
        
    </ul>
    


<h3>Weaknesses</h3>


    <ul>
        
     

# Find, fetch, and summarize multiple Reddit threads

In [6]:
import reddit.summarizer
import reddit.search
import reddit.fetch

from core import CompanyProduct

def short_test(target: CompanyProduct, num_posts: int = 10):
    """Summarize up to two Reddit submissions about ``target`` and aggregate them.

    Args:
        target: The company/product pair to search Reddit for.
        num_posts: Number of search results to request. Only the first two
            returned URLs are actually fetched and summarized.

    Returns:
        The result of ``reddit.summarizer.summarize_summaries`` over the
        per-submission summaries.
    """
    client = reddit.fetch.init()

    # Ask the search for `num_posts` URLs, then keep just the leading pair
    found_urls = reddit.search.find_submission_urls(target, num_results=num_posts)
    selected_urls = found_urls[:2]

    # Build every Submission before summarizing any of them
    submissions = []
    for url in selected_urls:
        submissions.append(client.submission(url=url))

    # One summary per submission, in search order
    summaries = []
    for submission in submissions:
        summaries.append(reddit.summarizer.summarize_submission(target, submission))

    # Fold the individual summaries into a single aggregate
    return reddit.summarizer.summarize_summaries(target, summaries)

# Run the multi-submission pipeline for 98point6 with the default num_posts.
aggregate_summary = short_test(CompanyProduct.same("98point6"))

# Show the aggregate summary in its Markdown rendering.
print(aggregate_summary.to_markdown())



# 98point6 / 98point6

The Reddit threads discuss user experiences with 98point6, an online medical service that offers affordable consultations and prescriptions. Users highlight the service's accessibility and affordability, but also express concerns about its limitations in prescribing medications and handling more serious medical issues. There is no information available regarding employee experiences or investor perspectives.

## User Experience

### Strengths


- "It was amazing. ... I would 100% use 98point6 again, especially since my $20 gets me unlimited visits for a year." (Comment ID: bg7ip2)
- "She was so kind, and didn't make me feel dumb or worthless for not being able to afford a full price doctor's appointment." (Comment ID: bg7ip2)
- "This amazing awesome doctor ... was actively prepared to address the needs of people who have limited funds." (Comment ID: bg7ip2)
- "Helped me get paxlovid for covid more quickly than I otherwise would have been able to." (Comment ID: 

# Standardized evaluation

In [7]:
import os
import re

import reddit.summarizer
import reddit.search
import reddit.fetch

from core import CompanyProduct

from datetime import datetime
from collections import ChainMap

def short_evaluation(target: CompanyProduct, num_threads=2, min_comments=2, num_results=10, max_hallucinations=1):
    """Summarize Reddit threads about ``target`` and save the aggregate as HTML.

    The report is written to ``evaluation/<company>_<product>/<timestamp>.html``.
    If no submission has enough comments, or no summary survives the
    hallucination filter, a message is printed and nothing is written.

    Args:
        target: The company/product pair to search Reddit for.
        num_threads: Maximum number of submissions to summarize.
        min_comments: Minimum ``num_comments`` a submission needs to be kept.
        num_results: Number of search results to request.
        max_hallucinations: Threshold passed to
            ``summary.is_under_max_hallucinations``; summaries failing that
            check are dropped.

    Returns:
        None. The outcome is reported via ``print``.
    """
    # Make the output folder; every character outside [a-zA-Z0-9] becomes "_"
    folder_name = re.sub(r"[^a-zA-Z0-9]", "_", f"{target.company} {target.product}")
    folder_path = f"evaluation/{folder_name}"
    os.makedirs(folder_path, exist_ok=True)

    reddit_client = reddit.fetch.init()

    # Search for URLs
    post_urls = reddit.search.find_submission_urls(target, num_results=num_results)

    # Fetch the Submissions from Reddit
    post_submissions = [reddit_client.submission(url=url) for url in post_urls]

    # Filter Submissions to only those with enough comments
    post_submissions = [submission for submission in post_submissions if submission.num_comments >= min_comments]

    if len(post_submissions) == 0:
        print(f"No posts with enough comments found for {target}")
        return

    # Limit the number of threads (applied after filtering, so short threads don't use up the quota)
    post_submissions = post_submissions[:num_threads]

    # Summarize each
    post_summaries = [reddit.summarizer.summarize_submission(target, submission) for submission in post_submissions]

    # Filter out any summary that fails the hallucination check
    post_summaries = [summary for summary in post_summaries if summary.is_under_max_hallucinations(max_hallucinations, debug=True)]

    if len(post_summaries) == 0:
        print(f"No valid summaries found for {target}")
        return

    # Index permalinks across all surviving submissions into one lookup
    permalinks = ChainMap(*[reddit.fetch.index_permalinks(summary.submission) for summary in post_summaries])

    # Aggregate the summaries
    aggregate_summary = reddit.summarizer.summarize_summaries(target, post_summaries)

    # Create the filename using the current timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{folder_path}/{timestamp}.html"

    # Pin UTF-8: the default encoding is locale-dependent and can fail on
    # non-ASCII characters in quoted Reddit text.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(aggregate_summary.to_html(permalinks=permalinks))

    print(f"Results for {target} saved to {filename}")

# Evaluate 98point6 with num_threads=5 (second positional argument); other settings keep their defaults.
short_evaluation(CompanyProduct.same("98point6"), 5)


Results for CompanyProduct(company='98point6', product='98point6') saved to evaluation/98point6_98point6/20240730_092215.html


In [2]:
# Evaluate a target whose company and product names differ, again with num_threads=5.
# Requires the cell that defines short_evaluation to have been run first.
short_evaluation(CompanyProduct("Rad AI", "Omni"), 5)
# Other targets, currently disabled:
# short_evaluation(CompanyProduct("Singularity 6", "Palia"), 5)
# short_evaluation(CompanyProduct.same("Instacart"), 5)


Filtering Is the Grey Matter DNA Asmuths? with evaluation Evaluation(claims_made=4, quotes_in_source=2, comment_ids_in_source=2)
Results for CompanyProduct(company='Rad AI', product='Omni') saved to evaluation/Rad_AI_Omni/20240729_112333.html


In [2]:
# Test permalinks
import src.reddit.fetch as fetch

def test_index_permalinks():
    """Build the permalink index for a fixed r/ChatGPT thread.

    Returns:
        Whatever ``fetch.index_permalinks`` produces for that submission.
    """
    client = fetch.init()

    # Fixed sample thread used as the test subject
    thread_url = "https://www.reddit.com/r/ChatGPT/comments/11twe7z/prompt_to_summarize/"
    thread = client.submission(url=thread_url)

    permalink_index = fetch.index_permalinks(thread)
    return permalink_index

# Build the permalink index for the sample thread.
links = test_index_permalinks()

# Pretty-print the index so it is easier to read than a plain print.
from pprint import pprint
pprint(links)

{'11twe7z': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/',
 'jcm7h9z': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/jcm7h9z/',
 'jf2x8tv': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/jf2x8tv/',
 'jf3qdny': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/jf3qdny/',
 'jf3qr9a': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/jf3qr9a/',
 'jf3w7dk': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/jf3w7dk/',
 'jqcmckq': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/jqcmckq/',
 'jyrpv2g': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/jyrpv2g/',
 'kvxa6ye': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/kvxa6ye/',
 'kwo1bpp': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/kwo1bpp/',
 'l16umyv': '/r/ChatGPT/comments/11twe7z/prompt_to_summarize/l16umyv/'}
