# Reddit

In [2]:
import praw
from dotenv import load_dotenv
import os

# Load environment variables via python-dotenv (presumably from a local .env file)
# so the credentials below do not have to be hardcoded in the notebook.
load_dotenv()

# Reddit API client used by every later cell.
# os.environ[...] raises KeyError if either credential variable is missing.
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    user_agent="Comment Extraction (by u/trnka)",
)


In [6]:
from praw.models import MoreComments

# Example thread used throughout the rest of the notebook.
submission = reddit.submission(url="https://www.reddit.com/r/povertyfinance/comments/bg7ip2/internet_medicine_is_awesome_98point6_was_so_so/")

# Replace up to 10 MoreComments placeholders with the comments they stand for.
submission.comments.replace_more(limit=10)

# Print every top-level comment; placeholders that were not replaced are skipped.
for comment in submission.comments:
    if not isinstance(comment, MoreComments):
        print(f"""
[{comment.score}] Comment by {comment.author}:
{comment.body.strip()}


""")



[9] Comment by None:
That sounds like a great resource! Especially for my two most common ailments, pink eye (I work with kids, they’re gross) and strep. Thanks for sharing




[1] Comment by rassmann:
General mod note:  See users disclaimer at the top, YMMV!




[3] Comment by AnotherDay_RS:
Sounds like a great service but what i don't understand is, If they cannot prescribe you the medication due to ie; controlled substance etc. Why is she giving you price to purchase it under GoodRX?

Sorry that part wasn't clear to me, I thought you needed a doctor approval to prescribe controlled substances.




[3] Comment by MarketWorldly9908:
My husband and I have used 98.6 three times. All three times they did not prescribe the needed antibiotic to get better. I had an ear infection, my husband had an ear infection, then I had a sinus infection. We had to wait and get into our family doctor, so we paid 98.6 and our family doctor. I would not recommend them!




[2] Comment by NightSkyButterfl

In [8]:
# Inspect the post's title and self-text; the notebook displays them as a tuple.
submission.title, submission.selftext

('Internet medicine is awesome, 98point6 was so so helpful for me',
 '**TL;DR- $20 got me an awesome appointment with a nice doctor and a prescription for a medication I could afford that solved my issue.**\n\n*Disclaimer: This particular thing worked well for me so I\'m going to tell you about it. Everyone is different, so it might not work as well (or at all) for you.  Take what you find useful from this post and ignore the rest.  I\'m not compensated or connected to the website I\'m discussing.*\n\nSo like a lot of people on here I\'m usually either uninsured or underinsured.  Right now it\'s underinsured with a high deductible, so when I messed my back up badly enough that I could barely move I freaked.  I\'ve got scoliosis, a fucked up spine, bad knees, and muscles that love to spasm uncontrollably for days on end.  I\'d run out of my prescription muscle relaxants last fall and hadn\'t been able to afford another appointment.  Advil helps a little but it wears off in 4 hours so I 

# Design notes

1. Transform the product name into an appropriate query
2. Google search reddit.com with the query (possibly multiple time windows - alltime, 1y, 1m?)
3. Process each post:
    - Pull the top post
    - Pull any comments
    - Identify whether the post is primarily about the product, jobs at the company, or bizdev
4. Summarize the product-related posts:
    - Timeline of posts with titles and one key excerpt, linking to the original. Also include the amount of activity on the post and the score

In [11]:
# Helper to convert a Reddit thread to text
from datetime import datetime

DATE_FORMAT = "%Y-%m-%d"


def utc_to_date(utc: float) -> str:
    """Format a Unix timestamp as a UTC calendar date.

    Args:
        utc: Seconds since the Unix epoch.

    Returns:
        The UTC date of that instant, formatted with DATE_FORMAT (e.g. "2019-04-22").
    """
    # Imported locally because the enclosing cell only imports `datetime` itself.
    from datetime import timezone

    # datetime.utcfromtimestamp() is deprecated since Python 3.12; converting to an
    # aware UTC datetime produces the same date string without the warning.
    return datetime.fromtimestamp(utc, tz=timezone.utc).strftime(DATE_FORMAT)

def reddit_thread_to_text(submission) -> str:
    """Flatten a Reddit submission and its top-level comments into plain text.

    The output starts with a header line (title, author, date, signed score)
    followed by the post's self-text, then one similar section per top-level
    comment. Nested replies are not included.

    Args:
        submission: A PRAW submission. Its comment forest is modified in place
            by ``replace_more`` before it is read.

    Returns:
        The thread as a single string.
    """
    # Imported here so the helper does not silently depend on some earlier cell
    # having imported MoreComments into the notebook's global namespace.
    from praw.models import MoreComments

    # Replace up to 10 MoreComments placeholders; any that remain are skipped below.
    submission.comments.replace_more(limit=10)

    # Collect the pieces and join once instead of repeatedly concatenating strings.
    parts = [
        f"\n{submission.title} by {submission.author} on {utc_to_date(submission.created_utc)} [{submission.score:+d} votes]:\n{submission.selftext}\n"
    ]
    for comment in submission.comments:
        if isinstance(comment, MoreComments):
            continue

        parts.append(
            f"\nComment by {comment.author} on {utc_to_date(comment.created_utc)} [{comment.score:+d} votes]:\n{comment.body}\n"
        )
    return "".join(parts)

print(reddit_thread_to_text(submission))


Internet medicine is awesome, 98point6 was so so helpful for me by FrugalChef13 on 2019-04-22 [+58 votes]:
**TL;DR- $20 got me an awesome appointment with a nice doctor and a prescription for a medication I could afford that solved my issue.**

*Disclaimer: This particular thing worked well for me so I'm going to tell you about it. Everyone is different, so it might not work as well (or at all) for you.  Take what you find useful from this post and ignore the rest.  I'm not compensated or connected to the website I'm discussing.*

So like a lot of people on here I'm usually either uninsured or underinsured.  Right now it's underinsured with a high deductible, so when I messed my back up badly enough that I could barely move I freaked.  I've got scoliosis, a fucked up spine, bad knees, and muscles that love to spasm uncontrollably for days on end.  I'd run out of my prescription muscle relaxants last fall and hadn't been able to afford another appointment.  Advil helps a little but it 

In [12]:
# Character count of the flattened thread (re-runs the conversion on `submission`).
len(reddit_thread_to_text(submission))

6214

# Summarize with LangChain

In [14]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# See also
# https://www.reddit.com/r/ChatGPT/comments/11twe7z/prompt_to_summarize/
# https://www.reddit.com/r/ChatGPT/comments/13na8yp/highly_effective_prompt_for_summarizing_gpt4/

# Two-message chat prompt: a system message with the summarization instructions
# and a human message carrying the thread text.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Fixed a stray period that split "public online discussions" mid-phrase.
            """
            You're an expert at reading and summarizing public online discussions.

            Please read and summarize the following Reddit thread.
            The summary should cover all the key points and main ideas presented in the original text, while also condensing the information into a concise and easy-to-understand format. 
            Please ensure that the summary includes relevant details and examples that support the main ideas, while avoiding any unnecessary information or repetition. 
            The length of the summary should be appropriate for the length and complexity of the original text, providing a clear and accurate overview without omitting any important information.
            """,
        ),
        (
            "human",
            # `{text}` is the template variable supplied at invoke time.
            """
            Reddit thread: 
            {text}
            """,
        ),
    ]
)

# Chat model used for summarization; temperature=0 presumably to keep summaries repeatable.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
# Pipe the prompt into the model; the chain is invoked with a dict containing "text".
runnable = prompt | llm

In [15]:
from pprint import pprint

# Summarize the example thread and show the full response object returned by the chain.
retval = runnable.invoke({"text": reddit_thread_to_text(submission)})
pprint(retval)

AIMessage(content="The Reddit thread discusses a user's positive experience with the online medical service 98point6. The user, who was underinsured and in need of a prescription for their back pain, found the service helpful and affordable. They highlighted the ease of use, the low cost of $20 for a one-year membership with unlimited online appointments, and the compassionate and helpful doctor they interacted with. The doctor recommended a suitable medication and even checked the price using GoodRx to ensure affordability for the user. Other users in the thread shared their experiences and opinions on internet medicine, with some praising the service for quick access to medications and others expressing concerns about not receiving the needed antibiotics. Overall, the thread showcases the convenience and cost-effectiveness of online medical services like 98point6 for basic healthcare needs.", response_metadata={'token_usage': {'completion_tokens': 160, 'prompt_tokens': 1608, 'total_t

In [18]:
# Show only the summary text from the response.
pprint(retval.content)

("The Reddit thread discusses a user's positive experience with the online "
 'medical service 98point6. The user, who was underinsured and in need of a '
 'prescription for their back pain, found the service helpful and affordable. '
 'They highlighted the ease of use, the low cost of $20 for a one-year '
 'membership with unlimited online appointments, and the compassionate and '
 'helpful doctor they interacted with. The doctor recommended a suitable '
 'medication and even checked the price using GoodRx to ensure affordability '
 'for the user. Other users in the thread shared their experiences and '
 'opinions on internet medicine, with some praising the service for quick '
 'access to medications and others expressing concerns about not receiving the '
 'needed antibiotics. Overall, the thread showcases the convenience and '
 'cost-effectiveness of online medical services like 98point6 for basic '
 'healthcare needs.')


# Google Search on Reddit for sources

In [23]:

from googlesearch import search
from functools import lru_cache
from typing import Iterable

@lru_cache(maxsize=1000)
def reddit_search(query: str, num=10, stop=10, pause=2) -> Iterable[str]:
    """Search Google for reddit.com pages containing ``query`` as an exact phrase.

    Results are memoized per argument combination, so repeated calls with the
    same arguments do not hit the search engine again. The cached list object
    itself is returned, so callers should not mutate it.

    Args:
        query: Phrase to search for; it is wrapped in double quotes.
        num, stop, pause: Passed through unchanged to ``googlesearch.search``.

    Returns:
        A list of result URLs.
    """
    # The phrase must be wrapped in exactly one pair of quotes; the previous
    # version appended a stray extra quote, producing an unbalanced query.
    site_query = f'site:reddit.com "{query}"'

    # Materialize the results so the cached value can be reused safely
    # (search() may return a lazy iterator).
    return list(search(site_query, num=num, stop=stop, pause=pause))

def test_search():
    """Smoke check: run a sample search for "98point6" and print one URL per line."""
    found_urls = reddit_search("98point6", stop=20, pause=2)
    # Guarded so that an empty result prints nothing at all.
    if found_urls:
        print(*found_urls, sep="\n")

test_search()

https://www.reddit.com/r/povertyfinance/comments/bg7ip2/internet_medicine_is_awesome_98point6_was_so_so/
https://www.reddit.com/r/Chipotle/comments/l5bbt9/has_anyone_used_the_98point6_primary_care/
https://www.reddit.com/r/AmazonFC/comments/rgxxbw/has_anyone_used_amazon_care_app_or_98point6_app/
https://www.reddit.com/r/AmazonFC/comments/nqrtaw/98point6/
https://www.reddit.com/r/depressionregimens/comments/bko5k9/psa_98point6_is_an_amazing_app_for_medication/
https://www.reddit.com/r/CostcoEmployee/comments/11wu6zh/anyone_use_98point6_are_they_helpful_better_than/
https://www.reddit.com/r/TTC_PCOS/comments/ipmklh/98point6_pcos_appointment_experience/
https://www.reddit.com/r/WalgreensStores/comments/14n48uy/virtual_doctor/
https://www.reddit.com/r/QuikTrip/comments/u2j0ut/my_apologies_for_any_bad_links_qt_is_fully/
https://www.reddit.com/r/tretinoin/comments/lx0zjb/didnt_have_to_walk_into_a_dermatologist_or_even_a/
https://www.reddit.com/r/AmazonFC/comments/nqxfli/besides_using_98point

In [25]:
# Keep the result URLs for later cells; reddit_search is lru_cache-decorated, so a
# repeat call with identical arguments is served from the cache.
thread_urls = reddit_search("98point6", stop=20, pause=2)

In [27]:
import random

# Pick one thread URL at random from the search results.
# NOTE(review): no random seed is set, so a re-run may pick a different thread.
random_url = random.choice(thread_urls)

# Look up the submission behind that URL
random_submission = reddit.submission(url=random_url)

# Flatten the post and its top-level comments into plain text
text_representation = reddit_thread_to_text(random_submission)
pprint(text_representation)

# Summarize the text with the prompt | llm chain
summary_result = runnable.invoke({"text": text_representation})

pprint(summary_result.content)

('\n'
 'Currently sick and called ERC to get the 5 days excused absences for COVID '
 'testing. Regardless of the test result, whether it’s covid or not, I '
 'definitely can’t work. Should I wait to get my test done so that way I can '
 'use these days to get better? by thegeissberger on 2021-09-15 [+3 votes]:\n'
 '\n'
 '\n'
 'Comment by None on 2021-09-16 [+8 votes]:\n'
 'Download 98point6 its a telehealth app where you can text doctors and get '
 'doctors notes and prescriptions and stuff. Tell them you’ve come into '
 'contact with someone that has COVID and you’re experiencing symptoms. Tell '
 'them you need a doctors note for work and they will email you one. Enjoy '
 'your paid 2 week vacation.\n'
 '\n'
 'Comment by ReverseGenjutsu on 2021-09-16 [+3 votes]:\n'
 "If you think you have covid get tested. If its positive you'll get paid "
 'leave. If you wait too long it could be negative even if you had covid.\n'
 '\n'
 'Comment by Intellectualthinkr on 2021-09-16 [+1 votes]:\n'
 

In [30]:
html_template = """
<html>
<head>
    <title>Reddit Thread Summary</title>
</head>
<body>
    <h2>Summary</h2>
    <p>{summary_text}</p>

    <h2>Original Thread</h2>
    <p>{original_text}</p>
</body>
</html>
"""

def render_summary(original_text, summary_text):
    """Render a thread and its summary as a small standalone HTML page.

    Both inputs come from outside the notebook (Reddit users and the LLM), so
    they are HTML-escaped before being inserted into the template; newlines are
    then turned into ``<br>`` so the line structure survives rendering.

    Args:
        original_text: Plain-text thread.
        summary_text: Plain-text summary.

    Returns:
        The filled-in ``html_template`` as a string.
    """
    # Local import: the name `html` may be rebound to a string elsewhere in the notebook.
    from html import escape

    def _as_html(text):
        # Escape first, then add <br>, so the inserted tags are not escaped themselves.
        return escape(text).replace("\n", "<br>")

    return html_template.format(
        original_text=_as_html(original_text),
        summary_text=_as_html(summary_text),
    )

# Render the most recent thread/summary pair.
# NOTE(review): the variable name `html` is also the name of a stdlib module.
html = render_summary(text_representation, summary_result.content)

from IPython.display import HTML
# Show the page inline in the notebook output.
display(HTML(html))

In [33]:
# Check the concrete type returned by runnable.invoke.
type(summary_result)

langchain_core.messages.ai.AIMessage

In [50]:
from typing import NamedTuple
from langchain_core.messages.ai import AIMessage

class ThreadResult(NamedTuple):
    """A Reddit thread bundled with its flattened text and the LLM summary."""

    # The PRAW submission the text was built from.
    submission: praw.models.Submission
    # Plain-text rendering of the thread.
    text: str
    # Chain response; its `.content` holds the summary.
    summary_result: AIMessage

    def to_html(self):
        """Render the thread header, link, summary and original text as HTML.

        All interpolated values come from Reddit or the LLM, so each one is
        HTML-escaped. Newlines in both the summary and the original text are
        converted to ``<br>``.

        Returns:
            An HTML document as a string.
        """
        from html import escape

        def _as_html(value) -> str:
            # Escape first, then add <br>, so the inserted tags are not escaped themselves.
            return escape(str(value)).replace("\n", "<br>")

        title = escape(str(self.submission.title))
        author = escape(str(self.submission.author))
        # escape() also escapes quotes, which keeps the href attribute well-formed.
        url = escape(str(self.submission.url))
        summary = _as_html(self.summary_result.content)
        text = _as_html(self.text)
        return f"""
<html>
<body>
    <h1>{title} by {author} on {utc_to_date(self.submission.created_utc)}</h1>
    <a href="{url}">{url}</a>
    <h2>Summary</h2>
    <p>{summary}</p>

    <h2>Original Thread</h2>
    <p>{text}</p>
</body>
</html>
"""

def process_url(url: str) -> ThreadResult:
    """Fetch the thread at ``url``, flatten it to text, summarize it, and bundle the results."""
    thread = reddit.submission(url=url)
    thread_text = reddit_thread_to_text(thread)
    return ThreadResult(
        submission=thread,
        text=thread_text,
        summary_result=runnable.invoke({"text": thread_text}),
    )


In [51]:
# Run the whole pipeline on one randomly chosen search result and show it inline.
# NOTE(review): unseeded random choice, so a re-run may pick a different thread.
random_url = random.choice(thread_urls)
random_thread_result = process_url(random_url)
display(HTML(random_thread_result.to_html()))

In [49]:
# Show which URL was picked.
# NOTE(review): this cell's execution count (49) is lower than the previous cell's (51),
# so the value shown may not match the thread rendered there.
random_url

'https://www.reddit.com/r/povertyfinance/comments/bg7ip2/internet_medicine_is_awesome_98point6_was_so_so/'