In [1]:
import json
import os
import sys

import praw
from dotenv import load_dotenv

sys.path.insert(0, '..')  # must run before the star import so `utility` is found one directory up
from utility import *
from tqdm import tqdm  # kept after the star import, as originally ordered, so it cannot rebind `tqdm`


In [2]:
def check_relevance(post):
    """Ask the GPT model whether a post is relevant to Ehlers-Danlos syndrome.

    Args:
        post: Object exposing a ``title`` attribute; the title is both embedded
            in the system prompt and sent as the user message.

    Returns:
        The value returned by ``get_GPT_response`` (the prompt requests a JSON
        object with ``relevance``, ``reason`` and ``post`` keys). If that call
        raises, a valid JSON string with ``relevance`` set to ``false`` and the
        reason ``'GPT did not respond'`` is returned instead, so callers can
        always attempt ``json.loads`` on the fallback.
    """
    relevance_system_prompt = f'''
    You are an excellent question validator. Verify if the post is relevant enough to address concerns about Ehlers-Danlos syndrome
    If the post is about Ehlers-Danlos syndrome related symptoms or treatments or drugs or lifestyle, it can be considered relevant. 
    If the post is about something very vague, consider it as non-relevant.
    If relevant, return the reponse as JSON
    {{relevance : True, reason : <your reason>, post: {post.title}}}
    else, return:
    {{relevance : False,reason : <your reason>,post: {post.title}}}
    '''
    try:
        response = get_GPT_response(post.title, relevance_system_prompt, os.environ.get('MODEL_NAME'), temperature=0.3)
    except Exception:
        # `Exception` (not a bare `except`) so Ctrl-C / SystemExit still propagate.
        # json.dumps guarantees a parseable fallback even when the title contains
        # quotes or braces; the previous hand-written string was not valid JSON.
        response = json.dumps({
            'relevance': False,
            'reason': 'GPT did not respond',
            'post': post.title,
        })
    return response




def summarize_comments(post):
    """Summarize the comments of a post with the GPT model.

    Args:
        post: Object exposing ``title`` and ``comments.list()``; each listed
            entry that has a ``body`` attribute contributes its text.

    Returns:
        The value returned by ``get_GPT_response`` (the prompt requests a JSON
        object with ``instruction`` and ``output`` keys), or ``None`` when the
        post has no comment text to summarize or the GPT call raises.
    """
    # Per the PRAW docs the flattened list may still hold `MoreComments`
    # placeholders, which have no `body`; skip them instead of raising
    # AttributeError before the try block is even reached.
    bodies = [comment.body for comment in post.comments.list() if hasattr(comment, 'body')]
    if not bodies:
        # Nothing to summarize: avoid asking the model to summarize an empty passage.
        return None
    comments = '\n'.join(bodies)
    summarization_system_prompt = f'''
        You are an expert in summarizing a text passage. Your job is to summarize the given text passage such that it captures the essence of the passage.
        IMPORTANT: In your summary, make sure to include the statement that the information is based on public posts from online platforms. 
        Report your response in JSON format as follows:
        {{instruction: {post.title}, output : <your summary>}}        
    '''
    try:
        comments = 'Here is the input text passage to summarize: ' + comments
        response = get_GPT_response(comments, summarization_system_prompt, os.environ.get('MODEL_NAME'), temperature=0.7)
    except Exception:
        # Best-effort: a failed model call yields no summary, but Ctrl-C still propagates.
        response = None
    return response
        

In [3]:
# Credentials are read from a `.eds.env` file in the repo root folder
# (create it with your Reddit credentials before running this cell).
dotenv_path = os.path.join(REPO_ROOT_PATH, '.eds.env')
load_dotenv(dotenv_path)

# Read the three settings passed to praw.Reddit from the environment in one pass.
client_id, client_secret, user_agent = (
    os.environ.get(key) for key in ('client_id', 'client_secret', 'user_agent')
)

reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent=user_agent)

In [31]:
# Search phrase submitted to the subreddit search.
search_query = 'Ehlers-Danlos syndrome diet'

# Subreddit handle obtained from the `reddit` client.
subreddit = reddit.subreddit('eds')


In [32]:
# Materialize the listing: `subreddit.search` returns a lazy, single-pass
# generator, so iterating it a second time (e.g. re-running the loop cell on
# its own) would silently process nothing. A list can be re-iterated and also
# gives tqdm a length, so the progress bar shows a total.
search_results = list(subreddit.search(search_query, sort='relevance', limit=1000))


In [33]:

# Collect one parsed summary per relevant post. Posts whose relevance verdict
# or summary cannot be parsed (or whose summarization fails) are skipped so a
# single bad post does not stop the run.
summary_list = []
for post in tqdm(search_results):
    relevance = check_relevance(post)
    try:
        if not json.loads(relevance)['relevance']:
            continue
        summary = summarize_comments(post)
        if summary:
            summary_list.append(json.loads(summary))
    except Exception:
        # `Exception` rather than a bare `except`: keeps the best-effort skip
        # but lets KeyboardInterrupt / SystemExit stop this long-running loop.
        continue
            


60it [00:14,  4.21it/s]


In [50]:
# Spot-check one collected summary: the 14th entry counting from the end of the list.
summary_list[-14]

{'summary': 'The information provided is based on public posts from online platforms. The passage discusses the confusion around the use of a specific form for diagnosing hEDS in Canada and the lack of standardization in diagnosis. It also mentions the frustration with doctors who dismiss symptoms and the possibility of a genetic marker for hEDS. The post shares a link to the Ehlers-Danlos Society website, which is considered a good resource. It concludes by pointing out that doctors have the flexibility to make their own judgments in diagnosing hEDS, which can lead to inconsistent diagnoses.'}