In [None]:
subreddits = ["selfimprovement", "productivity", "DecidingToBeBetter"]
keywords = ["coach", "coaching"]
match_limit = 100
fuzz_threshold = 90
include_comments = False
days_ago = 7  # Set how many days back to search

In [None]:
import praw
import json
import csv
from datetime import datetime, timedelta
import pytz
from fuzzywuzzy import fuzz
from time import sleep

# Define Mountain Time (MT)
MT_TZ = pytz.timezone("America/Denver")

# Reddit API credentials
reddit = praw.Reddit(
    client_id="dy9fGuqXfFzXSiIjwMR76A",
    client_secret="OtMBalXE0EWew9impTq2huEU9Kp3oQ",
    user_agent="Post_Finder_by_u/Dr_Dan_Lathen"
)

HISTORY_FILE = "search_history.json"

def load_search_history():
    try:
        with open(HISTORY_FILE, "r") as file:
            return json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}

def save_search_history(history):
    with open(HISTORY_FILE, "w") as file:
        json.dump(history, file, indent=4)

def get_last_search_time(subreddit, keyword):
    history = load_search_history()
    last_timestamp = history.get(subreddit, {}).get(keyword, None)
    
    if last_timestamp:
        if isinstance(last_timestamp, str):
            # If timestamp is stored as a string, convert it back to a datetime object
            last_time_mt = datetime.strptime(last_timestamp, '%Y-%m-%d %H:%M:%S %Z')
        elif isinstance(last_timestamp, (int, float)):
            # If timestamp is a Unix timestamp, convert it to Mountain Time
            last_time_mt = datetime.utcfromtimestamp(last_timestamp).replace(tzinfo=pytz.utc).astimezone(MT_TZ)
        else:
            raise TypeError("Unsupported timestamp format")
            
        return last_time_mt
    return None


def update_search_time(subreddit, keyword, timestamp):
    history = load_search_history()
    if subreddit not in history:
        history[subreddit] = {}
    history[subreddit][keyword] = timestamp
    save_search_history(history)

def search_until_match_limit(subreddits, keywords, match_limit, fuzz_threshold, include_comments, days_ago):
    results_list = []
    total_checked, total_matches = 0, 0
    current_time = datetime.utcnow().replace(tzinfo=pytz.utc).astimezone(MT_TZ)  # Convert UTC to Mountain Time
    cutoff_time = current_time - timedelta(days=days_ago)  # Define the earliest post to consider

    for subreddit_name in subreddits:
        print(f"\nSearching in r/{subreddit_name} for keywords: {', '.join(keywords)}\n")
        subreddit = reddit.subreddit(subreddit_name)
        
        for keyword in keywords:
            last_search_time = get_last_search_time(subreddit_name, keyword)
            matches_in_subreddit = 0

            try:
                for post in subreddit.new(limit=None):
                    post_time = datetime.utcfromtimestamp(post.created_utc).replace(tzinfo=pytz.utc).astimezone(MT_TZ)

                    if post_time < cutoff_time:  
                        break  # Stop searching if the post is older than the set days_ago limit
                    
                    if last_search_time and post_time <= last_search_time:
                        continue  # Skip posts that were already searched
                    
                    total_checked += 1
                    content = (post.title + " " + (post.selftext or "")).lower()
                    
                    if keyword.lower() in content or fuzz.ratio(keyword.lower(), content) >= fuzz_threshold:
                        total_matches += 1
                        matches_in_subreddit += 1
                        result = {
                            "subreddit": subreddit_name,
                            "title": post.title,
                            "url": post.url,
                            "date": post_time.strftime('%Y-%m-%d %H:%M:%S %Z'),  # Include timezone info
                            "body": post.selftext or "No body content",
                            "matched_keyword": keyword
                        }
                        
                        if include_comments:
                            try:
                                post.comments.replace_more(limit=0)
                                result["comments"] = " | ".join([comment.body for comment in post.comments.list()[:5]])
                            except Exception as e:
                                print(f"Error fetching comments: {e}")
                        
                        results_list.append(result)
                        print(f"Title: {result['title']}\nURL: {result['url']}\nDate: {result['date']}\nKeyword: {result['matched_keyword']}\n")
                    
                    if matches_in_subreddit >= match_limit:
                        break

                # Store last search time in UTC for consistency
                update_search_time(subreddit_name, keyword, current_time.astimezone(pytz.utc).timestamp())
            
            except praw.exceptions.RedditAPIException as e:
                if "Ratelimit exceeded" in str(e):
                    print("Rate limit exceeded. Retrying in 60 seconds...")
                    sleep(60)
                    continue
                else:
                    print(f"Reddit API Error: {str(e)}")

    if results_list:
        output_file = f"{datetime.now().strftime('%Y-%m-%d')}_reddit_search_results.csv"
        save_results_to_csv(results_list, output_file, include_comments)
    
    print(f"\nTotal posts checked: {total_checked}")
    print(f"Total matches found: {total_matches}")

def save_results_to_csv(results, output_file, include_comments):
    fieldnames = ["subreddit", "title", "url", "date", "body", "matched_keyword"]
    if include_comments:
        fieldnames.append("comments")
    
    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)
    print(f"\nResults saved to {output_file}")

search_until_match_limit(subreddits, keywords, match_limit, fuzz_threshold, include_comments, days_ago)


  current_time = datetime.utcnow().replace(tzinfo=pytz.utc).astimezone(MT_TZ)  # Convert UTC to Mountain Time



Searching in r/selfimprovement for keywords: coach, coaching



  post_time = datetime.utcfromtimestamp(post.created_utc).replace(tzinfo=pytz.utc).astimezone(MT_TZ)


Title: Im 15, life is shit,help
URL: https://www.reddit.com/r/selfimprovement/comments/1jiantl/im_15_life_is_shithelp/
Date: 2025-03-23 15:45:01 MDT
Keyword: coach


Searching in r/productivity for keywords: coach, coaching

Title: I am seeing a real trend in my coaching business.
URL: https://www.reddit.com/r/productivity/comments/1jir2z4/i_am_seeing_a_real_trend_in_my_coaching_business/
Date: 2025-03-24 07:48:39 MDT
Keyword: coach

Title: I am seeing a real trend in my coaching business.
URL: https://www.reddit.com/r/productivity/comments/1jir2z4/i_am_seeing_a_real_trend_in_my_coaching_business/
Date: 2025-03-24 07:48:39 MDT
Keyword: coaching


Searching in r/DecidingToBeBetter for keywords: coach, coaching

Title: 30-min ‘truth session’
URL: https://www.reddit.com/r/DecidingToBeBetter/comments/1jgpebp/30min_truth_session/
Date: 2025-03-21 13:34:32 MDT
Keyword: coach

Title: Mini realization that I should treat myself as though I were crafting the morals for a son.
URL: https://www.r

In [13]:
import json
from datetime import datetime, timedelta
import pytz

# Define Mountain Time (MT)
MT_TZ = pytz.timezone("America/Denver")
HISTORY_FILE = "search_history.json"

def load_search_history():
    """Load search history from the JSON file."""
    try:
        with open(HISTORY_FILE, "r") as file:
            return json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}

def convert_to_mt(utc_timestamp):
    """Convert a UTC timestamp to Mountain Time (MT)."""
    try:
        dt_utc = datetime.utcfromtimestamp(float(utc_timestamp)).replace(tzinfo=pytz.utc)
        return dt_utc.astimezone(MT_TZ)
    except ValueError:
        return None

def display_last_search_times():
    """Display last search times in Mountain Time (MT)."""
    history = load_search_history()
    
    if not history:
        print("No search history found.")
        return

    print("\nLast search times in Mountain Time (MT):\n")
    for subreddit, keywords in history.items():
        print(f"Subreddit: r/{subreddit}")
        for keyword, timestamp in keywords.items():
            mt_time = convert_to_mt(timestamp)
            if mt_time:
                formatted_time = mt_time.strftime('%Y-%m-%d %I:%M:%S %p %Z')  # Format with AM/PM
                print(f"  - Keyword: {keyword} → Last searched at: {formatted_time}")
        print()  # Add space between subreddits

def get_days_since_last_search():
    """Determine the number of days to research since the last search."""
    history = load_search_history()
    
    if not history:
        print("No search history found.")
        return 0  # No need to research any days
    
    last_search_dates = []
    
    for subreddit, keywords in history.items():
        for keyword, timestamp in keywords.items():
            mt_time = convert_to_mt(timestamp)
            if mt_time:
                last_search_dates.append(mt_time)
    
    if not last_search_dates:
        print("No valid timestamps found in search history.")
        return 0
    
    last_search_date = min(last_search_dates)  # Earliest recorded search time
    current_time = datetime.now(MT_TZ)
    
    days_since_last_search = (current_time - last_search_date).days
    print(f"\nYou need to research the last {days_since_last_search} days to cover all records.")
    
    return days_since_last_search

# Run the functions
display_last_search_times()



Last search times in Mountain Time (MT):

Subreddit: r/selfimprovement
  - Keyword: coach → Last searched at: 2025-03-25 12:34:37 PM MDT
  - Keyword: coaching → Last searched at: 2025-03-25 12:34:37 PM MDT

Subreddit: r/productivity
  - Keyword: coach → Last searched at: 2025-03-25 12:34:37 PM MDT
  - Keyword: coaching → Last searched at: 2025-03-25 12:34:37 PM MDT

Subreddit: r/DecidingToBeBetter
  - Keyword: coach → Last searched at: 2025-03-25 12:34:37 PM MDT
  - Keyword: coaching → Last searched at: 2025-03-25 12:34:37 PM MDT



  dt_utc = datetime.utcfromtimestamp(float(utc_timestamp)).replace(tzinfo=pytz.utc)


In [None]:
def get_days_since_last_search():
    """Determine the number of days to research since the last search."""
    history = load_search_history()
    
    if not history:
        print("No search history found.")
        return 0  # No need to research any days
    
    last_search_dates = []
    
    for subreddit, keywords in history.items():
        for keyword, timestamp in keywords.items():
            mt_time = convert_to_mt(timestamp)
            if mt_time:
                last_search_dates.append(mt_time)
    
    if not last_search_dates:
        print("No valid timestamps found in search history.")
        return 0
    
    last_search_date = min(last_search_dates)  # Earliest recorded search time
    current_time = datetime.now(MT_TZ)
    
    days_since_last_search = (current_time - last_search_date).days
    print(f"\nYou need to research the last {days_since_last_search} days to cover all records.")
    
    return days_since_last_search

get_days_since_last_search()


You need to research the last 0 days to cover all records.


  dt_utc = datetime.utcfromtimestamp(float(utc_timestamp)).replace(tzinfo=pytz.utc)


0