We will read a file (`links.txt`) in which every line has the following format:

<Link>,<replies, (+) comments, (+) votes>,<minimum number of seconds between two checks of this link>

The link can be either a post link or a comment link. A post link monitors the entire post; a comment link monitors only that comment. `replies` watches for new direct replies (top-level comments for a post), `comments` also watches the nested sub-comments, and `votes` watches for changes in the score. Put a `+` between the options to monitor more than one (for example `replies+votes`).

In [1]:
import pandas as pd;
import praw;
import json;
from urllib.parse import urlparse;
import pickle;
import time;
from os.path import exists;
from difflib import SequenceMatcher

In [2]:
def get_id_from_url(url):
    """Extract the Reddit IDs embedded in a post or comment permalink.

    The path is split on '/', and the segment that follows 'comments' is
    taken as the submission ID. When the path has exactly seven segments
    (after dropping the empty one left by a trailing slash), the last
    segment is taken as the comment ID.

    Returns:
        ``[submission_id]`` for a post link, or
        ``[submission_id, comment_id]`` for a comment link.

    Raises:
        ValueError: if the path contains no 'comments' segment.
    """
    segments = urlparse(url).path.split('/')

    # A trailing slash leaves an empty final segment; drop it.
    if segments[-1] == '':
        del segments[-1]

    post_id = segments[segments.index('comments') + 1]

    if len(segments) != 7:
        return [post_id]
    return [post_id, segments[-1]]

In [3]:
def get_replies(submission_ids):
    """Collect the bodies of the direct replies to a post or to one comment.

    Args:
        submission_ids: ``[submission_id]`` to watch a whole post, or
            ``[submission_id, comment_id]`` to watch a single comment
            (the shape returned by ``get_id_from_url``).

    Returns:
        A list of comment body strings: the top-level comments of the post,
        or the direct children of the given comment. The list is empty when
        nothing matches (including when the comment ID is not in the thread).
    """
    # praw_obj is the module-level praw.Reddit client.
    submission = praw_obj.submission(submission_ids[0])
    # limit=0 removes the "load more comments" placeholders from the forest
    # so that every item iterated below is a real comment.
    submission.comments.replace_more(limit=0)

    if len(submission_ids) != 2:
        return [comment.body for comment in submission.comments]

    comment_id = submission_ids[1]
    # Match children by their parent ID directly instead of first locating
    # the parent among the top-level comments: that lookup left its result
    # unbound whenever the target was nested or missing.
    # parent_id carries a three-character type prefix, hence the [3:].
    return [
        comment.body
        for comment in submission.comments.list()
        if comment.parent_id[3:] == comment_id
    ]

In [4]:
def get_comments(submission_ids):
    """Collect the bodies of every comment under a post or under one comment.

    Args:
        submission_ids: ``[submission_id]`` to watch a whole post, or
            ``[submission_id, comment_id]`` to watch a single comment
            (the shape returned by ``get_id_from_url``).

    Returns:
        A list of comment body strings. For a post this is every comment in
        the flattened thread. For a comment it is that comment's own body
        followed by all of its descendants in breadth-first order. The list
        is empty when the comment ID is not found in the thread.
    """
    from collections import deque

    # praw_obj is the module-level praw.Reddit client.
    submission = praw_obj.submission(submission_ids[0])
    # limit=0 removes the "load more comments" placeholders from the forest
    # so that every item iterated below is a real comment.
    submission.comments.replace_more(limit=0)

    if len(submission_ids) != 2:
        return [comment.body for comment in submission.comments.list()]

    comment_id = submission_ids[1]
    # Search the flattened thread rather than only the top level, so that
    # nested comment links work and a missing comment no longer leaves the
    # root variable unbound.
    root = next(
        (comment for comment in submission.comments.list()
         if comment.id == comment_id),
        None,
    )
    if root is None:
        return []

    bodies = []
    pending = deque([root])
    while pending:
        current = pending.popleft()
        bodies.append(current.body)
        pending.extend(current.replies)
    return bodies

In [5]:
def get_votes(submission_ids):
    """Return the current score of a post or of one comment in it.

    Args:
        submission_ids: ``[submission_id]`` for the post's score, or
            ``[submission_id, comment_id]`` for a single comment's score.

    Returns:
        The ``score`` attribute of the submission or of the matching comment.

    Raises:
        ValueError: if a comment ID is given but no such comment is found in
            the thread. (Previously this surfaced as an UnboundLocalError.)
    """
    # praw_obj is the module-level praw.Reddit client.
    submission = praw_obj.submission(submission_ids[0])

    if len(submission_ids) != 2:
        return submission.score

    comment_id = submission_ids[1]
    # Same preparation as get_replies/get_comments: drop the "load more"
    # placeholders, then look through the whole flattened thread so nested
    # comments are found as well.
    submission.comments.replace_more(limit=0)
    for comment in submission.comments.list():
        if comment.id == comment_id:
            return comment.score

    raise ValueError(
        'comment %r not found in submission %r' % (comment_id, submission_ids[0])
    )

In [7]:
#replies = get_replies('https://www.reddit.com/r/AtlantaUnited/comments/6zmues/transaction_tuesday_september_12_2017/?st=j7lyd7mg&sh=fb2ffdba');

In [8]:
#comments = get_replies('https://www.reddit.com/r/AtlantaUnited/comments/6zmues/transaction_tuesday_september_12_2017/dmwpm8y/');

In [9]:
#votes = get_votes('https://www.reddit.com/r/AtlantaUnited/comments/6zmues/transaction_tuesday_september_12_2017/dmwpm8y/');

In [10]:
def save_replies(replies, title):
    """Pickle ``replies`` to ``<title>_replies.pkl``, replacing any old file."""
    # The context manager closes (and flushes) the file even if dump() raises.
    with open(title + '_replies.pkl', 'wb') as out_file:
        pickle.dump(replies, out_file)

In [11]:
def save_comments(comments, title):
    """Pickle ``comments`` to ``<title>_comments.pkl``, replacing any old file."""
    # The context manager closes (and flushes) the file even if dump() raises.
    with open(title + '_comments.pkl', 'wb') as out_file:
        pickle.dump(comments, out_file)

In [12]:
def save_votes(votes, title):
    """Pickle ``votes`` to ``<title>_votes.pkl``, replacing any old file."""
    # The context manager closes (and flushes) the file even if dump() raises.
    with open(title + '_votes.pkl', 'wb') as out_file:
        pickle.dump(votes, out_file)

In [13]:
def save_duration(duration_table):
    """Pickle ``duration_table`` to ``duration.pkl`` in the working directory."""
    # The context manager closes (and flushes) the file even if dump() raises.
    with open('duration.pkl', 'wb') as out_file:
        pickle.dump(duration_table, out_file)

In [14]:
def load_replies(title):
    """Load the replies stored in ``<title>_replies.pkl``.

    Returns:
        The unpickled object, or an empty list when the file does not exist.
    """
    path = title + '_replies.pkl'
    if not exists(path):
        return []
    # NOTE: pickle can execute code while loading; only read files that
    # this script wrote itself.
    with open(path, 'rb') as in_file:
        return pickle.load(in_file)

In [15]:
def load_comments(title):
    """Load the comments stored in ``<title>_comments.pkl``.

    Returns:
        The unpickled object, or an empty list when the file does not exist.
    """
    path = title + '_comments.pkl'
    if not exists(path):
        return []
    # NOTE: pickle can execute code while loading; only read files that
    # this script wrote itself.
    with open(path, 'rb') as in_file:
        return pickle.load(in_file)

In [16]:
def load_votes(title):
    """Load the score stored in ``<title>_votes.pkl``.

    Returns:
        The unpickled object, or ``0`` when the file does not exist.
    """
    path = title + '_votes.pkl'
    if not exists(path):
        return 0
    # NOTE: pickle can execute code while loading; only read files that
    # this script wrote itself.
    with open(path, 'rb') as in_file:
        return pickle.load(in_file)

In [17]:
def load_duration():
    """Load the last-update table stored in ``duration.pkl``.

    Returns:
        The unpickled object, or an empty dict when the file does not exist
        in the working directory.
    """
    path = 'duration.pkl'
    if not exists(path):
        return {}
    # NOTE: pickle can execute code while loading; only read files that
    # this script wrote itself.
    with open(path, 'rb') as in_file:
        return pickle.load(in_file)

In [18]:
def update_duration(submission_key, duration_table):
    """Stamp ``submission_key`` with the current time in ``duration_table``.

    The table is modified in place and also returned.
    """
    now = time.time()
    duration_table.update({submission_key: now})
    return duration_table

In [19]:
def should_update(submission_key, duration_table, duration):
    """Tell whether ``submission_key`` is due for another check.

    A key that has no entry in ``duration_table`` is always due. Otherwise
    the key is due once at least ``duration`` seconds have passed since the
    timestamp recorded for it.
    """
    if submission_key in duration_table:
        elapsed = time.time() - duration_table[submission_key]
        return elapsed >= duration
    return True

In [20]:
def get_updates(submission_ids, submission_key, update_types):
    """Fetch the current state of a link and diff it against the saved state.

    Args:
        submission_ids: ID list as returned by ``get_id_from_url``.
        submission_key: prefix of the pickle files holding the saved state.
        update_types: container tested with ``in`` for 'replies',
            'comments' and 'votes'. NOTE(review): the caller appears to pass
            the raw string from the links file (e.g. 'replies+votes'), in
            which case these are substring tests; confirm.

    Returns:
        A tuple ``(updates, all_vals)`` of dicts keyed by the requested
        types. ``updates`` holds what changed (new reply/comment bodies, or
        the score difference); ``all_vals`` holds the full current values.
    """
    updates = {}
    all_vals = {}

    if 'replies' in update_types:
        all_replies = get_replies(submission_ids)
        # A set makes each membership test constant time.
        seen_replies = set(load_replies(submission_key))

        updates['replies'] = [body for body in all_replies
                              if body not in seen_replies]
        all_vals['replies'] = all_replies

    if 'comments' in update_types:
        all_comments = get_comments(submission_ids)
        seen_comments = set(load_comments(submission_key))

        # Fixed: this used to filter all_replies, which raised NameError when
        # only comments were requested and gave wrong results otherwise.
        updates['comments'] = [body for body in all_comments
                               if body not in seen_comments]
        all_vals['comments'] = all_comments

    if 'votes' in update_types:
        current_vote = get_votes(submission_ids)
        previous_vote = load_votes(submission_key)

        updates['votes'] = current_vote - previous_vote
        all_vals['votes'] = current_vote

    return updates, all_vals

In [21]:
def save_updates(submission_key, updates):
    """Persist each value present in ``updates`` under ``submission_key``.

    Only the 'replies', 'comments' and 'votes' entries are looked at, in
    that order; each is handed to its matching save_* function.
    """
    writers = (
        ('replies', save_replies),
        ('comments', save_comments),
        ('votes', save_votes),
    )
    for kind, write in writers:
        if kind in updates:
            write(updates[kind], submission_key)

In [22]:
# Main setup: read the watch list and the API credentials, then build the
# Reddit client used by the get_* functions.
# links.txt is read without a header row, so its columns are numbered 0, 1, 2...
links = pd.read_csv('links.txt', header=None)
# Close the credentials file as soon as it has been parsed.
with open('creds.json', 'r') as creds_file:
    creds = json.load(creds_file)
praw_obj = praw.Reddit(user_agent='WatchBot',
                       client_id=creds['client_id'],
                       client_secret=creds['client_secret'])



In [23]:
# Check every watched link that is due and record when it was checked.
duration_table = load_duration()

try:
    # name=None yields plain tuples: (link, update types, duration, ...).
    for row in links.itertuples(index=False, name=None):
        submission_ids = get_id_from_url(row[0])
        # The joined IDs double as the file-name prefix and the table key.
        submission_ref = '_'.join(submission_ids)

        update_types = row[1]
        duration = row[2]

        if should_update(submission_ref, duration_table, duration):
            updated_vals, all_vals = get_updates(submission_ids, submission_ref, update_types)
            # Store the full current state so the next run diffs against it.
            save_updates(submission_ref, all_vals)
            update_duration(submission_ref, duration_table)
finally:
    # Persist timestamps even if one link fails, so links that were already
    # processed are not re-checked too early on the next run.
    save_duration(duration_table)

dict_keys(['replies', 'comments', 'votes'])