<a href="https://colab.research.google.com/github/s1rens/Reddit-Suicidality-Monitor/blob/main/Main_Program.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install praw
!pip install cryptography
import pandas as pd
import praw
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
import spacy
import pickle
import csv
import time
import os
from cryptography.fernet import Fernet
# Load the spaCy pipeline "en_core_web_sm" once at start-up; preprocess() runs it
# over every post to lemmatise tokens and drop stop words and punctuation.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Create the PRAW client that bot() and monitor() use for every Reddit call.
# Replace the placeholder credentials below before running.
reddit = praw.Reddit(
    client_id='ENTER CLIENT ID',
    client_secret='ENTER CLIENT SECRET',
    username='ENTER REDDIT USERNAME',
    password='ENTER PASSWORD',
    user_agent='null',
)

In [None]:
# Load the trained logistic regression model.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load .pkl files that you created yourself or otherwise trust.
with open('model_logistic_regression_3000.pkl', 'rb') as file:
    model = pickle.load(file)

# Load the fitted tf-idf vectorizer (same pickle trust caveat as above).
with open('tfidf_3000.pkl', 'rb') as file:
    tfidf = pickle.load(file)

# Load the important words, one per line. Blank lines (typically the empty
# string left behind by a trailing newline) are skipped.
with open('important_words_3000.txt', 'r') as file:
    important_words = [line for line in file.read().split('\n') if line.strip()]

# Load the inspirational quotes, one per line. Blank lines are skipped so that
# random.choice() in bot() can never pick an empty quote.
with open('inspirational_quotes.txt', 'r', encoding='utf-8') as file:
    inspirational_quotes = [line for line in file.read().split('\n') if line.strip()]

# Load the symmetric key used for encrypting and decrypting the log and the
# list of monitored usernames.
with open('key.key', 'rb') as file:
    key = file.read()
fnet = Fernet(key)

In [None]:
def log_submission(submission_df, prediction_df):
    """Join submissions with their predictions and append them, encrypted, to submission_log.csv.

    The intermediate preprocessing columns ('postfixed', 'goodlemma',
    'goodlemma_most_important') are dropped from ``submission_df`` in place,
    then the remaining columns are concatenated column-wise with
    ``prediction_df``. The combined frame has positional column labels 0..4 in
    the order [subreddit, author, date, post, prediction]; bot() reads it by
    those labels.

    Every cell is encrypted with the module-level ``fnet`` before being written.
    The plain-text header row is written only when the log file is empty.

    Args:
        submission_df: frame produced by preprocess().
        prediction_df: single-column frame produced by classify().

    Returns:
        The combined (unencrypted) frame, or None if it could not be built.
        A failure while displaying or writing the log is reported but does not
        discard the combined frame, so the caller can still act on the
        predictions.
    """
    combined = None
    try:
        submission_df.drop(columns=['postfixed', 'goodlemma', 'goodlemma_most_important'], inplace=True)
        # ignore_index=True with axis=1 relabels the columns 0..4.
        combined = pd.concat([submission_df, prediction_df], ignore_index=True, axis=1)
        display(combined)  # notebook-only helper; a NameError elsewhere is caught below
        # append to existing file or create new file if it doesn't exist
        with open('submission_log.csv', 'a', newline='') as file:
            if file.tell() == 0:  # position 0 in append mode means the file is empty
                file.write('subreddit,author,date,post,prediction\n')
            # Column-wise Series.map is used instead of DataFrame.applymap,
            # which newer pandas releases deprecate.
            encrypted = combined.apply(
                lambda column: column.map(lambda value: fnet.encrypt(str(value).encode()).decode())
            )
            encrypted.to_csv(file, header=False, index=False)
    except Exception as e:
        print('log_submission Error is: ' + str(e))
    return combined

def bot(updating, combined_df):
    """Run one pass of the Reddit bot in one of two modes.

    Update mode (``updating`` is truthy): read the encrypted list of monitored
    usernames from usernames_to_monitor.txt, process every unread inbox
    message ('monitor' subscribes the sender, 'remove' unsubscribes them,
    anything else gets a help reply), write the list back encrypted, and
    return it. Unsubscribed users are replaced by the placeholder string
    'removed' so that list positions stay stable; a later subscriber reuses the
    first such slot.

    Messaging mode (``updating`` is falsy): send a private message containing a
    random inspirational quote and support-line numbers to the author of every
    row of ``combined_df`` whose prediction (column 4) equals 1.

    Args:
        updating: True for update mode, False for messaging mode.
        combined_df: frame returned by log_submission() with positional columns
            [0 subreddit, 1 author, 2 date, 3 post, 4 prediction]; ignored in
            update mode. None or an empty frame means there is nothing to send.

    Returns:
        The decrypted username list in update mode; None in messaging mode or
        when an error occurred (errors are printed, not raised).
    """
    ############################################################################################################
    # Choose what subject and message to send to users whose post was classified as suicidal
    # Can do options so user can pick reply that might help them the most e.g. motivational quote, hotline numbers...
    subject = "Please read this."
    body = "\n\nIf you or someone you know is contemplating suicide, please reach out. You can find help at a National Suicide Prevention Lifeline\n\nUSA: 18002738255 US Crisis textline: 741741 text HOME\n\nUnited Kingdom: 116 123\n\nTrans Lifeline (877-565-8860)\n\nOthers: https://en.wikipedia.org/wiki/List_of_suicide_crisis_lines\nhttps://suicidepreventionlifeline.org\n\nIf you wish to stop being monitored, reply with 'remove me'"

    # Choose what message to reply when bot receives 'monitor me' message
    reply_start = "Your submissions are now being monitored\n\nThis means that every post you make will be automatically classified.\n\nIf it is detected that you posted something suicidal, you will receive a message with an inspirational quote and support line numbers.\n\nReply with 'remove me' to stop being monitored."
    # if already being monitored reply with:
    reply_alrdy = "You are already being monitored\n\nThis means that every post you make will be automatically classified.\n\nIf it is detected that you posted something suicidal, you will receive a message with an inspirational quote and support line numbers.\n\nIf you wish to stop being monitored, reply with 'remove me'."
    # if bot receives 'remove me' message:
    reply_stop = "Monitoring has been stopped\n\nThis means that you will no longer receive messages from this bot.\n\nReply with 'monitor me' to start being monitored again."
    # if redditor is not being monitored:
    reply_nomonitor = "You are not being monitored. Reply with 'monitor me' if you wish to be monitored.\n\nIf you choose to be monitored, every post you make will be automatically classified.\n\nIf it is detected that you posted something suicidal, you will receive a message with an inspirational quote and support line numbers."
    # if bot receives unrecognised command:
    reply_err = "Unrecognised command. Please reply with either 'monitor me' to begin being monitored, or 'remove me' to stop being monitored.\n\nIf you choose to be monitored, every post you make will be automatically classified.\n\nIf it is detected that you posted something suicidal, you will receive a message with an inspirational quote and support line numbers."
    ############################################################################################################
    try:
        if updating:  # updating usernames
            username_list = []
            if os.path.isfile('usernames_to_monitor.txt'):  # checks if file exists before reading it
                with open('usernames_to_monitor.txt', 'r') as file:
                    # One encrypted username per line; blank lines are skipped
                    # because they cannot be decrypted.
                    username_list = [
                        fnet.decrypt(line.strip().encode()).decode()
                        for line in file
                        if line.strip()
                    ]
            try:
                for message in reddit.inbox.unread(limit=None):
                    if message.author is None:
                        # No sender to act on (presumably a deleted account).
                        # Mark it read so it does not break every later pass.
                        message.mark_read()
                        continue
                    author = message.author.name
                    text = message.body.lower()
                    # NOTE(review): 'monitor' is tested before 'remove', so a
                    # message containing both words subscribes the sender.
                    if 'monitor' in text:
                        if author not in username_list:
                            if 'removed' in username_list:
                                # reuse the first free slot left by an unsubscribed user
                                username_list[username_list.index('removed')] = author
                            else:
                                username_list.append(author)
                            message.reply(reply_start)
                        else:
                            message.reply(reply_alrdy)
                    elif 'remove' in text:
                        if author in username_list:
                            username_list[username_list.index(author)] = 'removed'
                            message.reply(reply_stop)
                        else:
                            message.reply(reply_nomonitor)
                    else:
                        message.reply(reply_err)
                    message.mark_read()  # marks the message as read (as it has been replied to) so it won't be considered again
            finally:
                # Persist even if a message above failed, so that users who
                # were already answered are not lost. Encrypt everything before
                # opening the file so a failure cannot leave it truncated.
                encrypted_usernames = [fnet.encrypt(username.encode()).decode() for username in username_list]
                with open('usernames_to_monitor.txt', 'w') as file:
                    file.writelines('%s\n' % username for username in encrypted_usernames)
            return username_list
        ################################## COMMENT THIS IF TESTING POSTS ############################################################
        else:  # sending message
            if combined_df is None or combined_df.empty:
                return
            for index in combined_df.index:
                if combined_df[4][index] != 1:  # post was not predicted suicidal
                    continue
                quote_body = random.choice(inspirational_quotes) + body
                try:
                    # Keyword arguments work with both the older positional and
                    # the newer keyword-only signature of Redditor.message.
                    reddit.redditor(combined_df[1][index]).message(subject=subject, message=quote_body)
                except Exception as e:
                    # One undeliverable message must not stop the rest of the batch.
                    print('bot Error is: ', e)
                    continue
                # Seconds elapsed since the first post of the batch was created.
                print('Time taken for index:', index, time.time() - combined_df[2][0])
            return
        #############################################################################################################################
    except Exception as e:
        print('bot Error is: ', e)

def preprocess(submission_df):
    """Add the cleaned and lemmatised text columns needed for classification.

    The frame is modified in place and also returned. Columns added:
      * 'postfixed': lower-cased 'post' with URLs and 'amp;...' fragments removed.
      * 'goodlemma': space-joined lemmas of 'postfixed', without stop words
        and punctuation.
      * 'goodlemma_most_important': the tokens of 'goodlemma' that appear in
        the module-level ``important_words``.

    Newline characters are removed from every string column of the frame,
    including the original 'post' column.

    Args:
        submission_df: frame with at least a string column 'post'.

    Returns:
        The same frame, with the three columns above added.
    """
    submission_df['postfixed'] = submission_df['post'].str.lower()
    # Frame-wide: deletes every newline, so no separate pass for doubled
    # newlines is needed afterwards.
    submission_df.replace('\\n', '', inplace=True, regex=True)
    # Assign the result back instead of calling replace(inplace=True) on the
    # selected column: that chained form does not update the frame when pandas
    # Copy-on-Write is active.
    submission_df['postfixed'] = submission_df['postfixed'].replace(to_replace=r'http\S+', value='', regex=True)  # remove URLs
    submission_df['postfixed'] = submission_df['postfixed'].replace(to_replace=r'amp;\S+', value='', regex=True)  # remove formatting
    submission_df['goodlemma'] = submission_df['postfixed'].apply(
        lambda text: " ".join(token.lemma_ for token in nlp(text) if not token.is_stop and not token.is_punct)
    )
    # Build the lookup set once rather than scanning the list for every token.
    known_words = set(important_words)
    submission_df['goodlemma_most_important'] = submission_df['goodlemma'].apply(
        lambda text: " ".join(str(token) for token in nlp(text) if str(token) in known_words)
    )
    return submission_df

def append(submission_data, submission):
    """Append one submission to ``submission_data`` as a [subreddit, author, date, post] row.

    The row holds the subreddit and author as strings, the creation time
    truncated to an int, and the title and self-text joined by a single space.
    The list is extended in place and also returned.
    """
    row = [
        str(submission.subreddit),
        str(submission.author),
        int(submission.created_utc),
        str(submission.title + " " + submission.selftext),
    ]
    submission_data.append(row)
    return submission_data

def extract_features(submission_df_pp, tfidf):
    """Turn the 'goodlemma' column into a dense feature frame.

    The column is passed to ``tfidf.transform`` and the resulting matrix is
    converted with ``toarray()`` and wrapped in a DataFrame.
    """
    lemmas = submission_df_pp['goodlemma']
    dense = tfidf.transform(lemmas).toarray()
    return pd.DataFrame(dense)

def classify(model, tfidf, submission_df_pp):
    """Predict a label for every preprocessed submission.

    Features come from extract_features(); the output of ``model.predict`` is
    returned as a one-column DataFrame named 'prediction'.
    """
    features = extract_features(submission_df_pp, tfidf)
    predictions = model.predict(features)
    return pd.DataFrame(predictions, columns=['prediction'])
    
# can later expand arguments to an array so user can choose tfidf, word2vec or both
def monitor(model, tfidf):
    """Poll the submission streams of all monitored users forever.

    Each pass of the loop:
      1. asks bot() (update mode) for the current username list and creates or
         drops one submission stream per list position accordingly;
      2. drains every stream of the submissions it currently has to offer;
      3. once the oldest collected submission was created at least 5 seconds
         ago, preprocesses and classifies the batch, logs it, and lets bot()
         (messaging mode) contact the authors of posts predicted suicidal.

    Args:
        model: fitted classifier exposing ``predict``.
        tfidf: fitted vectorizer exposing ``transform``.

    This function does not return: the inner loop runs until the process is
    interrupted. Errors are printed and the loop continues.
    """
    def iterable():
        """Main loop: refresh the streams, drain them, classify the batch."""
        submission_data = []
        usernames = []
        function_list = []
        generator_list = []
        while True:
            try:
                for generator in update_generators(usernames, function_list, generator_list):
                    if generator is None:  # slot of a user who was removed
                        continue
                    for submission in generator:
                        # The streams are created with pause_after=-1 and hand
                        # control back by yielding None.
                        if submission is None:
                            break
                        submission_data = append(submission_data, submission)
                    if submission_data and (time.time() - submission_data[0][2] >= 5):
                        # Detach the batch first so that a batch which fails to
                        # process is not retried on every following pass.
                        batch, submission_data = submission_data, []
                        submission_df = preprocess(pd.DataFrame.from_records(batch, columns=['subreddit', 'author', 'date', 'post']))
                        prediction_df = classify(model, tfidf, submission_df)
                        combined_df = log_submission(submission_df, prediction_df)
                        bot(False, combined_df)  # if first argument is False, then calls function in 'messaging suicidal users' mode
            except Exception as e:
                print('Iterator error is: ' + str(e))

    def update_usernames(usernames):
        """Bring ``usernames`` in line with the list reported by bot(); return it.

        The list is updated in place, position by position. If bot() reports
        nothing (None) or an error occurs, the list is returned as it is.
        """
        try:
            new_list = bot(True, None)  # if first argument is True, then calls function in 'updating username' mode
            if new_list is None or new_list == usernames:  # failure or no new changes
                return usernames
            usernames.extend(new_list[len(usernames):])  # newly appended users
            for i in range(len(usernames)):
                # Covers both 'removed' markers and slots reused by a new user.
                # Positions missing from a shorter new list are treated as removed.
                usernames[i] = new_list[i] if i < len(new_list) else 'removed'
        except Exception as e:
            print('update_usernames Error is: ' + str(e))
        return usernames

    def update_generators(usernames, function_list, generator_list):
        """Create or drop submission streams for every position whose username changed.

        ``function_list`` and ``generator_list`` are kept parallel to
        ``usernames`` and updated in place. Returns ``generator_list``, also
        after an error, so that existing streams continue to be polled.
        """
        try:
            previous = usernames[:]  # snapshot to detect which positions changed
            new_usernames = update_usernames(usernames)
            # grow the parallel lists so every username position has a slot
            for _ in range(len(new_usernames) - len(previous)):
                previous.append(None)
                function_list.append(None)
                generator_list.append(None)
            for x, username in enumerate(new_usernames):  # will look like ['user', 'user2', 'removed', 'user4', 'newuser']
                if username == previous[x]:
                    continue
                if username == 'removed':  # username was removed from file
                    function_list[x] = None
                    generator_list[x] = None
                elif username is not None and x < len(generator_list):  # new user, or 'removed' replaced with a new user
                    # Bind the name as a default argument so the factory keeps
                    # referring to this user after the loop moves on.
                    # CHANGE SKIP EXISTING=FALSE TO GET 100 MOST RECENT POSTS
                    function_list[x] = (lambda name=username: reddit.redditor(name).stream.submissions(skip_existing=True, pause_after=-1))
                    generator_list[x] = function_list[x]()
        except Exception as e:
            print('update_generators Error is: ' + str(e))
        return generator_list

    return iterable()
    
# Start monitoring with the chosen model and feature engineering method(s).
# monitor() runs an endless polling loop, so this call does not return and
# nothing placed after it in the cell will execute.
monitor(model, tfidf)