In [2]:
from os import getenv
from typing import Tuple

import praw
import redis
import spacy
import tqdm
from dotenv import load_dotenv

import mq
import preprocessing
from extract import extract_comment

In [4]:
# Load the environment via python-dotenv; the value displayed as this cell's
# output is load_dotenv()'s return value, which is used as the "loaded
# correctly" check.
load_dotenv()

True

In [5]:
# Reddit API client for the consumer script. The app id and secret are read
# from the REDDIT_ID / REDDIT_SECRET environment variables rather than being
# hardcoded in the notebook.
consumer = praw.Reddit(
    client_id=getenv('REDDIT_ID'),
    client_secret=getenv('REDDIT_SECRET'),
    user_agent='python_consumer'
)
# The subreddit whose comments are streamed by the next cell: 'politics'.
subreddit = consumer.subreddit('politics')

In [None]:
# Redis client created with default connection settings; `r` is reused by the
# cells further down for both publishing and subscribing.
r = redis.Redis()
# Extract, serialise and publish every streamed comment on the 'comments'
# channel.
# NOTE(review): PRAW's comment stream is documented to yield indefinitely, so
# this cell presumably runs until interrupted - confirm how the later cells
# are meant to run alongside it.
for comment in tqdm.tqdm(subreddit.stream.comments()):
    payload = mq.serialize_message_data(extract_comment(comment))
    r.publish('comments', payload)

In [None]:
# Open a pub/sub handle on the shared Redis client and subscribe it to the
# 'comments' channel that the consumer cell publishes to.
# ignore_subscribe_messages=True is passed so that subscription confirmations
# are not handed to the processing loop as if they were comments.
sub = r.pubsub(ignore_subscribe_messages=True)
sub.subscribe(['comments'])

In [8]:
# Cleaning / preprocessing step applied to every raw comment message
def process_message(msg):
    """Turn one pub/sub message into a collection of cleaned sentences.

    The message is decoded with mq.read_message_data, its 'text' field is
    passed through preprocessing.replace_urls, and the result is split with
    preprocessing.message_to_sentences.

    Args:
        msg: a message as received from the 'comments' subscription.

    Returns:
        Whatever preprocessing.message_to_sentences returns for the cleaned
        text (iterated as sentences by the caller).
    """
    raw_text = mq.read_message_data(msg)['text']
    url_free_text = preprocessing.replace_urls(raw_text)
    return preprocessing.message_to_sentences(url_free_text)

In [None]:
# Preprocess and clean each comment as it arrives from the consumer, then
# re-publish it sentence by sentence on the 'processed' channel.
# `tqdm` is imported as a module (`import tqdm`), so the progress-bar wrapper
# is `tqdm.tqdm`; calling the module itself raises
# "TypeError: 'module' object is not callable".
for post in tqdm.tqdm(sub.listen()):
    sentences = process_message(post)
    print(sentences)
    for s in sentences:
        r.publish('processed', mq.serialize_message_data(s))

In [None]:
# Load the small English spaCy pipeline; it is used here for entity recognition.
nlp = spacy.load("en_core_web_sm")
# Listen on the channel the preprocessing cell publishes sentences to.
sub = r.pubsub(ignore_subscribe_messages=True)
sub.subscribe(['processed'])
# Run the NLP model over every processed sentence and forward the sentences
# that mention at least one entity to the 'entity' channel.
# `tqdm` is imported as a module, so the callable wrapper is `tqdm.tqdm`.
for message in tqdm.tqdm(sub.listen()):
    sentence = mq.read_message_data(message)
    doc = nlp(sentence)
    # doc.ents holds spaCy Span objects, which are views into `doc` and cannot
    # be serialised on their own, so only the entity text is published.
    # NOTE(review): assumes mq.serialize_message_data expects plain Python
    # data - confirm against the mq module.
    entities = [ent.text for ent in doc.ents]
    if entities:
        output = dict(sentence=sentence, entities=entities)
        r.publish('entity', mq.serialize_message_data(output))

In [None]:
from pathlib import Path
from fastai.text import load_data, text_classifier_learner, AWD_LSTM
# Using fastai's text sentiment data, we can build the classifier learner that
# predict_sentiment relies on.
# bs is forwarded to load_data as its `bs` argument (presumably the batch size).
bs=48
# Directory from which the 'data_clas.pkl' data file is loaded.
path=Path('./model/sentiment')
data_clas = load_data(path, 'data_clas.pkl', bs=bs)
# NOTE(review): only the learner is constructed here; no call that loads
# trained weights is visible in this cell - confirm the model is not being
# used with untrained classifier weights.
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)

In [None]:
def predict_sentiment(sentence: str) -> Tuple[str, float]:
    """Classify one sentence with the notebook-level fastai learner `learn`.

    The return annotation needs `Tuple` from `typing` to be imported in the
    notebook's import cell; without it this definition raises NameError.

    Args:
        sentence: the text handed to learn.predict.

    Returns:
        A (label, score) pair. label is "negative" when the predicted class
        index equals 0 and "positive" otherwise; score is the entry of the
        returned scores at the predicted class index, rounded to 4 decimals.
    """
    # The first element returned by learn.predict is not needed here.
    _category, class_id, scores = learn.predict(sentence)
    score = round(scores[class_id].item(), 4)
    label = "negative" if class_id == 0 else "positive"
    return label, score

In [None]:
# Listen on the channel the entity-recognition cell publishes to.
sub = r.pubsub(ignore_subscribe_messages=True)
sub.subscribe(['entity'])
# Score each sentence once, then publish one record per entity it mentions on
# the 'final' channel.
# `tqdm` is imported as a module, so the progress-bar wrapper is `tqdm.tqdm`;
# calling the module itself raises TypeError.
for message in tqdm.tqdm(sub.listen()):
    data = mq.read_message_data(message)
    sentiment, score = predict_sentiment(data['sentence'])
    for ent in data['entities']:
        output = dict(entity=ent, sentiment=sentiment, score=score)
        print(output)
        r.publish('final', mq.serialize_message_data(output))