In [24]:
import praw
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import pandas as pd
import datetime

import secrets
import config

In [2]:
# Topic classifier: tokenizer and sequence-classification model are both
# resolved from identifiers kept in the project's `config` module.
news_tokenizer = AutoTokenizer.from_pretrained(config.HF_TOKENIZER_NEWS_CLASSIFIER)
news_model = AutoModelForSequenceClassification.from_pretrained(config.HF_MODEL_NEWS_CLASSIFIER)

# Sentiment classifier: name the checkpoint explicitly instead of relying on
# the library's task default. This is the same checkpoint the default resolved
# to when this notebook was recorded, so results are unchanged, but a future
# change of the library default can no longer silently swap the model (and the
# "No model was supplied" warning goes away).
sentiment_model = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


In [3]:
# API credentials are read from the `secrets` module imported at the top of the
# notebook (presumably a project-local file that shadows the standard-library
# module of the same name -- confirm), so no key material lives in the notebook.
reddit_credentials = {
    "client_id": secrets.REDDIT_API_CLIENT_ID,
    "client_secret": secrets.REDDIT_API_CLIENT_SECRET,
    "user_agent": secrets.REDDIT_API_USER_AGENT,
}
reddit = praw.Reddit(**reddit_credentials)

# Handle for the subreddit(s) named in the project configuration.
subreddit = reddit.subreddit(config.SUBREDDITS)

In [4]:
# Sanity check: display the client's `read_only` flag before fetching anything.
reddit.read_only

True

In [5]:
# Pull submissions off the subreddit stream until enough have been gathered.
# NOTE: the size check runs after each append and uses a strict comparison, so
# the loop stops once the list holds config.NUM_SUBMISSION_TO_GET + 1 items.
submissions = []

for post in subreddit.stream.submissions():
    submissions.append(post)
    if len(submissions) <= config.NUM_SUBMISSION_TO_GET:
        continue
    break

In [28]:
class RedditSubmission:
    """A Reddit submission plus model inferences made on its title.

    The inference methods rely on names defined at notebook level:
    ``news_tokenizer``, ``news_model``, ``sentiment_model`` and
    ``config.NEWS_CLASSES``.

    Attributes:
        subreddit: Subreddit the submission was posted in (stored as passed).
        title: Submission title; the text both models are run on.
        time_created: Creation time as a ``datetime.datetime``.
        author: Author of the submission (stored as passed).
        inference_subject: Entry of ``config.NEWS_CLASSES`` chosen for the title.
        inference_sentiment: ``"label"`` field of the sentiment pipeline result.
    """

    subreddit: str
    title: str
    time_created: datetime.datetime
    author: str
    inference_subject: str
    inference_sentiment: str

    def __init__(self, subreddit: str, title: str, time_created: float, author: str, inference_subject: str = None, inference_sentiment: str = None):
        """Store the submission fields and fill in any missing inference.

        Args:
            subreddit: Subreddit name.
            title: Submission title.
            time_created: POSIX timestamp (seconds since the epoch).
            author: Author name.
            inference_subject: Precomputed subject label. When ``None`` the
                news classifier is run on ``title``.
            inference_sentiment: Precomputed sentiment label. When ``None``
                the sentiment pipeline is run on ``title``.
        """
        self.subreddit = subreddit
        self.title = title
        self.time_created = self.convert_time_to_datetime(time_created)
        self.author = author
        # Honour labels supplied by the caller; only run a model for the ones
        # that were not provided.
        if inference_subject is None:
            inference_subject = self.run_subject_analysis()
        if inference_sentiment is None:
            inference_sentiment = self.run_sentiment_analysis()
        self.inference_subject = inference_subject
        self.inference_sentiment = inference_sentiment

    def convert_time_to_datetime(self, time_created) -> datetime.datetime:
        """Convert a POSIX timestamp into a ``datetime.datetime``.

        ``fromtimestamp`` is called without a ``tz`` argument, so the result is
        a naive datetime expressed in the local timezone of the machine running
        the notebook.
        """
        return datetime.datetime.fromtimestamp(time_created)

    def map_news_output_to_class(self, inference_output: torch.Tensor) -> str:
        """Return the ``config.NEWS_CLASSES`` entry with the highest score.

        Args:
            inference_output: One score per class for a single example (the
                caller passes the model's logits for that example).
        """
        # argmax reports the first maximal position on ties, which matches the
        # previous list.index(max(...)) behaviour.
        best_index = int(torch.argmax(inference_output))
        return config.NEWS_CLASSES[best_index]

    def run_subject_analysis(self) -> str:
        """Classify the title with the news model and return the class name."""
        inputs = news_tokenizer(self.title, return_tensors="pt")
        # Pure inference: no labels are passed (they would only trigger a loss
        # computation against a made-up target) and gradient tracking is off.
        with torch.no_grad():
            outputs = news_model(**inputs)  # unpack tokenizer output as keyword args
        # Batch size is 1, so row 0 of the logits holds the scores for this title.
        return self.map_news_output_to_class(outputs.logits[0])

    def run_sentiment_analysis(self) -> str:
        """Run the sentiment pipeline on the title and return its label."""
        sentiment = sentiment_model(self.title)
        return sentiment[0]["label"]

In [29]:
reddit_submission_objects = []

for submission in submissions:
    # `submission.subreddit` and `submission.author` are PRAW model objects,
    # while RedditSubmission declares both fields as `str`. Convert them here
    # so the records (and the DataFrame built from them) hold plain strings
    # rather than live API objects.
    author = submission.author  # may be None, e.g. when the account was deleted
    s = RedditSubmission(
        str(submission.subreddit),
        submission.title,
        submission.created_utc,
        str(author) if author is not None else None,
    )
    reddit_submission_objects.append(s)

In [30]:
# One row per analysed submission; the columns are the instance attributes.
submission_records = list(map(vars, reddit_submission_objects))
pd.DataFrame(submission_records)

Unnamed: 0,subreddit,title,time_created,author,inference_subject,inference_sentiment
0,news,"U.S. home sales tumble; higher prices, mortgag...",2022-03-20 16:16:49,Whichwhenwhywhat,business,NEGATIVE
1,news,A Tennessee police officer fired his stun gun ...,2022-03-20 17:12:50,TheSwaguar,world,NEGATIVE
2,news,Chipotle Tests Tortilla Frying Robot to Make I...,2022-03-20 17:17:37,Double-Anteater228,sci/tech,NEGATIVE
3,news,David Beckham hands Instagram account to Ukrai...,2022-03-20 17:52:36,ILOVEMCU,world,NEGATIVE
4,news,CEO of B.C. resort on leave after sexist remar...,2022-03-20 18:11:07,MarseyTheCat,sci/tech,NEGATIVE
5,news,Justice Clarence Thomas hospitalized with 'flu...,2022-03-20 18:20:24,slade797,world,NEGATIVE
6,news,Russia demands Mariupol lay down arms but Ukra...,2022-03-20 21:00:22,Oldmanwaffle,world,NEGATIVE
7,news,Mariupol: Ukraine rejects Russian offer to sur...,2022-03-20 21:15:30,MakeItSo4692,world,POSITIVE
8,news,Australia to make Big Tech hand over misinform...,2022-03-20 21:33:58,psgia,sci/tech,NEGATIVE
9,news,Ammonia leak reported at chemicals plant in Uk...,2022-03-20 22:38:22,MinaFur,world,NEGATIVE
