## Sentiment Analysis (Test)

### 1. Import libraries and link to Reddit

In [1]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
import praw
import matplotlib.pyplot as plt
import math
import datetime as dt
import pandas as pd
import numpy as np


nltk.download('vader_lexicon')
nltk.download('stopwords')


reddit = praw.Reddit(client_id='xxxx',
                    client_secret='xxxx',
                    user_agent='Quick-Sherbet-1373')

##print(reddit.read_only)

sub_reddits = reddit.subreddit('wallstreetbets')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\pkim3\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\pkim3\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### 2. Array of stocks

In [7]:
stocks = ["TSLA", "LULU", "QQQ", "SPY"]

### 3. Function commentSentiment

In [3]:
def commentSentiment(ticker, urlT):
    subComments = []
    bodyComment = []
    try:
        check = reddit.submission(url=urlT)
        subComments = check.comments #obtain sub comments, add into Array
    except:
        return 0
    
    for comment in subComments:
        try: 
            bodyComment.append(comment.body) #obtain body only
        except:
            return 0
    
    sia = SIA()
    results = []
    for line in bodyComment:
        scores = sia.polarity_scores(line) #analyse sentiment, +ve score are positive valence, -ve score negative valence
        scores['headline'] = line

        results.append(scores)
    
    df =pd.DataFrame.from_records(results) #convert to pandas DF
    df.head()
    df['label'] = 0
    
    try:
        df.loc[df['compound'] > 0.1, 'label'] = 1
        df.loc[df['compound'] < -0.1, 'label'] = -1
    except:
        return 0
    
    averageScore = 0
    position = 0
    
    #calculate average score 
    
    while position < len(df.label)-1:
        averageScore = averageScore + df.label[position]
        position += 1
    averageScore = averageScore/len(df.label) 
    
    return(averageScore)

### 4. Function latestComment

In [4]:
def latestComment(ticker, urlT):
    subComments = []
    updateDates = []
    try:
        check = reddit.submission(url=urlT)
        subComments = check.comments
    except:
        return 0
    
    for comment in subComments:
        try: 
            updateDates.append(comment.created_utc)
        except:
            return 0
    
    updateDates.sort()
    return(updateDates[-1]) #return latest date


### 5. Function get_date

In [5]:
def get_date(date):
    return dt.datetime.fromtimestamp(date)

### 6. Run

In [8]:
submission_statistics = []
d = {}

for ticker in stocks:
    for submission in reddit.subreddit('wallstreetbets').search(ticker, limit=130):
        if submission.domain != "self.wallstreetbets":
            continue
        d = {}
        d['ticker'] = ticker
        d['num_comments'] = submission.num_comments
        d['comment_sentiment_average'] = commentSentiment(ticker, submission.url)
        if d['comment_sentiment_average'] == 0.000000:
            continue
        d['latest_comment_date'] = latestComment(ticker, submission.url)
        d['score'] = submission.score
        d['upvote_ratio'] = submission.upvote_ratio
        d['date'] = submission.created_utc
        d['domain'] = submission.domain
        d['num_crossposts'] = submission.num_crossposts
        d['author'] = submission.author
        submission_statistics.append(d)
    
dfSentimentStocks = pd.DataFrame(submission_statistics)

_timestampcreated = dfSentimentStocks["date"].apply(get_date)
dfSentimentStocks = dfSentimentStocks.assign(timestamp = _timestampcreated)

_timestampcomment = dfSentimentStocks["latest_comment_date"].apply(get_date)
dfSentimentStocks = dfSentimentStocks.assign(commentdate = _timestampcomment)

dfSentimentStocks.sort_values("latest_comment_date", axis = 0, ascending = True,inplace = True, na_position ='last') 

dfSentimentStocks

Unnamed: 0,ticker,num_comments,comment_sentiment_average,latest_comment_date,score,upvote_ratio,date,domain,num_crossposts,author,timestamp,commentdate
117,LULU,49,0.166667,1.535663e+09,21,0.89,1.535659e+09,self.wallstreetbets,0,snackotron,2018-08-30 12:55:43,2018-08-30 14:01:17
116,LULU,32,0.384615,1.535671e+09,79,0.93,1.535479e+09,self.wallstreetbets,0,pb1217,2018-08-28 10:56:21,2018-08-30 16:22:15
114,LULU,25,0.181818,1.537471e+09,103,0.90,1.537419e+09,self.wallstreetbets,0,JPVizzle,2018-09-19 21:47:49,2018-09-20 12:09:17
108,LULU,34,0.166667,1.542652e+09,47,0.90,1.541771e+09,self.wallstreetbets,0,lasemoco,2018-11-09 05:42:01,2018-11-19 10:28:18
107,LULU,31,-0.187500,1.544132e+09,38,0.90,1.544126e+09,self.wallstreetbets,0,kndawg,2018-12-06 11:51:54,2018-12-06 13:30:56
...,...,...,...,...,...,...,...,...,...,...,...,...
5,TSLA,52,0.066667,1.609543e+09,75,0.89,1.609508e+09,self.wallstreetbets,0,notlikethis1994,2021-01-01 05:33:45,2021-01-01 15:16:11
30,TSLA,111,-0.018868,1.609551e+09,33,0.75,1.606869e+09,self.wallstreetbets,0,patrioticparrots,2020-12-01 16:34:39,2021-01-01 17:36:29
197,SPY,348,-0.025316,1.609605e+09,561,0.95,1.607135e+09,self.wallstreetbets,0,StevenVanMetre,2020-12-04 18:23:14,2021-01-02 08:25:11
7,TSLA,58,0.086957,1.609605e+09,47,0.68,1.609259e+09,self.wallstreetbets,0,bilalgag,2020-12-29 08:15:09,2021-01-02 08:31:04


In [9]:
dfSentimentStocks.author.value_counts()


dfSentimentStocks.to_csv('Reddit_Sentiment_Equity.csv', index=False) 