In [1]:
import nltk
import pandas as pd
import string
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords

analyzer = SentimentIntensityAnalyzer()
STOPWORDS = set(stopwords.words('english'))

In [3]:
crypto = 'crypto_bitcoin'
# crypto = 'crypto_ethereum'
# crypto = 'crypto_xrp'

In [4]:
list_subs = ['bitcoin','bitcoinbeginners','bitcoinmarkets','bitcoinmining','btc']
# list_subs = ['ethereum','ethermining','ethfinance','eth','ethtrader']

In [5]:
def map_sentiment(score):
    if -1 <= score <= -0.5:
        return 'Negative'
    elif 0.5 < score <= 1:
        return 'Positive'
    else:
        return 'Neutral'

In [5]:
# CryptoBERT Processing
for subreddit in list_subs:
    print(f"Started processing! {subreddit}")
    posts_df = pd.read_csv(f"Data/Cleaned_Data/{crypto}/submission_and_comments/{subreddit}_submissions_19_22.csv")
    print("\tLoaded submissions DF")
    posts_df = posts_df[~posts_df['selftext'].isin(['[deleted]', '[removed]'])]
    print(f"{subreddit}: {posts_df.shape} after deleting removed/deleted posts")
    posts_df ['title'].fillna('', inplace=True)
    posts_df ['selftext'].fillna('', inplace=True)
    # Concatenate the two columns
    posts_df ['post_text'] = posts_df ['title'] + ' ' + posts_df ['selftext']
    posts_df['# Of Words'] = posts_df['post_text'].apply(lambda x: len(x.split(' ')))
    posts_df[['submission','post_text','subreddit','score','num_comments','posted_on', '# Of Words']].to_csv(f"Data/Sentiment/{crypto}/CRYPTOBERT/{subreddit}_submission_19_22.csv",index=False)
    display(posts_df[['submission','post_text','subreddit','score','num_comments','posted_on', '# Of Words']].head(2))
    # print(f"Saved! {subreddit}")

Started processing! bitcoin
	Loaded submissions DF
bitcoin: (90465, 10) after deleting removed/deleted posts


Unnamed: 0,submission,post_text,subreddit,score,num_comments,posted_on,# Of Words
0,abcwdu,Are these accurate criticisms of Lightning Net...,Bitcoin,0,11,2019-01-01 00:05:35,93
1,abcxb0,How to open a Bitcoin wallet without a bank ac...,Bitcoin,6,12,2019-01-01 00:08:21,66


Started processing! bitcoinbeginners
	Loaded submissions DF
bitcoinbeginners: (18114, 10) after deleting removed/deleted posts


Unnamed: 0,submission,post_text,subreddit,score,num_comments,posted_on,# Of Words
8,abq5jg,Issue while withdrawing funds I have an issue ...,BitcoinBeginners,2,11,2019-01-02 05:08:14,74
12,absh1i,Newbie Alert!! Looking into diving into the cr...,BitcoinBeginners,10,30,2019-01-02 11:08:39,70


Started processing! bitcoinmarkets
	Loaded submissions DF
bitcoinmarkets: (1974, 10) after deleting removed/deleted posts


Unnamed: 0,submission,post_text,subreddit,score,num_comments,posted_on,# Of Words
0,aberj1,"[Altcoin Discussion] Tuesday, January 01, 2019...",BitcoinMarkets,5,2,2019-01-01 04:05:32,168
9,abpnzp,"[Altcoin Discussion] Wednesday, January 02, 20...",BitcoinMarkets,6,14,2019-01-02 04:05:36,168


Started processing! bitcoinmining
	Loaded submissions DF
bitcoinmining: (6288, 10) after deleting removed/deleted posts


Unnamed: 0,submission,post_text,subreddit,score,num_comments,posted_on,# Of Words
1,abdzjx,S9 Exhaust Fan Replacement? I'm thinking that ...,BitcoinMining,3,9,2019-01-01 02:15:26,106
25,ac6544,"If mining is unprofitable, why are people stil...",BitcoinMining,9,26,2019-01-03 14:23:33,79


Started processing! btc
	Loaded submissions DF
btc: (21453, 10) after deleting removed/deleted posts


Unnamed: 0,submission,post_text,subreddit,score,num_comments,posted_on,# Of Words
0,abcvgy,I have a mission if you choose to accept it......,btc,0,0,2019-01-01 00:02:52,70
2,abdnua,"When Roger offered $1.25M to Opennode, did the...",btc,8,8,2019-01-01 01:33:22,26


In [10]:
list_subs

['ethereum', 'ethermining', 'ethfinance', 'eth', 'ethtrader']

In [11]:
for subreddit in list_subs:
    print(f"Started processing! {subreddit}")
    posts_df = pd.read_csv(f"Data/Cleaned_Data/{crypto}/submission_and_comments/{subreddit}_submissions_19_22.csv")
    print("\tLoaded submissions DF")
    posts_df = posts_df[~posts_df['selftext'].isin(['[deleted]', '[removed]'])]
    print(f"{subreddit}: {posts_df.shape} after deleting removed/deleted posts")
    posts_df ['title'].fillna('', inplace=True)
    posts_df ['selftext'].fillna('', inplace=True)
    # Concatenate the two columns
    posts_df ['post_text'] = posts_df ['title'] + ' ' + posts_df ['selftext']
    posts_df['VADER_Score']=posts_df['post_text'].map(lambda txt : analyzer.polarity_scores(txt)['compound'])
    print("\tVader SA Posts completed")
    posts_df['VADER_Post_Sentiment'] = posts_df['VADER_Score'].apply(map_sentiment)
    print("\tSentiment labels assigned!")
    posts_df['# Of Words'] = posts_df['post_text'].apply(lambda x: len(x.split(' ')))
    posts_df['# Of StopWords'] = posts_df['post_text'].apply(lambda x: len([word for word in x.split(' ') if word in list(STOPWORDS)]))
    posts_df['Average Word Length'] = posts_df['post_text'].apply(lambda x: np.mean(np.array([len(va) for va in x.split(' ') if va not in list(STOPWORDS)])))
    posts_df[['submission','subreddit','score','num_comments','posted_on','VADER_Score', 'VADER_Post_Sentiment', '# Of Words','# Of StopWords','Average Word Length']].to_csv(f"Data/Sentiment/{crypto}/VADER/{subreddit}_submission_19_22.csv",index=False)
    print(f"Saved! {subreddit}")

Started processing! ethereum
	Loaded submissions DF
ethereum: (16455, 10) after deleting removed/deleted posts
	Cleaned posts DF for Vader SA
	Vader SA Posts completed
	Sentiment labels assigned!
Saved! ethereum
Started processing! ethermining
	Loaded submissions DF
ethermining: (30918, 10) after deleting removed/deleted posts
	Cleaned posts DF for Vader SA
	Vader SA Posts completed
	Sentiment labels assigned!
Saved! ethermining
Started processing! ethfinance
	Loaded submissions DF
ethfinance: (2220, 10) after deleting removed/deleted posts
	Cleaned posts DF for Vader SA
	Vader SA Posts completed
	Sentiment labels assigned!
Saved! ethfinance
Started processing! eth
	Loaded submissions DF
eth: (1015, 10) after deleting removed/deleted posts
	Cleaned posts DF for Vader SA
	Vader SA Posts completed
	Sentiment labels assigned!
Saved! eth
Started processing! ethtrader
	Loaded submissions DF
ethtrader: (28005, 10) after deleting removed/deleted posts
	Cleaned posts DF for Vader SA
	Vader SA 