### Package Imports

In [None]:
from bs4 import BeautifulSoup
from nltk.corpus import stopwords

from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
import flair
from flair.data import Sentence
from flair.models import SequenceTagger

### Preliminary Cleanup

##### Null Value Drop

In [None]:
def removenull(df):
    """Drop, in place, every row of ``df`` whose ``title`` is null.

    When rows are dropped the index is renumbered 0..n-1. The other helpers
    in this file look rows up with ``df[col][i]`` for ``i in range(len(df))``,
    which raises ``KeyError`` if the index has gaps.

    Args:
        df: DataFrame with a ``title`` column; modified in place.

    Returns:
        None.
    """
    if df['title'].isna().any():
        df.dropna(subset=['title'], inplace=True)
        # Close the gaps left by the dropped rows so positional lookups work.
        df.reset_index(drop=True, inplace=True)

##### Text Concatenate

In [None]:
def textcombine(df):
    """Add a ``title_and_text`` column joining each post's title and body.

    For each row the new value is the title alone when ``selftext`` is null
    or the literal ``'[removed]'``; otherwise it is the title, one space, and
    the selftext.

    The column is built in one pass and assigned whole. The previous
    per-cell ``df[col][i] = ...`` writes were chained assignments, which
    pandas does not guarantee to reach ``df``, and the ``[i]`` lookups were
    label-based, so they failed on any index other than 0..n-1.

    Args:
        df: DataFrame with ``title`` and ``selftext`` columns; modified in
            place.

    Returns:
        None.
    """
    df['title_and_text'] = [
        title if pd.isnull(body) or body == '[removed]' else title + ' ' + body
        for title, body in zip(df['title'], df['selftext'])
    ]

### Text Normalization

In [None]:
def standardize_text(df, column):
    """Normalise the text in ``df[column]`` in place.

    Each value is stripped of markup with BeautifulSoup, every character
    outside ``a-z``, ``A-Z`` and ``0-9`` is replaced by a space, the text is
    lower-cased and split on whitespace, English stop words are removed, and
    the remaining words are re-joined with single spaces.

    Args:
        df: DataFrame to modify in place.
        column: Name of the text column to normalise.

    Returns:
        None.
    """
    # ``re`` is not imported explicitly in the visible import cell, so bring
    # it into scope here rather than rely on a star import.
    import re

    # Loop-invariant work: build the stop-word set and the pattern once
    # instead of once per row.
    stops = set(stopwords.words('english'))
    non_alnum = re.compile("[^a-zA-Z0-9]")

    def _clean(raw):
        # NOTE(review): no parser is passed to BeautifulSoup, so bs4 picks
        # whichever one is installed -- consider pinning one explicitly.
        review_text = str(BeautifulSoup(raw).get_text())
        words = non_alnum.sub(" ", review_text).lower().split()
        return " ".join(w for w in words if w not in stops)

    # Assign the whole column at once. Per-cell ``df[column][i] = ...`` is a
    # chained assignment and also depends on the index being 0..n-1.
    df[column] = [_clean(raw) for raw in df[column]]

### Sentiment Analysis

##### Sentiment Vader

In [None]:
def vader_score(df, column):
    """Add VADER sentiment scores for ``df[column]`` as four new columns.

    Creates ``vader_neu_score``, ``vader_pos_score``, ``vader_neg_score`` and
    ``vader_compound`` from the ``neu``, ``pos``, ``neg`` and ``compound``
    entries of ``SentimentIntensityAnalyzer.polarity_scores`` for each row.

    The columns are assigned whole, so they hold the scores directly rather
    than starting as empty strings that are overwritten cell by cell. That
    per-cell form was a chained assignment and required a 0..n-1 index.

    Args:
        df: DataFrame to modify in place.
        column: Name of the text column to score.

    Returns:
        None.
    """
    # One analyzer serves every row; it does not depend on the loop variable.
    sid = SentimentIntensityAnalyzer()
    scores = [sid.polarity_scores(text) for text in df[column]]

    # Same column creation order as before: neu, pos, neg, compound.
    for new_column, key in (
        ("vader_neu_score", "neu"),
        ("vader_pos_score", "pos"),
        ("vader_neg_score", "neg"),
        ("vader_compound", "compound"),
    ):
        df[new_column] = [row_scores[key] for row_scores in scores]

##### Sentiment TextBlob

In [None]:
def blob_score(df, column):
    """Add TextBlob polarity and subjectivity for ``df[column]``.

    Creates ``blob_polarity`` from element 0 and ``blob_subjectivity`` from
    element 1 of ``TextBlob(text).sentiment`` for each row.

    The columns are assigned whole instead of being filled through
    ``df[col][i] = ...``, which is a chained assignment and relied on the
    index being 0..n-1.

    Args:
        df: DataFrame to modify in place.
        column: Name of the text column to score.

    Returns:
        None.
    """
    sentiments = [TextBlob(text).sentiment for text in df[column]]

    df['blob_polarity'] = [sentiment[0] for sentiment in sentiments]
    df['blob_subjectivity'] = [sentiment[1] for sentiment in sentiments]

##### Sentiment Flair

In [None]:
def flair_score(df, column):
    """Add flair sentiment label and confidence for ``df[column]``.

    Loads the ``'en-sentiment'`` text classifier, predicts each row's text,
    and writes the first predicted label's value to ``flair_polarity`` and
    its score, as text, to ``flair_score``. Rows for which the classifier
    produces no label get the empty string in both columns.

    Args:
        df: DataFrame to modify in place.
        column: Name of the text column to score.

    Returns:
        None.
    """
    flair_sentiment = flair.models.TextClassifier.load('en-sentiment')

    polarities = []
    scores = []
    for text in df[column]:
        sentence = flair.data.Sentence(text)
        flair_sentiment.predict(sentence)
        labels = sentence.labels
        if not labels:
            # Nothing predicted (e.g. text with no tokens): keep the empty
            # placeholder instead of failing on ``labels[0]``.
            polarities.append("")
            scores.append("")
            continue
        top = labels[0]
        # Read the label's fields directly rather than slicing them out of
        # ``str(label)``, whose layout is presentation-only.
        polarities.append(top.value)
        # Stored as text, as before; presumably 4 decimals matches what the
        # label's printed form showed -- TODO confirm for the flair version
        # in use.
        scores.append(str(round(top.score, 4)))

    # Whole-column assignment avoids chained ``df[col][i] = ...`` writes and
    # the need for a 0..n-1 index.
    df['flair_polarity'] = polarities
    df['flair_score'] = scores

### Post Markup

##### Mean Sentiment Scores

In [None]:
def mean_senti(df):
    """Return the mean of the numeric columns of ``df`` for each ``Date``.

    The ``neu_score``, ``pos_score``, ``neg_score`` and ``compound`` columns
    are first converted to float in place (via ``str``), then the frame is
    grouped by ``Date`` and averaged.

    The function previously computed this grouped mean and then returned the
    un-aggregated ``df``, discarding the result; it now returns the means.

    NOTE(review): ``vader_score`` in this file writes ``vader_``-prefixed
    column names -- confirm the columns are renamed before this is called.

    Args:
        df: DataFrame with a ``Date`` column and the four score columns;
            the score columns are converted to float in place.

    Returns:
        DataFrame indexed by ``Date`` holding the per-date mean of every
        numeric column.
    """
    for score_column in ('neu_score', 'pos_score', 'neg_score', 'compound'):
        df[score_column] = df[score_column].astype(str).astype(float)

    # numeric_only skips text columns (titles, bodies), which cannot be
    # averaged and make a plain ``agg('mean')`` raise on current pandas.
    return df.groupby('Date').mean(numeric_only=True)

##### Post Count

In [None]:
def post_sum(df):
    """Count posts per ``Date`` and merge the counts with ``df`` by index.

    Adds a ``Sum_posts`` column of ones to ``df`` in place, sums per ``Date``
    group, and merges the per-date totals with ``df`` on the two frames'
    indexes, storing the result in the local ``df2``.

    NOTE(review): no ``return`` is visible in this view of the file, so
    ``df2`` appears to be discarded -- confirm whether it should be returned.
    """
    
    # Every row contributes 1, so the per-group sum of this column is a row count.
    df['Sum_posts'] = 1
    
    # Sums every column of df within each Date group.
    sum_df = df.groupby('Date').agg('sum')
    # Keep only the last column; presumably that is Sum_posts because it was
    # added last -- TODO confirm.
    sum_df.drop(columns = sum_df.columns[:-1], inplace = True)
    # The index of sum_df is the Date group key, so this column holds dates.
    # NOTE(review): the name 'zip_code' does not match that -- confirm intent.
    sum_df['zip_code'] = sum_df.index
    # NOTE(review): this matches df's row index against sum_df's Date index;
    # confirm the two indexes actually share values.
    df2 = pd.merge(df, sum_df, left_index = True, right_index = True)