In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta
from datetime import datetime as dt

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
import re
import string

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

In [None]:
# Load the raw tweets, drop rows that have no tweet text, and order the rest
# chronologically.
# `datetime` is parsed into real timestamps *before* sorting: sorting the raw
# CSV strings is only chronological when they happen to be zero-padded,
# ISO-style text, so parsing first makes the order independent of the file's
# date format.
data = (
    pd.read_csv('wendys_tweets.csv')
      .dropna(subset=['text'])
      .assign(datetime=lambda df: pd.to_datetime(df['datetime']))
      .sort_values(by='datetime')
      .reset_index(drop=True)
)
data.shape

In [None]:
# Summary statistics of the length of each entry in the `text` column.
data['text'].str.len().describe()

In [None]:
# Pull out one tweet (row label 1 of the `text` column) to eyeball the raw text.
test = data.loc[1, 'text']
test

In [None]:
# Score every raw tweet with both sentiment models and keep one number per model:
# VADER's 'compound' score and TextBlob's polarity.
analyzer = SentimentIntensityAnalyzer()

data['vader_sentiment'] = data['text'].map(
    lambda tweet: analyzer.polarity_scores(tweet)['compound']
)
data['blob_sentiment'] = data['text'].map(
    lambda tweet: TextBlob(tweet).sentiment.polarity
)

data[['vader_sentiment', 'blob_sentiment']].describe()

In [None]:
# Single lemmatizer instance; lemmatize_sentence() calls it for every word it lemmatizes.
lemmatizer = WordNetLemmatizer()

def nltk_tag_to_wordnet_tag(nltk_tag):
    """Translate an NLTK part-of-speech tag into the matching WordNet POS constant.

    The decision is made on the tag's leading letter:
    ``J`` -> adjective, ``V`` -> verb, ``N`` -> noun, ``R`` -> adverb.

    Returns:
        The corresponding ``wordnet`` constant, or ``None`` when the tag
        starts with none of those letters.
    """
    prefix_to_pos = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('N', wordnet.NOUN),
        ('R', wordnet.ADV),
    )
    for prefix, pos in prefix_to_pos:
        if nltk_tag.startswith(prefix):
            return pos
    return None

def lemmatize_sentence(sentence):
    """Lemmatize each token of *sentence* and return the tokens joined by single spaces.

    The sentence is tokenized and POS-tagged with NLTK. A token whose tag maps
    to a WordNet part of speech is lemmatized with that part of speech; any
    other token is kept exactly as tokenized.
    """
    tokens = nltk.word_tokenize(sentence)
    lemmas = []
    for token, pos_tag in nltk.pos_tag(tokens):
        wn_tag = nltk_tag_to_wordnet_tag(pos_tag)
        if wn_tag is None:
            # No usable WordNet POS for this tag: keep the token untouched.
            lemmas.append(token)
        else:
            lemmas.append(lemmatizer.lemmatize(token, wn_tag))
    return ' '.join(lemmas)

# Compiled once instead of on every call: matches any single ASCII
# punctuation character or digit.
_PUNCT_OR_DIGIT = re.compile('[%s]' % re.escape(string.punctuation + string.digits))


def text_cleaner(text):
    """Normalize a tweet for scoring.

    Steps, in order: lemmatize the sentence, lowercase it, delete every ASCII
    punctuation character and digit, then collapse whitespace.

    Args:
        text: The raw tweet text.

    Returns:
        The cleaned text, with words separated by exactly one space and no
        leading or trailing whitespace.
    """
    text = lemmatize_sentence(text).lower()
    text = _PUNCT_OR_DIGIT.sub('', text)
    # Deleting punctuation-only tokens leaves runs of spaces behind
    # ("hello , world !" -> "hello  world "). split() with no argument splits
    # on any whitespace run and drops the empty pieces, so re-joining yields
    # single-spaced text. (split(' ') followed by ' '.join is a no-op.)
    return ' '.join(text.split())

In [None]:
# Clean every tweet with text_cleaner(), then score the cleaned text with the
# same two models used on the raw text (VADER 'compound', TextBlob polarity).
data['clean_text'] = data['text'].map(text_cleaner)

data['clean_vader'] = data['clean_text'].map(
    lambda cleaned: analyzer.polarity_scores(cleaned)['compound']
)
data['clean_blob'] = data['clean_text'].map(
    lambda cleaned: TextBlob(cleaned).sentiment.polarity
)

data[['clean_vader', 'clean_blob']].describe()

In [None]:
# Load the stock price table, drop row label 251, and parse `Date` into datetimes.
# NOTE(review): row 251 is presumably a non-data row in the export — confirm
# against the CSV before relying on this hard-coded label.
stock = (
    pd.read_csv('wendys_stock.csv')
      .drop(index=[251])
      .assign(Date=lambda df: pd.to_datetime(df['Date']))
)
stock.shape

In [None]:
# Preview the first rows of the stock table.
stock.head()

In [None]:
# Parse `datetime` into pandas datetimes; the next cell calls .time() and .date() on each value.
data['datetime'] = pd.to_datetime(data['datetime'])

In [None]:
# Assign each tweet to a calendar day: a tweet stamped before 17:00 keeps its
# own date, a tweet stamped at 17:00 or later gets the previous day's date.
# NOTE(review): confirm that rolling late tweets *back* a day (rather than
# forward) is the intended alignment with the stock data.
day = [
    stamp.date() if stamp.time().hour < 17 else stamp.date() - timedelta(1)
    for stamp in data['datetime']
]
data['day'] = day

In [None]:
# Convert the `day` keys to datetimes so they compare equal to stock['Date'],
# then left-join so every tweet is kept even when no stock row matches its day.
data['day'] = pd.to_datetime(data['day'])

full_data = data.merge(stock, how='left', left_on='day', right_on='Date')

# Normalize the column labels: trim surrounding whitespace and lowercase them.
full_data = full_data.rename(columns=lambda label: label.strip().lower())

In [None]:
# Trim whitespace around each close price and remove the dollar sign.
# regex=False is passed explicitly: '$' is a regex metacharacter (end-of-string
# anchor) and the default of `regex` has changed between pandas versions, so
# spelling it out guarantees a literal replacement and avoids the FutureWarning
# older versions emit for single-character patterns.
full_data['close/last'] = (
    full_data['close/last']
    .str.strip()
    .str.replace('$', '', regex=False)
)

In [None]:
# Seed the first row's close price, then convert the column to float and
# forward-fill the gaps left by the left join.
# NOTE(review): 16.42 is a hard-coded seed — presumably the last known close
# before the first tweet; confirm against the stock data.
#
# The write goes through a single .iloc on the frame itself. The chained form
# `full_data[col].iloc[0] = ...` assigns into a temporary Series, which raises
# SettingWithCopyWarning and leaves the frame untouched under Copy-on-Write.
close_col = full_data.columns.get_loc('close/last')
full_data.iloc[0, close_col] = 16.42
# .ffill() replaces the deprecated fillna(method='ffill').
full_data['close/last'] = full_data['close/last'].astype(float).ffill()

In [None]:
# Preview the first rows of the merged tweet/stock table.
full_data.head()

In [None]:
# Write the merged table to disk; index=False leaves the row index out of the file.
full_data.to_csv('full_corporate.csv', index=False)