In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime as dt

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

In [47]:
data = pd.read_csv('wendys_tweets.csv')
data = data.dropna(subset=['text'])\
            .sort_values(by='datetime')\
            .reset_index(drop=True)
data.shape

(67528, 4)

In [49]:
data['text'].str.len().describe()

count    67528.000000
mean        72.700272
std         50.853891
min          1.000000
25%         20.000000
50%         85.000000
75%        113.000000
max        297.000000
Name: text, dtype: float64

In [50]:
test = data['text'][1]
test

"Please DM us any information you have, especially the employee's contact info and the restaurant location so we can have HR get in touch. Thank you!"

In [51]:
analyzer = SentimentIntensityAnalyzer()

data['vader_sentiment'] = [analyzer.polarity_scores(i)['compound']
                           for i in data['text']]
data['blob_sentiment'] = [TextBlob(i).sentiment.polarity
                          for i in data['text']]

data[['vader_sentiment', 'blob_sentiment']].describe()

Unnamed: 0,vader_sentiment,blob_sentiment
count,67528.0,67528.0
mean,0.254908,0.114192
std,0.337796,0.295593
min,-0.875,-1.0
25%,0.0,0.0
50%,0.3182,0.0
75%,0.555,0.275
max,0.9936,1.0


In [52]:
def text_cleaner(text):
    import re
    import string
    from nltk.stem import WordNetLemmatizer
    
    text = text.lower()
    remove = re.compile('[%s]' % re.escape(string.punctuation+string.digits))
    text = remove.sub('', text)
    
    return text

In [53]:
data['clean_text'] = [text_cleaner(i) for i in data['text']]

data['clean_vader'] = [analyzer.polarity_scores(i)['compound']
                           for i in data['clean_text']]
data['clean_blob'] = [TextBlob(i).sentiment.polarity
                          for i in data['clean_text']]

data[['clean_vader', 'clean_blob']].describe()

Unnamed: 0,clean_vader,clean_blob
count,67528.0,67528.0
mean,0.262811,0.112194
std,0.322208,0.281632
min,-0.875,-1.0
25%,0.0,0.0
50%,0.2732,0.0
75%,0.5106,0.25
max,0.9936,1.0


In [54]:
stock = pd.read_csv('wendys_stock.csv')
stock = stock.drop(labels=[251], axis=0)
stock['Date'] = pd.to_datetime(stock['Date'])
stock.shape

(503, 6)

In [55]:
stock.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
0,2018-01-02,$16.32,2076575,$16.41,$16.50,$16.24
1,2018-01-03,$16.70,3074085,$16.91,$16.91,$16.36
2,2018-01-04,$16.51,2552011,$16.75,$16.85,$16.24
3,2018-01-05,$16.79,3277766,$16.68,$16.84,$16.62
4,2018-01-08,$16.62,1915344,$16.73,$16.73,$16.38


In [56]:
data['day'] = [i.split(' ')[0] for i in data['datetime']]
data['day'] = pd.to_datetime(data['day'])

full_data = pd.merge(data,
                     stock,
                     how='left',
                     left_on='day',
                     right_on='Date')

full_data.columns = [i.strip().lower() for i in full_data.columns]

In [57]:
full_data['close/last'] = full_data['close/last'].str.strip()\
                                                 .str.replace('$', '')

In [58]:
full_data['close/last'].iloc[0] = 16.42
full_data['close/last'] = full_data['close/last'].astype(float).fillna(method='ffill')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [59]:
full_data.head()

Unnamed: 0,datetime,text,favorites,retweets,vader_sentiment,blob_sentiment,clean_text,clean_vader,clean_blob,day,date,close/last,volume,open,high,low
0,2018-01-01 18:49:44+00:00,"Please DM us the location , date and time of y...",0,1,0.667,0.0,please dm us the location date and time of yo...,0.634,0.0,2018-01-01,NaT,16.42,,,,
1,2018-01-01 18:52:31+00:00,"Please DM us any information you have, especia...",1,1,0.6239,0.0,please dm us any information you have especial...,0.5859,0.0,2018-01-01,NaT,16.42,,,,
2,2018-01-01 23:09:57+00:00,Oh no! Please DM us the restaurant location an...,0,1,0.4912,0.357143,oh no please dm us the restaurant location and...,0.3818,0.285714,2018-01-01,NaT,16.42,,,,
3,2018-01-01 23:12:34+00:00,We will send out a case for you so management ...,0,1,0.8297,0.3125,we will send out a case for you so management ...,0.8158,0.25,2018-01-01,NaT,16.42,,,,
4,2018-01-02 02:46:40+00:00,We're disappointed to hear this. Shoot us over...,0,1,-0.5187,-0.75,were disappointed to hear this shoot us over a...,-0.5187,-0.75,2018-01-02,2018-01-02,16.32,2076575.0,$16.41,$16.50,$16.24


In [60]:
full_data.to_csv('full_corporate.csv')