In [1]:
# Dependencies
import tweepy
import json
import numpy as np
from config import consumer_key, consumer_secret, access_token, access_token_secret
from datetime import datetime
import pandas as pd
from nltk.tokenize import word_tokenize
import re

In [2]:
# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [3]:
# Twitter API Keys
consumer_key = consumer_key
consumer_secret = consumer_secret
access_token = access_token
access_token_secret = access_token_secret

In [4]:
# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [5]:
# Lists to hold sentiments
compound_list = []
positive_list = []
negative_list = []
neutral_list = []
tweet_text = []
tweet_times = []
tweet_id = []
time_diffs = []
tweet_user = []
tweet_handle = []
tweet_followers = []
tweet_language = []

In [6]:
# Target Search Term
target_term = ' #AT&T INC (T) OR #$T OR \\"AT&T INC (T)\\" OR \\"$T\\" ' 

public_tweets = api.search(target_term, count=1500)

#print(json.dumps(public_tweets,indent = 4))

In [7]:
# Loop through all tweets
for tweet in public_tweets["statuses"]:

    # Run Vader Analysis on each tweet
    compound = analyzer.polarity_scores(tweet["text"])["compound"]
    pos = analyzer.polarity_scores(tweet["text"])["pos"]
    neu = analyzer.polarity_scores(tweet["text"])["neu"]
    neg = analyzer.polarity_scores(tweet["text"])["neg"]
    
    raw_time = tweet["created_at"]
    converted_time = datetime.strptime(raw_time, "%a %b %d %H:%M:%S %z %Y")
    tweet_times.append(converted_time)
    
       
    text = tweet["text"]
    tweet_text.append(text)
    
    tw_id = tweet["id"]
    tweet_id.append(tw_id)
    
    user = tweet["user"]["name"]
    tweet_user.append(user)
    
    handle = tweet["user"]["screen_name"]
    tweet_handle.append(handle)
    
    followers = tweet["user"]["followers_count"]
    tweet_followers.append(followers)
    
    language =  tweet["user"]["lang"]
    tweet_language.append(language)
    
        
    # Add each value to the appropriate array
    compound_list.append(compound)
    positive_list.append(pos)
    negative_list.append(neg)
    neutral_list.append(neu)
    


In [8]:
converted_length = len(tweet_times)

time_diffs = []

for x in range(converted_length - 1):
    time_diff = tweet_times[x] - tweet_times[x + 1]
#     print(f'time diff: {time_diff}')
#     print(f'time diff (in seconds): {time_diff.seconds}')
#     print(f'time diff (in minutes): {time_diff.seconds / 60}')
#     print(f'time diff (in hours): {time_diff.seconds / 3600}')

    # convert time_diff to hours
    time_diff = time_diff.seconds
    time_diffs.append(time_diff)

# adding a record to # of rows matches for data frame conversion
time_diffs.append(0)


In [9]:
tweet_data = pd.DataFrame({'Name':tweet_user,
                           'Handle':tweet_handle,
                           'Followers':tweet_followers,
                           'Language' : tweet_language,
                            'Id':tweet_id,
                            'Text': tweet_text,
                           'Time Stamp':tweet_times,
                           'Time Delta (seconds)':time_diffs,
                           'Compound' : compound_list,
                           'positive': positive_list,
                           'negative': negative_list,
                           'neutral': neutral_list})
 
for index, row in tweet_data.iterrows():
    
    date =  pd.to_datetime(row["Time Stamp"].date())
    tweet_data["Date"] = date
  


In [10]:
# Store the Average Sentiments
sentiment = {"Compound": np.mean(compound_list),
             "Positive": np.mean(positive_list),
             "Neutral": np.mean(neutral_list),
             "Negative": np.mean(negative_list)}

# Print the Sentiments
print(sentiment)
print("")


{'Compound': -0.002806999999999996, 'Positive': 0.0385, 'Neutral': 0.9249200000000001, 'Negative': 0.03658}



In [11]:
emoticons_str = r"""
    (?:
        [:=;] # Eyes
        [oO\-]? # Nose (optional)
        [D\)\]\(\]/\\OpP] # Mouth
    )"""
 
regex_str = [
    emoticons_str,
    r'<[^>]+>', # HTML tags
    r'(?:@[\w_]+)', # @-mentions
    r"(?:\#+[\w_]+[\w\'_\-]*[\w_]+)", # hash-tags
    r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&amp;+]|[!*\(\),]|(?:%[0-9a-f][0-9a-f]))+', # URLs
 
    r'(?:(?:\d+,?)+(?:\.?\d+)?)', # numbers
    r"(?:[a-z][a-z'\-_]+[a-z])", # words with - and '
    r'(?:[\w_]+)', # other words
    r'(?:\S)' # anything else
]
    
tokens_re = re.compile(r'('+'|'.join(regex_str)+')', re.VERBOSE | re.IGNORECASE)
emoticon_re = re.compile(r'^'+emoticons_str+'$', re.VERBOSE | re.IGNORECASE)
 
def tokenize(s):
    return tokens_re.findall(s)
 
def preprocess(s, lowercase=False):
    tokens = tokenize(s)
    if lowercase:
        tokens = [token if emoticon_re.search(token) else token.lower() for token in tokens]
    return tokens

tweet = 'RT @marcobonzanini: just an example! :D http://example.com #NLP'
print(preprocess(tweet))
# ['RT', '@marcobonzanini', ':', 'just', 'an', 'example', '!', ':D', 'http://example.com', '#NLP']


['RT', '@marcobonzanini', ':', 'just', 'an', 'example', '!', ':D', 'http://example.com', '#NLP']


In [12]:
tweet_text_breakdown = []
tweet_text_id = []

for index, row in tweet_data.iterrows():
    text_seperate = preprocess(row["Text"])
    tweet_text_breakdown.append(text_seperate)
    text_seperate_id = row["Id"]
    tweet_text_id.append(text_seperate_id)

text_breakdown_df = pd.DataFrame({'Id':tweet_text_id,
                                   'Text Breakdown':tweet_text_breakdown})
   

In [13]:
text_breakdown_df.head(5)


Unnamed: 0,Id,Text Breakdown
0,1010923508791169024,"[Buy, or, Sell, AT, &, amp, ;, T, Inc, ., Stoc..."
1,1010904363831832581,"[Check, out, the, 31, latest, openings, at, AT..."
2,1010888325652082696,"[RT, @ATTBusiness, :, AT, &, amp, ;, T, Busine..."
3,1010881764837867522,"[Both, shallow, :, $, FUSZ, nFusz, Inc, :, AT,..."
4,1010880926627311616,"[AT, &, amp, ;, T, to, launch, 5, G, in, U, .,..."


In [17]:
tweet_data_all = pd.merge(tweet_data,text_breakdown_df[["Id","Text Breakdown"]], on="Id",how="left")

tweet_data.to_csv("tweet_data_all.csv")

tweet_data_all.head(5)

Unnamed: 0,Compound,Followers,Handle,Id,Language,Name,Text,Time Delta (seconds),Time Stamp,negative,neutral,positive,Date,Text Breakdown
0,0.0,5192,Focus_on_Dvds,1010923508791169024,en,Focus on Dividends,Buy or Sell AT&amp;T Inc. Stock With Its 6% Di...,4565,2018-06-24 16:32:01+00:00,0.0,1.0,0.0,2018-06-21,"[Buy, or, Sell, AT, &, amp, ;, T, Inc, ., Stoc..."
1,0.0,364,tmj_WAR_sales,1010904363831832581,en,"Richland, WA Sales",Check out the 31 latest openings at AT&amp;T (...,3824,2018-06-24 15:15:56+00:00,0.0,1.0,0.0,2018-06-21,"[Check, out, the, 31, latest, openings, at, AT..."
2,0.296,4,asj_inc,1010888325652082696,en,Playwright Asj Inc,RT @ATTBusiness: AT&amp;T Business CMO Mo Kati...,1564,2018-06-24 14:12:12+00:00,0.0,0.891,0.109,2018-06-21,"[RT, @ATTBusiness, :, AT, &, amp, ;, T, Busine..."
3,0.2263,995,iHangout2,1010881764837867522,en,iHangout,Both shallow: $FUSZ nFusz Inc: AT&amp;T just a...,200,2018-06-24 13:46:08+00:00,0.0,0.881,0.119,2018-06-21,"[Both, shallow, :, $, FUSZ, nFusz, Inc, :, AT,..."
4,-0.296,591,silewconsulting,1010880926627311616,en,silew,AT&amp;T to launch 5G in U.S. by late 2018 (Re...,2185,2018-06-24 13:42:48+00:00,0.104,0.896,0.0,2018-06-21,"[AT, &, amp, ;, T, to, launch, 5, G, in, U, .,..."
