In [52]:
import re
import tweepy
from textblob import TextBlob
import json
import os
import pandas as p 

class TwitterClient(object):
    '''
    Placeholder type for Twitter sentiment analysis.

    The class declares no attributes or methods of its own; its body
    consists solely of this docstring.
    '''

def clean_tweet(tweet):
    '''
    Strip @mentions, links and special characters from a tweet.

    Every @mention, every URL-like token (``scheme://...``) and every
    character that is not an ASCII letter, digit, space or tab is replaced
    by a space. The surviving words are then re-joined with single spaces.

    Parameters
    ----------
    tweet : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned text, without leading, trailing or repeated whitespace.
    '''
    # Raw string: "\w" and "\S" are regex escapes, not Python string escapes,
    # so a non-raw literal triggers invalid-escape warnings on modern Python.
    # The mention branch accepts "_" so a handle such as @john_doe is removed
    # whole instead of leaving "doe" behind in the cleaned text.
    noise_pattern = r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(noise_pattern, " ", tweet).split())

In [53]:
def get_tweet_sentiment(tweet):
    '''
    Label a tweet as 'positive', 'neutral' or 'negative'.

    The label is derived from the sign of the polarity score that
    TextBlob's sentiment analysis reports for the given text: above zero
    is 'positive', exactly zero is 'neutral', anything else is 'negative'.
    '''
    # Read the polarity score once and branch on its sign.
    polarity = TextBlob(tweet).sentiment.polarity
    if polarity > 0:
        return 'positive'
    if polarity == 0:
        return 'neutral'
    return 'negative'

In [54]:
def get_tweets(filename):
    '''
    Load tweets from a JSON file and label each one with its sentiment.

    Parameters
    ----------
    filename : str
        Path to a JSON file. Its top-level value is iterated, and each
        item is indexed with the key "text".

    Returns
    -------
    list of dict
        One ``{'text': ..., 'sentiment': ...}`` record per tweet, in file
        order, where the sentiment comes from ``get_tweet_sentiment``.

    Errors raised while opening or parsing the file, or while reading the
    "text" key of an item, are not caught here.
    '''
    # The context manager closes the handle even if parsing fails.
    with open(filename) as tweet_file:
        data = json.load(tweet_file)

    return [
        {'text': tweet["text"], 'sentiment': get_tweet_sentiment(tweet["text"])}
        for tweet in data
    ]
        


In [73]:
def main(indir='/Users/crystal/Big-Data-proj/filtered_tweets'):
    '''
    Print the net daily sentiment for each tweet file, grouped by directory.

    For every sub-directory of ``indir`` the directory name is printed,
    followed by one "<prefix> <net>" line per ``.json`` file inside it and
    then a blank separator. ``<prefix>`` is the first two characters of the
    file name (presumably a state code -- confirm against the data layout)
    and ``<net>`` is the number of positive tweets minus the number of
    negative tweets in that file (0 when the file holds no tweets).

    Parameters
    ----------
    indir : str, optional
        Root directory of the filtered tweets, organized by date and then
        by state. Defaults to the location that was previously hard-coded.
    '''
    for date_dir in os.listdir(indir):
        date_path = os.path.join(indir, date_dir)

        # Skip Finder metadata and any other stray non-directory entry;
        # listing such an entry would raise.
        if date_dir == ".DS_Store" or not os.path.isdir(date_path):
            continue

        print("{}".format(date_dir))

        for tweet_file in os.listdir(date_path):
            if not tweet_file.endswith(".json"):
                continue

            # Build the path from indir so the result does not depend on
            # the current working directory.
            labeled = get_tweets(os.path.join(date_path, tweet_file))

            # Net sentiment: positives minus negatives. Neutral tweets do
            # not affect the score, and an empty file yields 0.
            positives = sum(1 for t in labeled if t['sentiment'] == 'positive')
            negatives = sum(1 for t in labeled if t['sentiment'] == 'negative')

            print(tweet_file[0:2], positives - negatives)

        print("\n")
            
if __name__ == "__main__":
    # Run the analysis only when this code is executed directly,
    # not when it is imported as a module.
    main()

11-07-2017
NA 0
KS 0
UT 0
OR 1
DC 0
OK 0
NM 0
LA 0
WA 0
NY 1
MS 0
PR 0
WY 0
VT 0
DE 0
MT 0
KY 0
VA 1
TX 4
MA 0
IL 0
AZ 0
PA 1
CO 0
IA 0
NJ 0
SD -1
MO 1
WI 0
GA 0
IN 0
NE 0
AL 0
CT 0
ND 0
GU 0
HI 0
CA 3
MN 1
TN 0
NH 0
MI 1
VI 0
NV 1
NC 1
AS 0
ME 0
ID 0
WV 0
MP 0
OH 0
FL -1
SC 0
AR 0
MD -1
AK 0
RI 1


11-06-2017
NA 0
KS 1
UT 1
OR 0
DC 0
OK 0
NM 0
LA 0
WA 0
NY 1
MS 0
PR 0
WY 0
VT 0
DE 0
MT 0
KY 3
VA 0
TX 2
MA 1
IL 0
AZ 0
PA 0
CO 0
IA 1
NJ 0
SD 0
MO 0
WI 1
GA 0
IN 0
NE 0
AL 0
CT 0
ND 0
GU 0
HI 0
CA -2
MN 0
TN 0
NH 0
MI 0
VI 0
NV 0
NC 1
AS 0
ME 0
ID 0
WV 0
MP 0
OH 0
FL 0
SC 0
AR 0
MD 0
AK 0
RI 0


11-08-2017
NA 0
KS 0
UT 0
OR 1
DC 0
OK 1
NM 0
LA 2
WA 0
NY 5
MS 1
PR 0
WY 0
VT 0
DE 0
MT 0
KY 0
VA 1
TX 3
MA 0
IL 0
AZ 4
PA 0
CO 1
IA 0
NJ 1
SD 0
MO 1
WI 0
GA -1
IN 0
NE 0
AL 0
CT 0
ND 0
GU 0
HI 0
CA 9
MN 0
TN 0
NH 0
MI 2
VI 0
NV 0
NC 2
AS 0
ME 0
ID -1
WV 0
MP 0
OH 0
FL 2
SC 0
AR 0
MD 2
AK 0
RI 0


12-05-2017
NA 0
KS 0
UT 0
OR 0
DC 0
OK 0
NM 0
LA 0
WA 0
NY 0
MS 0
PR 0
WY 0
VT 0
DE 0
MT 0
KY 0
VA 0