In [1]:
import re, string
import random
import json as json
from nltk import word_tokenize
from nltk.corpus import twitter_samples
from nltk.tag import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk import FreqDist
from nltk import classify
from nltk import NaiveBayesClassifier

stop_words = stopwords.words('english')

with open('master.json') as f:
    palantir = json.load(f)
    
def tweets_to_string(data):    
    list = []   
    for key in data:
        list += [data[key]['text']]
    return list
        
palantir_tweets = tweets_to_string(palantir)

positive_tweets = twitter_samples.strings('positive_tweets.json')

negative_tweets = twitter_samples.strings('negative_tweets.json')
text = twitter_samples.strings('tweets.20150430-223406.json')

positive_tweet_tokens = twitter_samples.tokenized('positive_tweets.json')
negative_tweet_tokens = twitter_samples.tokenized('negative_tweets.json')
palantir_tweet_tokens = [word_tokenize(i) for i in palantir_tweets]

    

def lemmatize_sentence(tokens):
    lemmatizer = WordNetLemmatizer()
    lemmatized_sentence = []
    for word, tag in pos_tag(tokens):
        if tag.startswith('NN'):
            pos = 'n'
        elif tag.startswith('VB'):
            pos = 'v'
        else:
            pos = 'a'
        lemmatized_sentence.append(lemmatizer.lemmatize(word, pos))
    return lemmatized_sentence

def remove_noise(tweet_tokens, stop_words = ()):
    cleaned_tokens = []
    for token, tag in pos_tag(tweet_tokens):
        token = re.sub('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|'\
                       '(?:%[0-9a-fA-F][0-9a-fA-F]))+','', token)
        token = re.sub("(@[A-Za-z0-9_]+)","", token)
        if tag.startswith("NN"):
            pos = 'n'
        elif tag.startswith('VB'):
            pos = 'v'
        else:
            pos = 'a'
        lemmatizer = WordNetLemmatizer()
        token = lemmatizer.lemmatize(token, pos)
        if len(token) > 0 and token not in string.punctuation and token.lower() not in stop_words:
            cleaned_tokens.append(token.lower())
    return cleaned_tokens

positive_cleaned_tokens_list = []
negative_cleaned_tokens_list = []
palantir_cleaned_tokens_list = []

for tokens in positive_tweet_tokens:
    positive_cleaned_tokens_list.append(remove_noise(tokens, stop_words))

for tokens in negative_tweet_tokens:
    negative_cleaned_tokens_list.append(remove_noise(tokens, stop_words))
    
for tokens in palantir_tweet_tokens:
    palantir_cleaned_tokens_list.append(remove_noise(tokens, stop_words))
    
def get_all_words(cleaned_tokens_list):
    for tokens in cleaned_tokens_list:
        for token in tokens:
            yield token

all_pos_words = get_all_words(positive_cleaned_tokens_list)
all_neg_words = get_all_words(negative_cleaned_tokens_list)
all_palantir_words = get_all_words(palantir_cleaned_tokens_list)

#freq_dist_pos = FreqDist(all_pos_words)
#print(freq_dist_pos.most_common(10))

def get_tweets_for_model(cleaned_tokens_list):
    for tweet_tokens in cleaned_tokens_list:
        yield dict([token, True] for token in tweet_tokens)

positive_tokens_for_model = get_tweets_for_model(positive_cleaned_tokens_list)
negative_tokens_for_model = get_tweets_for_model(negative_cleaned_tokens_list)
palantir_tokens_for_model = get_tweets_for_model(palantir_cleaned_tokens_list)

positive_dataset = [(tweet_dict, "Positive")
                     for tweet_dict in positive_tokens_for_model]

negative_dataset = [(tweet_dict, "Negative")
                     for tweet_dict in negative_tokens_for_model]

palantir_positive_dataset = [(tweet_dict, "Positive")
                     for tweet_dict in palantir_tokens_for_model]

palantir_negative_dataset = [(tweet_dict, "Negative")
                     for tweet_dict in palantir_tokens_for_model]

dataset = positive_dataset + negative_dataset
palantir_dataset = palantir_positive_dataset + palantir_negative_dataset

random.shuffle(dataset)
random.shuffle(palantir_dataset)

train_data = dataset[:7000]
test_data = dataset[7000:]

classifier = NaiveBayesClassifier.train(train_data)

print("Accuracy is:", classify.accuracy(classifier, palantir_dataset))

print(classifier.show_most_informative_features(100))

Accuracy is: 0.7934508816120907
Most Informative Features
                      :) = True           Positi : Negati =   1629.8 : 1.0
                follower = True           Positi : Negati =     35.6 : 1.0
                     sad = True           Negati : Positi =     23.8 : 1.0
                     bam = True           Positi : Negati =     23.2 : 1.0
                     x15 = True           Negati : Positi =     17.4 : 1.0
               community = True           Positi : Negati =     16.7 : 1.0
                     ugh = True           Negati : Positi =     14.0 : 1.0
                   didnt = True           Negati : Positi =     14.0 : 1.0
                followed = True           Negati : Positi =     13.3 : 1.0
                    glad = True           Positi : Negati =     12.7 : 1.0
                  arrive = True           Positi : Negati =     12.7 : 1.0
                    kill = True           Negati : Positi =     11.2 : 1.0
                congrats = True           

# Palantir Info

In [25]:
import re, string
import random
import json as json
from nltk import word_tokenize
from nltk.corpus import twitter_samples
from nltk.tag import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk import FreqDist
from nltk import classify
from nltk import NaiveBayesClassifier
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy_copy/output/PLTR/master.json') as f:
    palantir = json.load(f)
    
def tweets_to_string(data):    
    list = []   
    for key in data:
        list += [data[key]['text']]
    return list
def retweet_count(data):
    list = []   
    for key in data:
        list += [data[key]['retweet_count']]
    return list
def favorite_count(data):
    list = []   
    for key in data:
        list += [data[key]['favorite_count']]
    return list

palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)
    
def sentiment_analyzer_scores(text):
    analyser = SentimentIntensityAnalyzer()
    score = analyser.polarity_scores(text)
    lb = score['compound']
    if lb >= 0.05:
        return 1
    elif (lb > -0.05) and (lb < 0.05):
        return 0
    else:
        return -1
    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()



     compound    neg    neu    pos  vader   txtblob  retweets  favorites
0      0.0000  0.000  1.000  0.000      0  0.000000         0          1
1      0.6908  0.000  0.759  0.241      1  0.350000         0          0
2      0.5859  0.000  0.759  0.241      1  0.600000         0          0
3      0.0000  0.000  1.000  0.000      0  0.000000         0          0
4      0.6369  0.000  0.794  0.206      1  0.200000         1          6
5      0.4019  0.000  0.891  0.109      1 -0.033333         0          0
6      0.3400  0.000  0.902  0.098      1  0.000000         1          6
7      0.3400  0.000  0.854  0.146      1  0.000000         0          0
8      0.2960  0.000  0.845  0.155      1  0.100000         2          6
9      0.4310  0.000  0.806  0.194      1  0.333333         0          0
10     0.0000  0.000  1.000  0.000      0  0.000000         1          0
11     0.3612  0.000  0.898  0.102      1  0.066667         0          0
12     0.0000  0.000  1.000  0.000      0  0.000000

# Palantir After INFO

In [17]:


#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy_copy/output/PLTR/after/after_master.json') as f:
    palantir = json.load(f)
    

palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)
    
    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()



     compound    neg    neu    pos  vader   txtblob  retweets  favorites
0      0.0000  0.000  1.000  0.000      0  0.062500         0          0
1      0.0701  0.089  0.813  0.098      1  0.261111         0          1
2      0.0000  0.000  1.000  0.000      0  0.000000         0          0
3      0.0000  0.000  1.000  0.000      0 -0.155556         0          3
4      0.0000  0.000  1.000  0.000      0  0.000000         0          0
5     -0.8418  0.282  0.718  0.000     -1 -0.366667         0          0
6      0.0000  0.000  1.000  0.000      0  0.000000         0          0
7      0.0000  0.000  1.000  0.000      0  0.050000         0          1
8      0.0000  0.000  1.000  0.000      0  0.062500         0          0
9      0.0000  0.000  1.000  0.000      0  0.000000         0          0
10     0.0000  0.000  1.000  0.000      0  0.062500         0          0
11     0.0000  0.000  1.000  0.000      0  0.062500         0          0
12     0.5574  0.000  0.805  0.195      1  0.218182

# KC INFO

In [24]:


#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy/output/KC/master.json') as f:
    palantir = json.load(f)


palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)
    

    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()




    compound    neg    neu    pos  vader   txtblob  retweets  favorites
0     0.0000  0.000  1.000  0.000      0  0.083333         4          1
1    -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
2     0.0000  0.000  1.000  0.000      0  0.125000         0          0
3     0.0000  0.000  1.000  0.000      0  0.125000         0          0
4     0.4019  0.000  0.881  0.119      1  0.203125         0          2
5    -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
6     0.0000  0.000  1.000  0.000      0 -0.100000         0          0
7     0.0000  0.000  1.000  0.000      0 -0.100000         1          0
8    -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
9     0.2960  0.000  0.896  0.104      1  0.000000         1          1
10    0.1280  0.000  0.923  0.077      1  0.250000         0          0
11    0.0000  0.000  1.000  0.000      0  0.000000         0          0
12   -0.7906  0.286  0.714  0.000     -1 -0.042045         0    

# KC AFTER INFO

In [23]:

#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy_copy/output/KC/after/after_master.json') as f:
    palantir = json.load(f)
    

palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)

    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()



     compound    neg    neu    pos  vader   txtblob  retweets  favorites
0     -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
1     -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
2     -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
3     -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
4      0.0000  0.000  1.000  0.000      0  0.000000         2          3
5     -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
6     -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
7     -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
8     -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
9     -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
10    -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
11    -0.7906  0.286  0.714  0.000     -1 -0.042045         0          0
12     0.3400  0.000  0.912  0.088      1  0.000000

# ASAN INFO

In [14]:


#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy/output/ASAN/master.json') as f:
    palantir = json.load(f)
    

palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)
    
    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()




    compound    neg    neu    pos  vader   txtblob  retweets  favorites
0     0.0000  0.000  1.000  0.000      0  0.000000         0          0
1     0.0000  0.000  1.000  0.000      0  0.000000         0          0
2     0.0000  0.000  1.000  0.000      0  0.000000         0          1
3     0.0000  0.000  1.000  0.000      0  0.000000         0          2
4     0.0000  0.000  1.000  0.000      0  0.100000         0          0
5     0.0000  0.000  1.000  0.000      0  0.050000         0          0
6     0.0000  0.000  1.000  0.000      0  0.000000         0          0
7     0.0000  0.000  1.000  0.000      0  0.000000         1          2
8     0.4144  0.000  0.888  0.112      1  0.200000         0          0
9     0.4404  0.000  0.791  0.209      1  0.700000         0          1
10    0.0000  0.000  1.000  0.000      0  0.100000         0          2
11    0.0516  0.093  0.805  0.102      1  0.100000         2          2
12    0.0000  0.000  1.000  0.000      0  0.250000         0    

# ASAN AFTER INFO

In [22]:


#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy_copy/output/ASAN/after/after_master.json') as f:
    palantir = json.load(f)
    

palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)
    

    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()



     compound    neg    neu    pos  vader   txtblob  retweets  favorites
0      0.0000  0.000  1.000  0.000      0  0.000000         0          0
1      0.0000  0.000  1.000  0.000      0  0.000000         0          9
2      0.0000  0.000  1.000  0.000      0  0.100000         0          0
3      0.8020  0.000  0.586  0.414      1  0.500000         0          0
4      0.0000  0.000  1.000  0.000      0  0.000000         0          0
5      0.0000  0.000  1.000  0.000      0  0.000000         0          2
6      0.0000  0.000  1.000  0.000      0  0.000000         0          0
7      0.0000  0.000  1.000  0.000      0  0.000000         0          0
8      0.0000  0.000  1.000  0.000      0  0.100000         0          3
9     -0.3182  0.119  0.881  0.000     -1  0.000000         3          6
10     0.0000  0.000  1.000  0.000      0  0.250000         0          0
11     0.0000  0.000  1.000  0.000      0  0.100000         1          0
12     0.0000  0.000  1.000  0.000      0  0.000000

# SNOW INFO

In [21]:

#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy/output/SNOW/master.json') as f:
    palantir = json.load(f)
    

palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)
    
    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()




     compound    neg    neu    pos  vader   txtblob  retweets  favorites
0      0.8617  0.116  0.633  0.251      1  0.000000         0          0
1      0.2960  0.000  0.913  0.087      1  0.000000         0          0
2      0.2960  0.000  0.872  0.128      1  0.000000         0          0
3      0.4215  0.000  0.863  0.137      1 -0.250000         0          0
4      0.4019  0.000  0.828  0.172      1  0.000000         5         13
5      0.2023  0.000  0.878  0.122      1  0.500000         1          0
6      0.0000  0.000  1.000  0.000      0  0.000000         0          0
7      0.2960  0.000  0.909  0.091      1  0.000000         0          0
8      0.4215  0.000  0.863  0.137      1 -0.250000         0          0
9      0.0000  0.000  1.000  0.000      0  0.000000         0          1
10     0.2960  0.000  0.855  0.145      1  0.000000         4          8
11     0.0000  0.000  1.000  0.000      0  0.383333         0          0
12     0.2960  0.000  0.845  0.155      1 -0.050000

# U INFO

In [20]:


#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy/output/U/master.json') as f:
    palantir = json.load(f)
    

palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)
    
    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()




     compound    neg    neu    pos  vader   txtblob  retweets  favorites
0      0.4939  0.092  0.690  0.218      1  0.800000         1          1
1      0.4588  0.000  0.833  0.167      1  0.312500         0          0
2      0.0000  0.000  1.000  0.000      0 -0.350000         0          0
3      0.4939  0.098  0.671  0.232      1  0.800000         0          0
4      0.4767  0.000  0.853  0.147      1  0.000000         0          0
5      0.6956  0.000  0.759  0.241      1  0.400000         0          3
6      0.4939  0.092  0.690  0.218      1  0.800000         0          0
7      0.1779  0.147  0.632  0.221      1  0.000000         0          0
8      0.0000  0.000  1.000  0.000      0  0.250000         0          0
9      0.5574  0.000  0.813  0.187      1 -0.033333         0          0
10     0.5574  0.000  0.813  0.187      1 -0.033333         0          0
11     0.4939  0.000  0.714  0.286      1  0.000000         0          0
12     0.3400  0.000  0.862  0.138      1 -0.133333

# ZI INFO

In [19]:

#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy/output/ZI/master.json') as f:
    palantir = json.load(f)
    

palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)
    

    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()




    compound    neg    neu    pos  vader   txtblob  retweets  favorites
0     0.0000  0.000  1.000  0.000      0  0.000000         0          0
1     0.0000  0.000  1.000  0.000      0  0.000000         0          0
2     0.2732  0.000  0.913  0.087      1  0.200000         0          0
3     0.8591  0.000  0.698  0.302      1  0.450000         0          0
4     0.8591  0.000  0.667  0.333      1  0.450000         2          5
5     0.4753  0.051  0.809  0.140      1  0.250000         0          0
6     0.0000  0.000  1.000  0.000      0 -0.062500         0          0
7     0.0000  0.000  1.000  0.000      0 -0.100000         0          1
8     0.4215  0.000  0.641  0.359      1  0.000000         0          0
9     0.5106  0.000  0.823  0.177      1  0.000000         0          0
10    0.5106  0.000  0.829  0.171      1  0.433333         3         11
11    0.5719  0.000  0.598  0.402      1  0.000000         0          0
12    0.3182  0.000  0.859  0.141      1 -0.100000         0    

# ZI AFTER INFO

In [18]:

#get the tweets from the json
with open('/Users/kylebryant/Desktop/cs591_sns_ipo_copy_copy/output/ZI/after/after_master.json') as f:
    palantir = json.load(f)
    

palantir_tweets = tweets_to_string(palantir) #array of related tweets
retweets = retweet_count(palantir)
favorites = favorite_count(palantir)

#print(palantir_tweets)

# calculate the sentiment for each tweet 
scores = [] # array that stores the sentiment value (neg, neu, pos, compound) 
            # where compound is the aggregate sentiment
analyser = SentimentIntensityAnalyzer()
for tweet in palantir_tweets:
    score = analyser.polarity_scores(tweet)
    scores.append(score)
    
# print the tweet and the associated score
# i = 0
# for i in range(len(palantir_tweets)):
    # print(str(i+1) + ".json: "+ palantir_tweets[i] + ", score = " + json.dumps(scores[i]))

txtblob = []
for tweet in palantir_tweets:
    #print(TextBlob(tweet).sentiment.polarity)
    txtblob.append(TextBlob(tweet).sentiment.polarity)
    
    
vader = []
for tweet in palantir_tweets:
    tmp = sentiment_analyzer_scores(tweet) 
    vader.append(tmp)
    
#Converting List of Dictionaries into Dataframe
dataFrame = pd.DataFrame(scores)
dataFrame['vader'] = vader
dataFrame['txtblob'] = txtblob
dataFrame['retweets'] = retweets
dataFrame['favorites'] = favorites
print(dataFrame)
#dataFrame.mean()



     compound    neg    neu    pos  vader   txtblob  retweets  favorites
0      0.2960  0.000  0.913  0.087      1  0.000000         0          0
1      0.0000  0.000  1.000  0.000      0  0.000000         0          0
2      0.5893  0.000  0.857  0.143      1  0.750000         0          0
3      0.2960  0.000  0.913  0.087      1  0.000000         0          0
4      0.0000  0.000  1.000  0.000      0  0.000000         1          0
5      0.2960  0.000  0.905  0.095      1  0.000000         2          4
6      0.3182  0.000  0.892  0.108      1 -0.100000         2          3
7      0.2023  0.000  0.917  0.083      1  0.125000         0          0
8      0.0000  0.000  1.000  0.000      0  0.000000         0          0
9      0.2732  0.000  0.877  0.123      1  0.000000         0          0
10    -0.1779  0.126  0.779  0.095     -1  0.000000         0          0
11     0.0000  0.000  1.000  0.000      0  0.250000         0          0
12     0.0000  0.000  1.000  0.000      0  0.000000