# Create Corpus of tweets with VADER and IBM Watson Tone Analyzer Results

### Notebook Input
* JSON files created from [1_trump-tweets-sentiment_pre-processing](1_trump-tweets-sentiment_pre-processing)


### Notebook Output
* JSON file containing the coronavirus-tweets corpus, together with its VADER polarity score and IBM Watson Tone Analyzer tone scores
* JSON file containing the non-coronavirus-tweets corpus, together with its VADER polarity score and IBM Watson Tone Analyzer tone scores

In [None]:
import json
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Initialize VADER.
# SentimentIntensityAnalyzer loads the 'vader_lexicon' resource; when that resource
# has not been downloaded yet NLTK raises LookupError, so fetch it once and retry.
try:
    sid = SentimentIntensityAnalyzer()
except LookupError:
    nltk.download('vader_lexicon')
    sid = SentimentIntensityAnalyzer()

In [None]:
# Start with empty tweet lists and corpus dicts whose 'text' entry is an empty string
coronavirus_tweets, non_coronavirus_tweets = [], []
coronavirus_tweets_corpus = dict(text='')
non_coronavirus_tweets_corpus = dict(text='')

# Load the cleaned coronavirus-related tweets from their JSON file
with open('corona_virus_tweets_cleaned.json', encoding="utf8") as corona_file:
    coronavirus_tweets = json.load(corona_file)

# Load the cleaned non-coronavirus-related tweets from their JSON file
with open('non_corona_virus_tweets_cleaned.json', encoding="utf8") as non_corona_file:
    non_coronavirus_tweets = json.load(non_corona_file)

In [None]:
# Create the text corpus for both sets of tweets.
# Every tweet's text is prefixed with a single space and all pieces are joined in one
# pass, which avoids the quadratic cost of growing a string with repeated `+`.
# The 'text' entry is assigned rather than appended to, so re-running this cell
# rebuilds the corpus instead of duplicating it.
coronavirus_tweets_corpus['text'] = "".join(
    " " + tweet['text'] for tweet in coronavirus_tweets
)

non_coronavirus_tweets_corpus['text'] = "".join(
    " " + tweet['text'] for tweet in non_coronavirus_tweets
)

In [None]:
# Show the coronavirus corpus dict as the cell output (inspection only)
coronavirus_tweets_corpus

In [None]:
# Show the non-coronavirus corpus dict as the cell output (inspection only)
non_coronavirus_tweets_corpus

In [None]:
# Score the whole coronavirus corpus text with VADER and store only the
# 'compound' value under the 'vader_polarity' key
coronavirus_tweets_corpus.update(
    vader_polarity=sid.polarity_scores(coronavirus_tweets_corpus['text'])['compound']
)
# Echo the stored score as the cell output
coronavirus_tweets_corpus['vader_polarity']

In [None]:
# Score the whole non-coronavirus corpus text with VADER and store only the
# 'compound' value under the 'vader_polarity' key
non_coronavirus_tweets_corpus.update(
    vader_polarity=sid.polarity_scores(non_coronavirus_tweets_corpus['text'])['compound']
)
# Echo the stored score as the cell output
non_coronavirus_tweets_corpus['vader_polarity']

In [None]:
# Append IBM Watson Tone Analyzer results to coronavirus-related tweets
with open("temp_text.txt", "w", encoding="utf8") as outfile:
    outfile.write(coronavirus_tweets_corpus['text'])

# Set placeholder values
coronavirus_tweets_corpus['analytical'] = 0.0
coronavirus_tweets_corpus['anger']      = 0.0
coronavirus_tweets_corpus['confident']  = 0.0
coronavirus_tweets_corpus['fear']       = 0.0
coronavirus_tweets_corpus['joy']        = 0.0
coronavirus_tweets_corpus['sadness']    = 0.0
coronavirus_tweets_corpus['tentative']  = 0.0        

# Run the text through the IBM Watson Tone Analyzer
response = !curl -X POST -u "apikey:MY-KEY" --header "Content-Type: text/plain" --data-binary @\Users\netho\Desktop\TRUMP_TWEETS\trump-corona-sentiment\temp_text.txt "MY-URL"
tone_analyzer_response = json.loads(response[-1])

# Set the tone values for each tone if present
for tone in tone_analyzer_response['document_tone']['tones']:
    if tone['tone_id'] == 'analytical':
        coronavirus_tweets_corpus['analytical'] = tone['score']
    elif tone['tone_id'] == 'anger':
        coronavirus_tweets_corpus['anger']      = tone['score']
    elif tone['tone_id'] == 'confident':
        coronavirus_tweets_corpus['confident']  = tone['score']
    elif tone['tone_id'] == 'fear':
        coronavirus_tweets_corpus['fear']       = tone['score']
    elif tone['tone_id'] == 'joy':
        coronavirus_tweets_corpus['joy']        = tone['score']
    elif tone['tone_id'] == 'sadness':
        coronavirus_tweets_corpus['sadness']    = tone['score']
    elif tone['tone_id'] == 'tentative':
        coronavirus_tweets_corpus['tentative']  = tone['score']

# Save to a JSON file
with open("corpus_coronavirus_tweets_with_tone_and_sentiment.json", "w", encoding="utf8") as outfile:
    json.dump(coronavirus_tweets_corpus, outfile)

In [None]:
# Show the coronavirus corpus dict, now including the tone entries, as the cell output
coronavirus_tweets_corpus

In [None]:
# Append IBM Watson Tone Analyzer results to non-coronavirus-related tweets
with open("temp_text.txt", "w", encoding="utf8") as outfile:
    outfile.write(non_coronavirus_tweets_corpus['text'])

# Set placeholder values
non_coronavirus_tweets_corpus['analytical'] = 0.0
non_coronavirus_tweets_corpus['anger']      = 0.0
non_coronavirus_tweets_corpus['confident']  = 0.0
non_coronavirus_tweets_corpus['fear']       = 0.0
non_coronavirus_tweets_corpus['joy']        = 0.0
non_coronavirus_tweets_corpus['sadness']    = 0.0
non_coronavirus_tweets_corpus['tentative']  = 0.0        

# Run the text through the IBM Watson Tone Analyzer
response = !curl -X POST -u "apikey:MY-KEY" --header "Content-Type: text/plain" --data-binary @\Users\netho\Desktop\TRUMP_TWEETS\trump-corona-sentiment\temp_text.txt "MY-URL"
tone_analyzer_response = json.loads(response[-1])

# Set the tone values for each tone if present
for tone in tone_analyzer_response['document_tone']['tones']:
    if tone['tone_id'] == 'analytical':
        non_coronavirus_tweets_corpus['analytical'] = tone['score']
    elif tone['tone_id'] == 'anger':
        non_coronavirus_tweets_corpus['anger']      = tone['score']
    elif tone['tone_id'] == 'confident':
        non_coronavirus_tweets_corpus['confident']  = tone['score']
    elif tone['tone_id'] == 'fear':
        non_coronavirus_tweets_corpus['fear']       = tone['score']
    elif tone['tone_id'] == 'joy':
        non_coronavirus_tweets_corpus['joy']        = tone['score']
    elif tone['tone_id'] == 'sadness':
        non_coronavirus_tweets_corpus['sadness']    = tone['score']
    elif tone['tone_id'] == 'tentative':
        non_coronavirus_tweets_corpus['tentative']  = tone['score']

# Save to a JSON file
with open("corpus_non_coronavirus_tweets_with_tone_and_sentiment.json", "w", encoding="utf8") as outfile:
    json.dump(non_coronavirus_tweets_corpus, outfile)

In [None]:
# Show the non-coronavirus corpus dict, now including the tone entries, as the cell output
non_coronavirus_tweets_corpus

### Summary
* We created a corpus of all the coronavirus-related tweets, as well as a corpus of all the non-coronavirus-related tweets. 
* We ran each corpus through VADER and the IBM Watson Tone Analyzer, and saved the results in two JSON files.