## Sentiment Determination

### Import Libraries

In [44]:
import tweepy
import datetime
import json
from datetime import datetime, timedelta
import csv

import pandas as pd
import numpy as np

from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer

In [7]:
#Import API keys and tokens from config.py
from config import *

In [85]:
# Load the pre-trained Keras model from disk (the file name suggests a GRU network).
# The loaded model is used as the default `model` argument of process_text.
model = load_model('rnn_best_models/best_model_gru.h5')

In [90]:
def import_tweets(query="(#btc OR #bitcoin OR bitcoin) is:verified -has:media -is:retweet lang:en"):
    """
    Download recent tweets matching ``query`` and save them to a CSV file.

    The search window ends one hour before the current UTC time and starts
    6 days 22 hours before that, which keeps it inside the 7-day range of
    the recent-search endpoint.

    Args:
        query: Twitter API v2 search query. The default is
            "(#btc OR #bitcoin OR bitcoin) is:verified -has:media -is:retweet lang:en".

    Returns:
        Path of the CSV file with the tweets' data. The file name holds the
        end and start of the search window, i.e.
        api_csv/2022-03-05T22:45:00Z_2022-02-27T00:45:00Z.csv.
    """
    # Local imports: the file's import cell does not provide these names.
    import os
    from datetime import timezone

    # Provide access tokens
    client = tweepy.Client(bearer_token=bearer_token,
                           consumer_key=consumer_key,
                           consumer_secret=consumer_secret,
                           access_token=acces_token,
                           access_token_secret=token_secret,
                           wait_on_rate_limit=True)

    # Dates must be in ISO format YYYY-MM-DDTHH:mm:ssZ (ISO 8601/RFC 3339), in UTC.
    # 'end_time' must be at least 10 seconds before the request time; we go
    # back a full hour to skip the newest tweets whose public metrics are
    # still incomplete. Using an explicit UTC clock replaces the previous
    # hard-coded "+5 hours" offset, which was only correct in a UTC-5 zone.
    window_end = datetime.now(timezone.utc) - timedelta(hours=1)
    window_start = window_end - timedelta(days=6, hours=22)

    today = window_end.strftime('%Y-%m-%dT%H:%M:%SZ')
    seven_days_back = window_start.strftime('%Y-%m-%dT%H:%M:%SZ')

    os.makedirs('api_csv', exist_ok=True)
    file_path = 'api_csv/' + today + '_' + seven_days_back + '.csv'

    header = ['item.id',
              'item.author_id',
              'item.created_at',
              'item.source',
              'item.public_metrics["retweet_count"]',
              'item.public_metrics["reply_count"]',
              'item.public_metrics["like_count"]',
              # Was a second "like_count" column (copy-paste slip).
              'item.public_metrics["quote_count"]',
              'item.text']

    pages = tweepy.Paginator(client.search_recent_tweets, query=query,
                             start_time=seven_days_back,
                             end_time=today,
                             tweet_fields='id,author_id,created_at,geo,public_metrics,source,text',
                             user_fields='id,name,username,public_metrics',
                             place_fields='full_name,country,country_code,geo',
                             expansions='author_id,geo.place_id',
                             max_results=100, limit=1200)

    # The context manager closes the file even if a request fails midway.
    # newline='' is what the csv module expects; UTF-8 keeps emoji and
    # non-ASCII tweet text intact regardless of the platform default.
    with open(file_path, 'a', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)

        for response in pages:
            # response.data is None when a page holds no tweets.
            for item in response.data or []:
                metrics = item.public_metrics
                writer.writerow([item.id,
                                 item.author_id,
                                 item.created_at,
                                 item.source,
                                 metrics["retweet_count"],
                                 metrics["reply_count"],
                                 metrics["like_count"],
                                 metrics["quote_count"],
                                 item.text])

    return file_path

In [36]:
def process_tweets(file_path):
    """
    Load a tweets CSV file and keep only the columns needed for sentiment analysis.

    Args:
        file_path: Path of a CSV file that contains at least the columns
            'item.created_at' and 'item.text'.

    Returns:
        A pandas DataFrame with the columns 'created_at' and 'text'.
        It is an independent copy of the loaded data, so new columns can be
        assigned to it without pandas raising SettingWithCopyWarning.
    """
    df_tweets = pd.read_csv(file_path)

    # .copy() detaches the selection from df_tweets before it is modified.
    df_text = df_tweets[['item.created_at', 'item.text']].copy()
    df_text.columns = ['created_at', 'text']

    return df_text

In [86]:
def process_text(text, 
                 max_tweet_len=45,
                 model = model,
                 tokenizer=None):
    """ 
    Predict the sentiment label of a single tweet text.

    Predictions are made with the pre-trained RNN model.

    Args:
        text: Tweet text. Non-string values (e.g. NaN from a dataframe)
            are treated as an empty string.
        max_tweet_len: Length the token sequence is padded/truncated to.
        model: Object with a ``predict`` method that returns class scores
            for the padded sequence.
        tokenizer: Optional already-fitted tokenizer. Pass the tokenizer
            that was used to train ``model`` so that word indices match the
            model's vocabulary. When omitted, a new tokenizer is fitted on
            ``text`` alone (original behaviour).

    Returns:
        One of 'negative', 'neutral' or 'positive'.
    """
    
    label_names = {0: 'negative', 1: 'neutral', 2: 'positive'}
    
    # Missing values read by pandas arrive as float NaN, which the tokenizer cannot handle.
    if not isinstance(text, str):
        text = ''
    texts = [text]
    
    if tokenizer is None:
        # NOTE(review): a tokenizer fitted on one tweet numbers words by their
        # frequency inside that tweet, so the indices are unrelated to the
        # vocabulary the model was trained with. Supply the training
        # tokenizer through `tokenizer` for meaningful predictions.
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(texts)
    
    # Turn the text into a sequence of integer word indices
    sequences = tokenizer.texts_to_sequences(texts)
    
    # Truncate and pad the sequence so the input always has the same length
    padded_data = pad_sequences(sequences, maxlen=max_tweet_len)

    pred = model.predict(padded_data)
    
    # A single sample is predicted, so the flat argmax is the class index
    return label_names[int(np.argmax(pred))]

In [37]:
# Download recent tweets with the default query into a new csv file;
# import_tweets returns the path of the file it wrote
file_path = import_tweets()

In [38]:
# Process the csv file - create a dataframe with the creation time and text of each tweet
df_text = process_tweets(file_path)

In [39]:
# Preview the first rows of the dataframe
df_text.head()

Unnamed: 0,created_at,text
0,2022-03-05 22:38:56+00:00,Why is not everyone using Satsback to get free...
1,2022-03-05 22:36:12+00:00,@Tether_to @maxkeiser “Bishop of Bitcoin”
2,2022-03-05 22:31:33+00:00,"#Rarify, a company that deals with producing i..."
3,2022-03-05 22:31:28+00:00,@joshmeyerrx No matter how you hold dollars (o...
4,2022-03-05 22:24:00+00:00,Dear Crypto bros. We don’t usually see eye to ...


In [87]:
# Run process_text on every tweet's text and store the returned sentiment in a new 'label' column
df_text['label'] = df_text['text'].map(process_text)

In [88]:
# Preview the first rows again, now including the 'label' column
df_text.head()

Unnamed: 0,created_at,text,label
0,2022-03-05 22:38:56+00:00,Why is not everyone using Satsback to get free...,positive
1,2022-03-05 22:36:12+00:00,@Tether_to @maxkeiser “Bishop of Bitcoin”,neutral
2,2022-03-05 22:31:33+00:00,"#Rarify, a company that deals with producing i...",positive
3,2022-03-05 22:31:28+00:00,@joshmeyerrx No matter how you hold dollars (o...,neutral
4,2022-03-05 22:24:00+00:00,Dear Crypto bros. We don’t usually see eye to ...,positive
