In [11]:
import os
import re
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from debate_preprocessing import preprocessingDebate
from tweets_preprocessing import preprocess_data

## 1. Loading the RNN model

In [12]:
# Restore the trained sentiment model from its saved-model directory.
# `keras` is the same module as `tf.keras` (imported at the top of the notebook).
model = keras.models.load_model("../models/sentiment")

In [13]:
# Print the layer-by-layer table (layer type, output shape, parameter count)
# of the loaded model as a quick check that the expected architecture was restored.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text_vectorization (TextVect (None, None)              0         
_________________________________________________________________
embedding (Embedding)        (None, None, 64)          64000     
_________________________________________________________________
bidirectional (Bidirectional (None, None, 128)         66048     
_________________________________________________________________
bidirectional_1 (Bidirection (None, 64)                41216     
_________________________________________________________________
dense (Dense)                (None, 64)                4160      
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 6

## 2. Loading the data

In [14]:
# Transcript of the debate; preprocessingDebate (project helper) turns the CSV
# into the DataFrame that is scored below.
debate_file = "../data/us_election_2020_2nd_presidential_debate.csv"
df_debate = preprocessingDebate(debate_file)

In [15]:
# Raw tweet dumps, one CSV per candidate hashtag.
trump_file = "../data/hashtag_donaldtrump.csv"
biden_file = "../data/hashtag_joebiden.csv"

# Run the project preprocessing on each file, Trump first and then Biden
# (the generator is consumed in order, so the call order is unchanged).
# NOTE(review): the log printed by preprocess_data names the same output CSV
# for both calls — confirm whether the second run overwrites the first.
df_trump, df_biden = (
    preprocess_data(tweets_path) for tweets_path in (trump_file, biden_file)
)

Preprocessing started!

Data has been loaded!

Dates have been transformed to datetime objects!

Selecting only the tweets duting the debate.

Data preprocessing complete! New .csv has been created /data/debate_tweets.csv
--- 13.653274059295654 seconds ---
Preprocessing started!

Data has been loaded!

Dates have been transformed to datetime objects!

Selecting only the tweets duting the debate.

Data preprocessing complete! New .csv has been created /data/debate_tweets.csv
--- 10.012283086776733 seconds ---


## 3. Making predictions

### 3.1 Debate

In [16]:
# Score every debate utterance with the loaded model.
# Each utterance is coerced to str, wrapped in a one-element array and passed
# to model.predict on its own; the scalar at [0][0] of the result is kept,
# rounded to three decimals.
# NOTE(review): one predict call per row is slow; a single batched call would
# be faster but is only equivalent if the model masks padding — confirm first.
sentiment_predictions = [
    round(model.predict(np.array([str(utterance)]))[0][0], 3)
    for utterance in df_debate['text']
]

# Attach the scores as a new column, aligned row-for-row with the transcript.
df_debate['sentiment_score'] = sentiment_predictions

In [17]:
# Display the debate frame to inspect the newly added sentiment_score column.
df_debate

Unnamed: 0,speaker,minute,text,num_words,time_between,interrupted,sentiment_score
0,Kristen Welker,1900-01-01 00:00:18,"Good evening, everyone. Good evening. Thank yo...",258,439,0,0.332
1,Donald Trump,1900-01-01 00:07:37,How are you doing? How are you?,7,21,0,0.126
2,Kristen Welker,1900-01-01 00:07:58,And I do want to say a very good evening to bo...,222,66,0,-0.216
3,Donald Trump,1900-01-01 00:09:04,"So as you know, 2.2 million people modeled out...",365,122,0,1.566
4,Kristen Welker,1900-01-01 00:11:06,Okay. Former Vice President Biden to you. How ...,22,7,1,-1.372
...,...,...,...,...,...,...,...
461,Kristen Welker,1900-01-01 00:24:30,"All right. This is about leadership, gentlemen...",48,0,0,1.905
462,Donald Trump,1900-01-01 00:24:47,We have to make our country totally successful...,193,0,0,3.753
463,Kristen Welker,1900-01-01 00:25:49,"All right. Vice President Biden, same question...",25,0,0,0.585
464,Joe Biden,1900-01-01 00:25:57,"I will say, I’m an American President. I repre...",174,0,0,0.993


### 3.2 Tweets

In [18]:
# Fragments blanked out of a tweet before scoring:
#   1. @mentions made of ASCII letters/digits,
#   2. any character that is not an ASCII letter, digit, space or tab
#      (this removes the '#' of a hashtag but keeps the hashtag's word),
#   3. links of the form scheme://...
# Written as a raw string: the previous non-raw literal relied on the invalid
# escape sequences \w, \/ and \S, which newer Python versions warn about.
# Compiled once here instead of being re-specified on every iteration.
tweet_noise = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")

trump_sentiment = []
# Iterate the 'tweet' column directly rather than building a full row per index.
for raw_tweet in df_trump['tweet']:

    # str() guards against non-string cells (e.g. NaN), which the regex engine
    # would reject with a TypeError; split/join then collapses repeated whitespace.
    format_tweet = ' '.join(tweet_noise.sub(" ", str(raw_tweet)).split())
    text = np.array([format_tweet])
    # NOTE(review): one predict call per tweet is slow; a single batched call
    # would be faster but is only equivalent if the model masks padding — confirm.
    prediction = model.predict(text)
    # Keep the single scalar output, rounded to three decimals.
    trump_sentiment.append(round(prediction[0][0], 3))

df_trump['sentiment_score'] = trump_sentiment

In [19]:
# Fragments blanked out of a tweet before scoring:
#   1. @mentions made of ASCII letters/digits,
#   2. any character that is not an ASCII letter, digit, space or tab
#      (this removes the '#' of a hashtag but keeps the hashtag's word),
#   3. links of the form scheme://...
# Written as a raw string: the previous non-raw literal relied on the invalid
# escape sequences \w, \/ and \S, which newer Python versions warn about.
# Compiled once here instead of being re-specified on every iteration.
tweet_noise = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")

biden_sentiment = []
# Iterate the 'tweet' column directly rather than building a full row per index.
for raw_tweet in df_biden['tweet']:

    # str() guards against non-string cells (e.g. NaN), which the regex engine
    # would reject with a TypeError; split/join then collapses repeated whitespace.
    format_tweet = ' '.join(tweet_noise.sub(" ", str(raw_tweet)).split())
    text = np.array([format_tweet])
    # NOTE(review): one predict call per tweet is slow; a single batched call
    # would be faster but is only equivalent if the model masks padding — confirm.
    prediction = model.predict(text)
    # Keep the single scalar output, rounded to three decimals.
    biden_sentiment.append(round(prediction[0][0], 3))

df_biden['sentiment_score'] = biden_sentiment

## 4. Exporting the files

In [20]:
# Write each scored frame to its own CSV (row index omitted), in the order
# Trump tweets, Biden tweets, debate transcript.
scored_outputs = (
    (df_trump, "../data/trump_tweets_preprocessed.csv"),
    (df_biden, "../data/biden_tweets_preprocessed.csv"),
    (df_debate, "../data/debate_preprocessed.csv"),
)
for scored_frame, csv_path in scored_outputs:
    scored_frame.to_csv(csv_path, index=False)