In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _dirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

# Disable tensorflow_datasets progress bars before any dataset is loaded.
tfds.disable_progress_bar()

In [None]:
# Read the vaccination tweets CSV, parsing the 'date' column as datetimes.
df = pd.read_csv(
    '/kaggle/input/pfizer-vaccine-tweets/vaccination_tweets.csv',
    parse_dates=['date'],
)

In [None]:
# Preview the first rows of the tweets frame.
df.head()

# 1. Load Dataset for training the RNN

In [None]:
# Fetch the IMDB reviews dataset in supervised (text, label) form, along with its metadata.
dataset, info = tfds.load('imdb_reviews', as_supervised=True, with_info=True)

# Separate the two splits used in the rest of the notebook.
train_dataset = dataset['train']
test_dataset = dataset['test']

# Show the structure of a single training element.
train_dataset.element_spec

In [None]:
# Peek at one raw training example: the review text, a blank line, then its label.
for review, label in train_dataset.take(1):
    print(review.numpy(), label.numpy(), sep='\n\n')

In [None]:
# Size of the shuffle buffer used for the training dataset.
BUFFER_SIZE = 10_000
# Number of examples per batch.
BATCH_SIZE = 64

In [None]:
# Training pipeline: shuffle, batch, then prefetch.
# Note: both names are rebound from themselves, so re-running this cell
# would batch the already-batched datasets a second time.
train_dataset = (
    train_dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
# Evaluation pipeline: same batching and prefetching, without shuffling.
test_dataset = (
    test_dataset
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Show the first three texts and labels from one training batch.
# `example` remains bound afterwards and is reused by later cells.
for example, label in train_dataset.take(1):
    print('texts: ', example.numpy()[:3], end='\n\n')
    print('labels: ', label.numpy()[:3])

# 2. Create the text encoder

In [None]:
# Maximum number of vocabulary entries kept by the text encoder.
VOCAB_SIZE = 1000

# TextVectorization moved out of the experimental namespace in newer
# TensorFlow releases, and the experimental alias is not available in the
# most recent ones. Prefer the stable location and fall back to the
# experimental path on older versions.
try:
    TextVectorization = tf.keras.layers.TextVectorization
except AttributeError:
    TextVectorization = tf.keras.layers.experimental.preprocessing.TextVectorization

encoder = TextVectorization(max_tokens=VOCAB_SIZE)

# Learn the vocabulary from the training texts only (the labels are dropped).
encoder.adapt(train_dataset.map(lambda text, label: text))

In [None]:
# Keep the learned vocabulary as a NumPy array so it can be indexed with arrays of token ids.
vocab = np.asarray(encoder.get_vocabulary())
# Display the first ten entries.
vocab[:10]

In [None]:
# Encode the sample batch and keep the first three encoded texts as a NumPy array.
encoded_example = encoder(example).numpy()[:3]
encoded_example

In [None]:
# Compare each of the three sample texts with the text rebuilt from its token ids.
for n in range(3):
    print(n + 1)
    print("Original: ", example[n].numpy(), end="\n\n")
    print("Round-trip: ", " ".join(vocab[encoded_example[n]]), end="\n\n")

# 3. Create the model

In [None]:
# Assemble the sentiment classifier one layer at a time.
model = tf.keras.Sequential()
# Text encoder adapted above goes first, so the model accepts raw strings.
model.add(encoder)
# Map each token id to a 64-dimensional vector; id 0 is masked.
model.add(tf.keras.layers.Embedding(input_dim=len(vocab), output_dim=64, mask_zero=True))
# Read the sequence in both directions with a 64-unit LSTM.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
# Single sigmoid unit producing the output score.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [None]:
# Display the list of layers that make up the model.
model.layers

In [None]:
# The model's final Dense layer applies a sigmoid, so its outputs are already
# probabilities rather than logits; the loss must therefore use
# from_logits=False (from_logits=True would treat probabilities as logits).
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

# 4. Train the model

In [None]:
# Train for 10 epochs; validation runs on the test dataset, limited to 30 steps.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    validation_steps=30,
)

In [None]:
def plot_graphs(history, metric):
    """Plot a metric's training and validation curves with pyplot.

    Args:
        history: object whose ``history`` mapping holds the values stored
            under ``metric`` and ``'val_' + metric``.
        metric: name of the metric to draw, e.g. ``'accuracy'`` or ``'loss'``.
    """
    val_metric = 'val_' + metric
    records = history.history
    plt.plot(records[metric])
    plt.plot(records[val_metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, val_metric])

In [None]:
# Evaluate on the test dataset and report the resulting loss and accuracy.
test_loss, test_acc = model.evaluate(test_dataset)

print("test loss: ", test_loss, end="\n\n")
print("test accuracy: ", test_acc)

In [None]:
# Draw the accuracy and loss curves side by side.
plt.figure(figsize=(16, 8))

# Left panel: accuracy, with the y-axis capped at 1.
plt.subplot(121)
plot_graphs(history, 'accuracy')
plt.ylim(top=1)

# Right panel: loss, with the y-axis starting at 0.
plt.subplot(122)
plot_graphs(history, 'loss')
plt.ylim(bottom=0)

# 5. Make Predictions

In [None]:
def predict(text, threshold = 0.50):
    """Classify a text as 'pos' or 'neg' using the trained model.

    Args:
        text: input passed unchanged to ``model.predict``; it is expected to
            yield a single score (e.g. a one-element list holding the text).
        threshold: scores less than or equal to this value are labelled
            'neg'; anything above it is labelled 'pos'.

    Returns:
        The string 'neg' or 'pos'.
    """
    prediction = model.predict(text)
    # Compare against the caller-supplied threshold instead of a fixed 0.50,
    # so that the `threshold` argument actually takes effect.
    if prediction <= threshold:
        return 'neg'
    return 'pos'

In [None]:
# Wrap every tweet string in its own single-element list.
text_transformed = [[tweet] for tweet in df.text.values]

***Given the size of the dataset, I am not extracting the sentiment of every tweet. Note, however, that the predictions are made on 'list' input; I have therefore wrapped each tweet 'string' in its own single-element list.***