In [90]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import re
import nltk
from nltk.corpus import stopwords
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
from sklearn.metrics import accuracy_score

In [91]:
df = pd.read_csv("fifa_world_cup_2022_tweets.csv")

In [92]:
df.head()

Unnamed: 0.1,Unnamed: 0,Date Created,Number of Likes,Source of Tweet,Tweet,Sentiment
0,0,2022-11-20 23:59:21+00:00,4,Twitter Web App,What are we drinking today @TucanTribe \n@MadB...,neutral
1,1,2022-11-20 23:59:01+00:00,3,Twitter for iPhone,Amazing @CanadaSoccerEN #WorldCup2022 launch ...,positive
2,2,2022-11-20 23:58:41+00:00,1,Twitter for iPhone,Worth reading while watching #WorldCup2022 htt...,positive
3,3,2022-11-20 23:58:33+00:00,1,Twitter Web App,Golden Maknae shinning bright\n\nhttps://t.co/...,positive
4,4,2022-11-20 23:58:28+00:00,0,Twitter for Android,"If the BBC cares so much about human rights, h...",negative


In [93]:
df.drop(["Unnamed: 0", "Date Created", "Number of Likes", "Source of Tweet"], axis=1, inplace=True)

In [94]:
df.head()

Unnamed: 0,Tweet,Sentiment
0,What are we drinking today @TucanTribe \n@MadB...,neutral
1,Amazing @CanadaSoccerEN #WorldCup2022 launch ...,positive
2,Worth reading while watching #WorldCup2022 htt...,positive
3,Golden Maknae shinning bright\n\nhttps://t.co/...,positive
4,"If the BBC cares so much about human rights, h...",negative


In [95]:
def preprocess_text(text):
    text = re.sub(r'\bhttps?://\S+\b', 'http', text)
    text = re.sub(r'(^|\s)@(\w+)', r'\1@user', text)
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    words = text.split()
    words = [word for word in words if not word in stopwords.words("english")]
    text = " ".join(words)
    return text

In [96]:
df["clean"] = df["Tweet"].apply(preprocess_text)

In [97]:
dummy_sentiment = pd.get_dummies(df["Sentiment"], dtype="int64")
dummy_sentiment.head()

Unnamed: 0,negative,neutral,positive
0,0,1,0
1,0,0,1
2,0,0,1
3,0,0,1
4,1,0,0


In [98]:
df = df.drop(["Tweet", "Sentiment"], axis=1)
df = pd.concat([df, dummy_sentiment], axis=1)

In [120]:
X = df["clean"]
y = df[df.columns[1:]].values

In [121]:
vectorizer = TextVectorization(max_tokens=50000, output_sequence_length=1800, output_mode='int')

In [122]:
vectorizer.adapt(X.values)

In [123]:
vectorized_text = vectorizer(X.values)

In [124]:
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
dataset = dataset.cache()
dataset = dataset.shuffle(25000)
dataset = dataset.batch(16)
dataset = dataset.prefetch(8) 

In [125]:
len(dataset)

1408

In [126]:
train = dataset.take(int(len(dataset) * .7))
val = dataset.skip(int(len(dataset) * .7)).take(int(len(dataset) * .2))
test = dataset.skip(int(len(dataset) * .9)).take(int(len(dataset) * .1))

In [127]:
train_generator = train.as_numpy_iterator()

2023-06-08 22:40:56.063281: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [22524,3]
	 [[{{node Placeholder/_1}}]]
2023-06-08 22:40:56.063646: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [22524,1800]
	 [[{{node Placeholder/_0}}]]


In [128]:
model = Sequential([
    Embedding(50000+1, 32),
    Bidirectional(LSTM(32, activation='tanh')),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(3, activation='sigmoid'),
])

model.compile(loss="binary_crossentropy", optimizer="Adam", metrics=["accuracy"])

2023-06-08 22:40:56.630971: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-08 22:40:56.632781: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-08 22:40:56.634030: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [129]:
model.compile(loss='BinaryCrossentropy', optimizer='Adam')

In [130]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, None, 32)          1600032   
                                                                 
 bidirectional_3 (Bidirectio  (None, 64)               16640     
 nal)                                                            
                                                                 
 dense_11 (Dense)            (None, 128)               8320      
                                                                 
 dense_12 (Dense)            (None, 256)               33024     
                                                                 
 dense_13 (Dense)            (None, 3)                 771       
                                                                 
Total params: 1,658,787
Trainable params: 1,658,787
Non-trainable params: 0
____________________________________________

In [131]:
history = model.fit(train, epochs=1, validation_data=val)

2023-06-08 22:40:58.452491: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-08 22:40:58.454848: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-08 22:40:58.456711: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2023-06-08 22:50:04.937002: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [22524,1800]
	 [[{{node Placeholder/_0}}]]
2023-06-08 22:50:04.937405: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [22524,1800]
	 [[{{node Placeholder/_0}}]]
2023-06-08 22:50:05.240391: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/s



In [132]:
history_df = pd.DataFrame(history.history)
history_df.head()

Unnamed: 0,loss,val_loss
0,0.467259,0.323616


In [133]:
input_text = vectorizer("watching this match is a waste of time")
batch = test.as_numpy_iterator().next()
batch_X, batch_y = test.as_numpy_iterator().next()
(model.predict(batch_X) > 0.5).astype(int)

2023-06-08 22:51:39.600393: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [22524,3]
	 [[{{node Placeholder/_1}}]]
2023-06-08 22:51:39.600738: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int64 and shape [22524,1800]
	 [[{{node Placeholder/_0}}]]
2023-06-08 22:51:40.044435: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/spli



array([[0, 0, 1],
       [1, 0, 0],
       [0, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0]])

In [134]:
res = model.predict(batch_X)



In [135]:
model.save("wc.h5")

In [136]:
comment = "watching this match is a waste of time"
vectorized_comment = vectorizer([comment])
results = model.predict(vectorized_comment)

for idx, col in enumerate(df.columns[2:-1]):
    print("{}: {}".format(col, results[0][idx] > 0.5))

neutral: False
