In [39]:
import twint
import nest_asyncio
import pandas as pd
import re
import pickle
nest_asyncio.apply()

# Extract tweets using TWINT

In [40]:
c = twint.Config()
c.Search = 'What is coronavirus'
c.Limit = 100
c.Count = 100
c.Output = 'tweets.csv'
result = twint.run.Search(c)

Erupts> @BenBonnema Total lack of awareness as to what is going on in other Trader Joe Stores. Mask protests continue as coronavirus cases surge with one leading a California Trader Joe's to close early  https://t.co/lUU4lNeusH via @usatoday
1365711336475394052 2021-02-27 18:12:10 +0100 <ColemoiHK> Opening of the #TheIndiaToyFair2021 #today, probably the first virtual #toysfair worldwide due to #coronavirus  Register and visit  https://t.co/7BRAN275PO  And do not forget to visit  https://t.co/VmeaMtP7Jk in order to check what is new by us.  😉  #toybiz #toyindustry #business
1365710352957247488 2021-02-27 18:08:16 +0100 <phieesert> @Lukolmsol Yes, and it's because I know it's not coronavirus, I know what is wrong with me, it's just that the medications I'm taking doesn't seem to cure it and it's frustrating 🥺🥺
1365707752748822531 2021-02-27 17:57:56 +0100 <Killing87152220> what is even more baffling is, that UK Community Production Ltd. had the masks tested by the 20/30 Labs for the bac

# Load the tweets as a dataframe and check for any required preprocessing

In [41]:
df = pd.read_csv('tweets.csv', names=['tweet'], delimiter='\n')

In [42]:
df.head()

Unnamed: 0,tweet
0,1365808102075957249 2021-02-28 00:36:41 +0100 ...
1,1365803961316483072 2021-02-28 00:20:14 +0100 ...
2,1365803359035400194 2021-02-28 00:17:50 +0100 ...
3,1365792324555599874 2021-02-27 23:34:00 +0100 ...
4,1365782381031022600 2021-02-27 22:54:29 +0100 ...


# Use regex to remove tweet ID, date, time, user, and mentions - extract ONLY  the tweet content

In [43]:
df['tweet'] = df['tweet'].str.replace(r"(\d+)|([--])|([::])|([+])|(<\S+)|(@\S+)|", "")

In [44]:
df.head()

Unnamed: 0,tweet
0,So what is coronavirus and where did i...
1,What is happening in the #NBA with race. ...
2,I'll trust my immune system thanks. S...
3,What makes #coronavirus containment measu...
4,Thank you. I hope better times will come...


# Remove whitespaces in the beginning

In [45]:
df['tweet'] = df['tweet'].str.replace(r"^\s+", "")

In [46]:
df.head()

Unnamed: 0,tweet
0,So what is coronavirus and where did it came f...
1,What is happening in the #NBA with race. Jerem...
2,I'll trust my immune system thanks. So what is...
3,What makes #coronavirus containment measures s...
4,Thank you. I hope better times will come. Wha...


# Load the vectorizer and classifier to transform and load into the model

In [47]:
x = df['tweet']

vectorizer = pickle.load(open('model/vectorizer.pickle', 'rb'))
x_test = vectorizer.transform(x).toarray()

model = pickle.load(open('model/classifier.pickle', 'rb'))

# Make sentiment prediction

In [48]:
predictions = model.predict(x_test)
predictions

array([0, 0, 0, ..., 2, 2, 2], dtype=int64)

In [49]:
positive_percentage = 100*sum(predictions==2) / len(predictions)
negative_percentage = 100*sum(predictions==0) / len(predictions)
print('Percent of positive sentiment: {:.2f}%'.format(positive_percentage))
print('Percent of negative sentiment: {:.2f}%'.format(negative_percentage))

Percent of positive sentiment: 19.97%
Percent of negative sentiment: 80.03%
