<h1 style="color:rgb(0,0,150);font-weight:bold;font-size:2em;text-align:center">
CORONAVIRUS TWEETS CLASSIFIER
</h1>
<center>
<img
src="https://www.asata.co.za/wp-content/uploads/2020/03/corona-4901878_640.jpg"
style="width: 100%"/>
</center>
<br>
<h3 style="font-weight: bold">Context</h3>
<p>Sentiment analysis studies the subjective information in an expression, that is, the opinions, appraisals, emotions, or attitudes towards a topic, person or entity. Expressions can be classified as positive, negative, or neutral. 
</p>
<h3 style="font-weight: bold">Content</h3>
<p>Each row of the dataset describes one tweet and has the following columns:</p>
<ul type="square">
<li>UserName</li>
<li>ScreenName</li>
<li>Location</li>
<li>TweetAt</li>
<li>OriginalTweet</li>
<li>Sentiment</li>
</ul>

<h3 style="font-weight: bold">Table of Contents</h3>
<ul type="square" style="color:blue">
<li>Importing Packages</li>
<li>Importing Data</li>
<li>Analysing Data</li>
<li>Data Overview</li>
<li>Visualization</li>
<li>Training Models</li>
<li>Evaluation Metrics</li>
<li>Dumping Model</li>
</ul>

# Importing Packages

In [None]:
import pandas as pd
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
import re
import category_encoders as ce
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns
import pickle

# Importing Data

In [None]:
# Load the labelled training tweets; this file is decoded explicitly as ISO-8859-1.
df_train = pd.read_csv("../input/covid-19-nlp-text-classification/Corona_NLP_train.csv", encoding = "ISO-8859-1")
# Load the test tweets. No encoding is passed here, so pandas' default applies.
# NOTE(review): confirm the two files really use different encodings.
df_test = pd.read_csv("../input/covid-19-nlp-text-classification/Corona_NLP_test.csv")

In [None]:
# Preview the first rows of the training set.
df_train.head()

In [None]:
# Preview the first rows of the test set.
df_test.head()

# Analysing Data

In [None]:
# Class balance: number of training tweets per sentiment label.
df_train["Sentiment"].value_counts()

In [None]:
# Build the cleaned training corpus: one normalised, stemmed string per tweet.
corpus = []
stemmer = PorterStemmer()
# Build the stop-word set once. Rebuilding it for every token (as before) repeats
# the same list-to-set conversion for each word of each tweet.
english_stopwords = set(stopwords.words('english'))
# Iterate over the column values directly so the loop does not depend on the
# DataFrame index being labelled 0..n-1.
for tweet in df_train["OriginalTweet"]:
    # Replace everything that is not an ASCII letter with a space, lowercase, split on whitespace.
    tokens = re.sub('[^a-zA-Z]', " ", tweet).lower().split()
    # Drop stop words and reduce the remaining tokens to their Porter stems.
    review = ' '.join(stemmer.stem(word) for word in tokens if word not in english_stopwords)
    corpus.append(review)

In [None]:
# Inspect a few cleaned tweets (entries 1 through 9; entry 0 is not shown).
corpus[1:10]

In [None]:
# Vectorise the cleaned corpus with TF-IDF, keeping at most 5000 features.
tfidf = TfidfVectorizer(max_features=5000)
# Keep the result as the sparse matrix returned by fit_transform. Calling
# .toarray() would allocate a dense (n_tweets x 5000) float array, while
# train_test_split, MultinomialNB and LogisticRegression all accept sparse input.
X = tfidf.fit_transform(corpus)

In [None]:
def ordinal_encoding(df,col,mapping):
    """Ordinal-encode one column of ``df`` using an explicit value mapping.

    Parameters
    ----------
    df : the data to encode; passed unchanged to the encoder's ``fit_transform``.
    col : name of the column to encode.
    mapping : the per-value mapping handed to ``ce.OrdinalEncoder`` for ``col``.

    Returns
    -------
    The result of ``fit_transform`` (a DataFrame is requested via ``return_df=True``).
    """
    column_spec = [{'col': col, 'mapping': mapping}]
    encoder = ce.OrdinalEncoder(cols=[col], return_df=True, mapping=column_spec)
    return encoder.fit_transform(df)

# Data Overview

In [None]:
# Distribution of the five raw sentiment labels.
# The bar order is passed explicitly so every tick label is guaranteed to describe
# its own bar. Overwriting the tick labels with a hard-coded list (as before) only
# works if the list happens to match the order seaborn chose for the bars.
sentiment_order = ['Neutral', 'Positive', 'Extremely Negative', 'Negative', 'Extremely Positive']
plot = sns.countplot(x='Sentiment', data=df_train, order=sentiment_order)
# Rotate the labels so the longer names do not overlap.
plot.tick_params(axis='x', labelrotation=20)

In [None]:
# Ordinal codes for the raw labels: 1 = most positive ... 5 = most negative.
# Named `sentiment_mapping` rather than `dict` so the builtin `dict` type is not
# shadowed for the rest of the notebook.
sentiment_mapping = {'Neutral':3, 'Positive':2, 'Extremely Negative':5, 'Negative':4,
       'Extremely Positive':1}

# Encode the "Sentiment" column with the mapping above.
df_train_final = ordinal_encoding(df_train,"Sentiment",sentiment_mapping)

In [None]:
def minimize_categories(val):
    """Collapse the five ordinal sentiment codes into three coarse classes.

    Codes 1 and 2 become 1, code 3 becomes 2, and codes 4 and 5 become 3.
    Any other value matches no branch and yields ``None``.
    """
    if val in (1, 2):
        return 1
    if val == 3:
        return 2
    if val in (4, 5):
        return 3
    return None

In [None]:
# Collapse the five ordinal codes into three classes, overwriting the column in place.
# NOTE(review): this cell is not idempotent. Re-running it applies the collapse to
# already-collapsed codes (2 -> 1, 3 -> 2), so run it exactly once after the
# ordinal-encoding cell.
df_train_final["Sentiment"] = df_train_final["Sentiment"].apply(minimize_categories)

In [None]:
# Class balance after collapsing: 1 = positive, 2 = neutral, 3 = negative.
sns.countplot(x='Sentiment', data=df_train_final)

# Visualization

In [None]:
import io

import matplotlib.pyplot as plt
import numpy as npy
import requests
from PIL import Image
from wordcloud import WordCloud, STOPWORDS

# Download the image used as the word-cloud mask.
# A timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# image-decoding failure later on.
response = requests.get("https://res.cloudinary.com/maxie/image/upload/v1617197755/TEMP/covid_ywd7ph.jpg", timeout=30)
response.raise_for_status()
image_bytes = io.BytesIO(response.content)

# One long string containing every cleaned tweet.
dataset = " ".join(corpus)


def create_word_cloud(string):
    """Generate a masked word cloud from ``string``.

    The mask is decoded from the module-level ``image_bytes`` buffer. The cloud
    shows at most 150 words on a black background and ignores the ``wordcloud``
    package's built-in stop words.

    Returns the generated ``WordCloud`` object.
    """
    maskArray = npy.array(Image.open(image_bytes))
    cloud = WordCloud(background_color="black", max_words=150, mask=maskArray,
                      stopwords=set(STOPWORDS), contour_width=1, contour_color='#333')
    cloud.generate(string)
    return cloud


dataset = dataset.lower()
wordcloud = create_word_cloud(dataset)
plt.figure(figsize=[20,10])
plt.imshow(wordcloud)  # render the cloud as an image
plt.axis('off')  # hide the x and y axes
plt.show()

In [None]:
# Target vector: the collapsed three-class sentiment code.
Y = df_train_final["Sentiment"]
# Hold out 20% of the rows for evaluation; stratify so both parts keep the same
# class proportions, and fix the seed so the split is reproducible.
# NOTE(review): the TF-IDF vectoriser was fitted on the full corpus before this
# split, so the held-out rows influenced the vocabulary and IDF weights.
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.2,random_state= 0,stratify =Y)

# Training Models

In [None]:
# Baseline model: multinomial naive Bayes with default settings, fitted on the training split.
multinb = MultinomialNB()
multinb.fit(X_train,Y_train)

In [None]:
# Naive Bayes predictions for the held-out split.
Y_pred_multinb = multinb.predict(X_test)

In [None]:
# Logistic regression on the same features.
# max_iter is raised above the default of 100 so the solver can run to
# convergence on the 5000-dimensional TF-IDF features. If it already converges
# earlier, the fitted model is unchanged.
log = LogisticRegression(max_iter=1000)
log.fit(X_train,Y_train)
# Logistic-regression predictions for the held-out split.
Y_pred_log = log.predict(X_test)

# Evaluation Metrics

In [None]:
# Naive Bayes accuracy on the held-out split (scikit-learn metrics take y_true first, then y_pred).
accuracy_score(Y_test, Y_pred_multinb)

In [None]:
# Per-class precision, recall and F1 for naive Bayes.
# The true labels must come first: with the arguments swapped, precision and
# recall trade places and "support" counts predictions instead of true samples.
print(classification_report(Y_test, Y_pred_multinb))

In [None]:
# Logistic-regression accuracy on the held-out split (scikit-learn metrics take y_true first, then y_pred).
accuracy_score(Y_test, Y_pred_log)

In [None]:
# Per-class precision, recall and F1 for logistic regression.
# The true labels must come first: with the arguments swapped, precision and
# recall trade places and "support" counts predictions instead of true samples.
print(classification_report(Y_test, Y_pred_log))

In [None]:
# Sample tweet used to try out the trained model by hand.
text = "T 3590 -I have tested CoviD positive ..  shifted to Hospital  .. hospital informing  authorities .. family and staff undergone tests , results awaited ..All that have been in close proximity to me in the last 10 days are requested to please get themselves tested !"
# The sample is classified further down, after preprocess_text has been defined.

In [None]:
def preprocess_text(text):
    """Clean a single tweet the same way the training corpus was cleaned.

    Non-letter characters are replaced by spaces, the text is lowercased and
    split on whitespace, English stop words are removed, and the remaining
    tokens are Porter-stemmed and re-joined with single spaces.

    Parameters
    ----------
    text : str
        The raw tweet.

    Returns
    -------
    list of str
        A one-element list holding the cleaned tweet, ready to be passed to a
        vectoriser's ``transform``.
    """
    stemmer = PorterStemmer()
    # Build the stop-word set once per call rather than once per token.
    english_stopwords = set(stopwords.words('english'))
    tokens = re.sub('[^a-zA-Z]', " ", text).lower().split()
    review = ' '.join(stemmer.stem(word) for word in tokens if word not in english_stopwords)
    return [review]

In [None]:
# Classify the sample tweet with the trained logistic-regression model.
# Dedicated names are used so the training `corpus` and feature matrix `X` are
# not overwritten, which would break re-running earlier cells.
sample_corpus = preprocess_text(text)
X_sample = tfidf.transform(sample_corpus).toarray()
prediction = log.predict(X_sample)[0]
# Class codes after collapsing the labels: 1 = positive, 2 = neutral, 3 = negative.
# The messages for 1 and 3 were previously swapped.
if prediction == 1:
    print("your Tweet is Positive")
elif prediction == 2:
    print("Your Tweet is Neutral")
elif prediction == 3:
    print("Your Tweet is Negative")

# Dumping Model

In [None]:
# Persist the trained classifier and the fitted vectoriser; both are needed to
# classify new tweets. The `with` blocks make sure each file is flushed and closed.
with open("model_tweet.pkl", "wb") as model_file:
    pickle.dump(log, model_file)
with open("tfidf_tweet.pkl", "wb") as vectorizer_file:
    pickle.dump(tfidf, vectorizer_file)

<center><img src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRWDgteV-sNXVRkc0xwyodmJt18ImebZ1UcKA&amp;usqp=CAU"></center>

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=0849d265-fb72-44e9-85d2-abe70c414f0a' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>