<a href="https://colab.research.google.com/github/neelshah2409/Twitter-Sentiment-Analysis/blob/main/Twitter_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Twitter Sentiment Analysis 
#### Sentiment analysis is a natural language processing (NLP) task. Social media platforms need to monitor the sentiments of the people engaged in a discussion.

In [None]:

#importing the libraries required

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import re
import nltk
import nltk
# Load the tweet dataset from disk and preview its first rows.
DATASET_PATH = 'Twitter_Data.csv'
data = pd.read_csv(DATASET_PATH)
print(data.head())

                                          clean_text  category
0  when modi promised “minimum government maximum...      -1.0
1  talk all the nonsense and continue all the dra...       0.0
2  what did just say vote for modi  welcome bjp t...       1.0
3  asking his supporters prefix chowkidar their n...       1.0
4  answer who among these the most powerful world...       1.0


In [None]:
# Inspect the dataset: column names, dtypes, non-null counts and memory usage.
# NOTE(review): the saved output of this cell lists Positive/Negative/Neutral
# columns, which are only added by later cells, so it appears to come from an
# out-of-order run; re-run from a fresh kernel to confirm.
data.info()

# Last expression of the cell: displays the (rows, columns) tuple.
data.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162980 entries, 0 to 162979
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   clean_text  162980 non-null  object 
 1   Positive    162980 non-null  float64
 2   Negative    162980 non-null  float64
 3   Neutral     162980 non-null  float64
dtypes: float64(3), object(1)
memory usage: 5.0+ MB


(162980, 4)

In [None]:
# The raw tweets contain many grammatical/language errors and special
# characters, so prepare the resources needed to clean them up.
import string
from nltk.corpus import stopwords

# Fetch the NLTK stop-word corpus used below.
nltk.download('stopwords')

# English stop words, held in a set for fast membership tests in clean().
stopword = set(stopwords.words('english'))

# Snowball stemmer that clean() uses to reduce each word to its stem.
stemmer = nltk.SnowballStemmer("english")

def clean(text):
    """Normalise a raw tweet into a lower-cased, stemmed, stop-word-free string.

    Steps, in order: lower-case the text; drop square-bracketed spans, URLs,
    HTML-like tags, ASCII punctuation and every token containing a digit;
    remove English stop words; stem the remaining words.

    Relies on the module-level ``stemmer`` and ``stopword`` objects.

    Args:
        text: The tweet text. Non-string values are converted with ``str``;
            missing values (``None`` or a float NaN) are treated as empty text.

    Returns:
        The cleaned tokens joined by single spaces ("" when nothing is left).
    """
    # A missing tweet would otherwise be stringified into the literal word
    # "nan"/"none" and scored as if it were real content.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    # Raw-string patterns: '\[', '\S', '\w' and '\d' are invalid escapes in a
    # normal string literal and trigger a SyntaxWarning on recent Pythons.
    text = re.sub(r'\[.*?\]', '', text)                # [bracketed] spans
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # URLs
    text = re.sub(r'<.*?>+', '', text)                 # HTML-like tags
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)               # tokens containing digits

    # Splitting on any whitespace keeps words on either side of a newline
    # separate (deleting '\n' glued them together) and discards the empty
    # tokens that split(' ') produced for repeated spaces.
    tokens = [stemmer.stem(word) for word in text.split() if word not in stopword]
    return " ".join(tokens)

# Run every tweet through clean(), overwriting the clean_text column in place.
data["clean_text"] = data["clean_text"].map(clean)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# Calculate the VADER sentiment scores of the cleaned tweets.

from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
sentiments = SentimentIntensityAnalyzer()

# Score each tweet exactly once and reuse the result for all three columns;
# calling polarity_scores() separately per column tripled the work for the
# same values.
tweet_scores = [sentiments.polarity_scores(tweet) for tweet in data["clean_text"]]
data["Positive"] = [score["pos"] for score in tweet_scores]
data["Negative"] = [score["neg"] for score in tweet_scores]
data["Neutral"] = [score["neu"] for score in tweet_scores]

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:

# Keep only the cleaned text and the three sentiment score columns, then preview.
sentiment_columns = ["clean_text", "Positive", "Negative", "Neutral"]
data = data[sentiment_columns]
print(data.head())

                                          clean_text  ...  Neutral
0  modi promis “minimum govern maximum governance...  ...    0.889
1              talk nonsens continu drama vote modi   ...    1.000
2  say vote modi  welcom bjp told rahul main camp...  ...    0.805
3  ask support prefix chowkidar name modi great s...  ...    0.630
4  answer among power world leader today trump pu...  ...    1.000

[5 rows x 4 columns]


In [None]:
# Total each sentiment score over all tweets:
# x = positive total, y = negative total, z = neutral total.
x, y, z = (sum(data[column]) for column in ("Positive", "Negative", "Neutral"))

def sentiment_score(a, b, c):
    """Print the dominant sentiment among three scores.

    Args:
        a: Positive score.
        b: Negative score.
        c: Neutral score.

    Prints "Positive" when ``a`` is strictly greater than both other scores,
    "Negative" when ``b`` is strictly greater than both other scores, and
    "Neutral" in every remaining case (including ties). Returns ``None``.
    """
    if b < a and c < a:
        verdict = "Positive 😊 "
    elif a < b and c < b:
        verdict = "Negative 😠 "
    else:
        verdict = "Neutral 🙂 "
    print(verdict)
# Print the dominant sentiment based on the summed scores
# x (positive), y (negative) and z (neutral).
sentiment_score(x, y, z)

Neutral 🙂 


In [None]:
# Print the summed score for each sentiment.
for label, total in (("Positive: ", x), ("Negative: ", y), ("Neutral: ", z)):
    print(label, total)

Positive:  22450.175999999785
Negative:  15376.716999999946
Neutral:  125074.11200000446
