In [1]:
#Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import nltk 
from nltk.tokenize import word_tokenize
import emoji
from emoji.unicode_codes import UNICODE_EMOJI
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


import sys
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

def sentimentVader(file):

    #loading the data into the dataframe
    data=pd.read_csv(file, encoding='utf-8')
    
    data['text']=data.astype(str).apply(' '.join, axis=1)
    data=pd.DataFrame(data['text'])
    

    #Removing the duplicate rows from text column and resetting index
    data=data.drop_duplicates(['text'],keep='first')
    data=data.reset_index(drop=True)
    data['Original Text']=data['text']

    #converting emoji into the text
    import emoji
    for i in range(len(data)):
        data.loc[i,'text'] = emoji.demojize(data.loc[i,'text'])


    #converting special character "’" to "'" for contraction
    for i in range(len(data)):
        data.loc[i,'text']=data.loc[i,'text'].replace("’","'")

    sys.path.insert(0, 'C:\\Users\\gautam\\Desktop\\Tweets_Notebook')
    from contractions_1 import CONTRACTION_MAP

    def expand_contractions(text, contraction_mapping=CONTRACTION_MAP):

        contractions_pattern = re.compile('({})'.format('|'.join(contraction_mapping.keys())), 
                                          flags=re.IGNORECASE|re.DOTALL)
        def expand_match(contraction):
            match = contraction.group(0)
            first_char = match[0]
            expanded_contraction = contraction_mapping.get(match)\
                                    if contraction_mapping.get(match)\
                                    else contraction_mapping.get(match.lower())                       
            expanded_contraction = first_char+expanded_contraction[1:]
            return expanded_contraction

        expanded_text = contractions_pattern.sub(expand_match, text)
        expanded_text = re.sub("'", "", expanded_text)
        return expanded_text

    for i in range(len(data)):
        data.loc[i,'text'] = expand_contractions(str(data.loc[i,'text']))


    #data cleaning steps
    for i in range(len(data)):
        # Remove the word starting with @
        data.loc[i,'text'] = re.sub(r'@[A-Za-z0-9]+', ' ', str(data.loc[i,'text']))

        #Remove URL links
        data.loc[i,'text']  = re.sub('https?://[A-Za-z0-9./]+',' ',data.loc[i,'text'] ) 
        data.loc[i,'text']  = re.sub('http?://[A-Za-z0-9./]+',' ',data.loc[i,'text'] )


        # Converting to Lowercase
        data.loc[i,'text']  = data.loc[i,'text'] .lower()

        #Remove the new line characters
        data.loc[i,'text'] = re.sub(r"\t|\n|\r", " ", data.loc[i,'text'] , flags=re.I)

        #Remove punctuation
        data.loc[i,'text'] = re.sub(r"[,‘@\#-:'?\.$%_!()&;+”/…*•|“]", " ", data.loc[i,'text'] , flags=re.I)

        #Remove duble quotes
        data.loc[i,'text'] = re.sub(r'"', " ", data.loc[i,'text'] , flags=re.I)

        #Remove digits
        data.loc[i,'text'] = re.sub(r"\d", "", data.loc[i,'text'] )

        # remove all single characters
        data.loc[i,'text'] = re.sub(r'\s+[a-zA-Z]\s+', ' ', data.loc[i,'text'] )

        # Substituting multiple spaces with single space
        data.loc[i,'text']  = re.sub(r'\s+', ' ', data.loc[i,'text'] , flags=re.I)


    for i in range(len(data)):
        # removing rt from the data
        data.loc[i, 'text']=' '.join([x for x in data.loc[i,'text'].split() if x !='rt' and x !='nan'])  

         # remove all single characters
        data.loc[i,'text'] = re.sub(r'\s+[a-zA-Z]\s+', ' ', data.loc[i,'text'] )

        # Substituting multiple spaces with single space
        data.loc[i,'text']  = re.sub(r'\s+', ' ', data.loc[i,'text'] , flags=re.I)

    # remove remaining tokens that are not alphabetic
    for i in range(len(data)):
        data.loc[i, 'text']=' '.join([x for x in data.loc[i,'text'].split() if x.isalpha()])
    #     data.loc[i,'text'] = [word for word in data.loc[i,'text'] if word.isalpha()]



    stop_words = stopwords.words('english')
    stop_words.remove('no')
    stop_words.remove('not')
    for i in range(len(data)):
        data.loc[i,'text'] = ' '.join([word for word in data.loc[i,'text'].split() if not word in stop_words])


    # Lemmetization of words (it will change the word in the base form)
    lemmatizer = WordNetLemmatizer()
    for i in range(len(data)):
        data.loc[i,'text'] = ' '.join([lemmatizer.lemmatize(word,pos='a') for word in data.loc[i,'text'].split()])
        data.loc[i,'text'] = ' '.join([lemmatizer.lemmatize(word,pos='v') for word in data.loc[i,'text'].split()])
        data.loc[i,'text'] = ' '.join([lemmatizer.lemmatize(word,pos='n') for word in data.loc[i,'text'].split()])

    #Checking the words are english word or not, if not then remove it.
#     from nltk.corpus import words
#     for i in range(len(data)):
#         data.loc[i,'text'] = ' '.join([word for word in data.loc[i,'text'].split() if word in words.words()])


    for i in range(len(data)):
        # remove all single characters
        data.loc[i,'text'] = re.sub(r'\s+[a-zA-Z]\s+', ' ', data.loc[i,'text'] )

        # Substituting multiple spaces with single space
        data.loc[i,'text']  = re.sub(r'\s+', ' ', data.loc[i,'text'] , flags=re.I)


    for x in range(len(data)):
        #print(data_1.loc[x,'text'])
        data_sentiment = SentimentIntensityAnalyzer()
        data.loc[x,'Sentiment']=data_sentiment.polarity_scores(data.loc[x,'text'])['compound']
        
    data['Sentiment_rolled']= data['Sentiment'].apply(lambda x:  1 if x > 0.05 else (0 if (x <=0.05 and x>=-0.05)  else -1))
    data['Polarity'] = data['Sentiment_rolled'].apply(lambda x:  'Positive' if x ==1  else ('Neutral' if x == 0  else 'Negative'))
    
    data.to_csv('predicted_sentiment.csv', index=False)
    
    return data

In [2]:
file_1='D:\\Manisha\\Undergrad_Semester\\Semesters\\Fall2019\\capstone\\System.tweets_esports_olympics_Text.csv'

In [3]:
output=sentimentVader(file_1)

In [4]:
output.head(20)

Unnamed: 0,text,Original Text,Sentiment,Sentiment_rolled,Polarity
0,best player go head head whole world see start...,RT @IntelGaming: The best @RocketLeague player...,0.6369,1,Positive
1,trophy lequip special olympics catalunya sha p...,RT @btvesports: 🏆 L'equip Special Olympics Cat...,0.4019,1,Positive
2,best player go head head whole world see start...,RT @IntelGaming: The best @StreetFighter playe...,0.6369,1,Positive
3,,https://t.co/wBIbIBMLkc,0.0,0,Neutral
4,team international olympic committee intel wor...,The team-up of The International Olympic Commi...,0.0,0,Neutral
5,esports no place olympics get outside get fres...,@BrendanHickey1 ESports has no place in The Ol...,0.0258,0,Neutral
6,research ancient greek certainly not plan anno...,RT @t4_research: The ancient Greeks certainly ...,0.34,1,Positive
7,excite bring intel world open esports tourname...,RT @IntelGaming: We are excited to bring the I...,0.7906,1,Positive
8,stoke esports,Stoked for #esports in the @Olympics !! https:...,0.0,0,Neutral
9,esports still talk olympics still look great b...,eSports is still in talks to be in the Olympic...,0.743,1,Positive


In [5]:
file_2='D:\\Manisha\\Undergrad_Semester\\Semesters\\Fall2019\\capstone\\data_sources\\Destination_Kent_State_Student_Evaluation_2017.csv'

In [7]:
output2=sentimentVader(file_2)

In [8]:
output2.head(15)

Unnamed: 0,text,Original Text,Sentiment,Sentiment_rolled,Polarity
0,open end response open end response open end r...,Open-Ended Response Open-Ended Response Open-E...,0.0,0,Neutral
1,graduate diploma give money,nan nan nan nan nan To graduate with a diploma...,0.0,0,Neutral
2,,nan nan nan nan nan nan nan,0.0,0,Neutral
3,campus tour lunch best,nan nan nan Campus tours Lunch To do my best. nan,0.6369,1,Positive
4,spend night dorm helpful involve campus activi...,nan nan nan nan Spending a night in the dorms ...,0.5267,1,Positive
5,no would tour downtown kent walk around downto...,nan nan nan No. It would be touring Downtown K...,0.7783,1,Positive
6,college different high school talk flash guide...,nan nan nan how college is different from high...,0.0,0,Neutral
7,talk flashguides life like campus personal exp...,nan nan nan nan Talking to the flashguides abo...,0.8271,1,Positive
8,make sure set good amount time aside exam allo...,Make sure that they set a good amount of time ...,0.8311,1,Positive
9,buy book food live goal expectation,nan nan nan Buying books and when to do that. ...,0.0,0,Neutral
