<a href="https://colab.research.google.com/github/mrbarokah/Python/blob/master/1_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **1. Import Libraries**

* VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon- and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media.
  Source: https://github.com/cjhutto/vaderSentiment




In [None]:
# Install Library
!pip install vaderSentiment

* Pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.
* NLTK is a leading platform for building Python programs to work with human language data.

In [None]:
# Import Library
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

## **2. Import Data**

In [None]:
# Load the dataset directly from its raw GitHub URL
url = 'https://raw.githubusercontent.com/mrbarokah/Python/master/dataset/Text_GeneralMotor.csv'
df = pd.read_csv(url)  # comma is already the default separator

In [None]:
# Import Data from Google Drive
# Colab-only: mount Google Drive at /content/drive so its files can be read by path
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Read a CSV from the mounted Drive.
# NOTE(review): "Nama_File.csv" looks like a placeholder — replace with the real file name.
df = pd.read_csv("/content/drive/My Drive/Nama_File.csv")

In [None]:
# Import data from a local file under /content using the default encoding
df = pd.read_csv("/content/Nama_File.csv")

In [None]:
# Same local file, decoded as ISO-8859-1 — use this cell instead if the
# default read fails with a Unicode decode error
df = pd.read_csv("/content/Nama_File.csv", encoding = "ISO-8859-1")

In [None]:
# Preview the first five rows (five is head()'s default)
df.head()

In [None]:
# (number of rows, number of columns) of the loaded data
df.shape

## **3. Preprocessing**

### a. Remove Duplicates

In [None]:
# Drop rows that exactly repeat an earlier row (the first occurrence is kept),
# then show the resulting size
df = df.drop_duplicates(keep='first')
df.shape

### b. Remove URLs

In [None]:
# Remove URLs (anything starting with http/https or www.) from the text column.
# regex=True must be explicit: since pandas 2.0 str.replace treats the pattern as
# a literal string by default, which would leave every URL untouched.
# The raw string avoids the invalid "\S" escape, and "\." matches a literal dot.
df['text'] = df['text'].str.replace(r'http\S+|www\.\S+', '', case=False, regex=True)

### c. Lowercasing

In [None]:
# Convert every sentence in the selected column to lowercase
df['text'] = df['text'].str.lower()

In [None]:
# Inspect the text column after lowercasing
df.text

### d. Remove Usernames (Optional)

In [None]:
# Remove words that start with the @ symbol (user mentions) from the text column.
# regex=True must be explicit: since pandas 2.0 str.replace treats the pattern as
# a literal string by default, so the mentions would never match.
df['text'] = df['text'].str.replace(r'@\S+', '', regex=True)

### e. Tokenize (Optional)

In [None]:
# Tokenizer data for nltk.word_tokenize.
# Recent NLTK releases load the 'punkt_tab' resource instead of the older
# 'punkt' models, so fetch both to work across versions.
nltk.download('punkt')
nltk.download('punkt_tab')

In [None]:
#Testing
example_text = df.iloc[0]
print(nltk.word_tokenize(example_text['text']))

In [None]:
def identify_tokens(row):
    """Tokenize one row's text and keep only the alphabetic tokens.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) with a 'text' entry.

    Returns:
        list: Tokens produced by ``nltk.word_tokenize`` for which
        ``str.isalpha()`` is true (punctuation and numbers are dropped).
        An empty list when the text is missing or not a string.
    """
    text = row['text']
    # Missing values arrive as NaN/None, which word_tokenize cannot process.
    if not isinstance(text, str):
        return []
    tokens = nltk.word_tokenize(text)
    # keep only purely alphabetic tokens (not punctuation)
    return [w for w in tokens if w.isalpha()]

# Replace each row's text with its list of alphabetic tokens (row-wise apply)
df['text'] = df.apply(identify_tokens, axis='columns')

In [None]:
# Inspect the text column after tokenization
df.text

### f. Stemming (Optional)

In [None]:
from nltk.stem import PorterStemmer
# Single shared Porter stemmer instance, used by the stemming cells below
stemming = PorterStemmer()

In [None]:
# Demo: stem three inflections of the same verb and print the results
my_list = ['frightening', 'frightened', 'frightens']
print(list(map(stemming.stem, my_list)))

In [None]:
def stem_list(row):
    """Stem every word in a row's 'text' entry.

    Args:
        row: Row whose 'text' entry is an iterable of words.

    Returns:
        list: ``stemming.stem(word)`` for each word, in the original order.
    """
    return [stemming.stem(token) for token in row['text']]

# Store the stemmed tokens in their own column; 'text' itself is left unchanged
df['stemmed_words'] = df.apply(stem_list, axis='columns')

In [None]:
# Stemming wrote its result to 'stemmed_words' and left 'text' unchanged,
# so show both columns side by side to inspect the effect
df[['text', 'stemmed_words']].head()

### g. Stopwords (Optional)

In [None]:
# Download NLTK's stop-word corpus and collect the English stop words into a set
nltk.download('stopwords')
from nltk.corpus import stopwords
stops = set(stopwords.words("english"))

In [None]:
def remove_stops(row):
    """Drop stop words from a row's 'text' entry.

    Args:
        row: Row whose 'text' entry is an iterable of words.

    Returns:
        list: The words that are not members of ``stops``, in the original order.
    """
    return [word for word in row['text'] if word not in stops]

# Replace each row's token list with the same list minus stop words
df['text'] = df.apply(remove_stops, axis='columns')

In [None]:
# Join each row's list of tokens back into a single space-separated string
df['text'] = df['text'].str.join(" ")

In [None]:
# Show the first row's text by position (as the tokenizer check does with
# df.iloc[0]); a label lookup with [0] fails when label 0 is not in the index
print(df['text'].iloc[0])

### h. Special Characters (Optional)

In [None]:
import string

# Characters that are allowed to stay: everything in string.printable
printable = set(string.printable)

def remove_spec_chars(in_str):
    """Return ``in_str`` with every character outside ``printable`` removed."""
    kept_chars = filter(lambda ch: ch in printable, in_str)
    return ''.join(kept_chars)

# Strip characters outside string.printable from the text column.
# The notebook's DataFrame is `df` (no `data` is defined), and the cleaned
# values must be assigned back to take effect.
df['text'] = df['text'].apply(remove_spec_chars)

## **4. Sentiment Analysis**

In [None]:
# Cast the text column to str before scoring
df['text'] = df['text'].astype(str)

In [None]:
# Download the VADER lexicon needed by NLTK's SentimentIntensityAnalyzer
import nltk
nltk.download('vader_lexicon')

In [None]:
# Sentiment Analysis
sid = SentimentIntensityAnalyzer()
listy = [] 
for index, row in df.iterrows():
  df['text']
  ss = sid.polarity_scores(row['text'])
  listy.append(ss)
  
se = pd.Series(listy)
df['polarity'] = se.values
display(df.head(10))

### a. Visualization

In [None]:
# Pie Chart
import matplotlib.pyplot as plt

labels = ['negative', 'neutral', 'positive']
# Average the per-row scores over the whole data set. The loop variable `ss`
# only holds the scores of the last row, so plotting it would show one text.
scores = pd.DataFrame(df['polarity'].tolist())
sizes = scores[['neg', 'neu', 'pos']].mean().tolist()

fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%')
ax.axis('equal')  # equal aspect ratio keeps the pie circular
ax.set_title('Average sentiment proportions')
plt.show()

### b. Save to CSV

In [None]:
# Write df to Output_File.csv; index=False leaves the row index out of the file
df.to_csv('Output_File.csv', index=False)