<a href="https://colab.research.google.com/github/nalin-singh/sentiment-analysis/blob/main/NLP_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import nltk
# Fetch the NLTK 'stopwords' corpus; stopwords.words('english') reads from it.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Shared text-processing helpers, built once and reused for every review.
ps = PorterStemmer()  # Porter stemmer applied to each kept token
englishStopwords = set(stopwords.words("english"))  # set -> fast membership tests
tokenizer = RegexpTokenizer(r"\w+")  # tokens are runs of word characters

In [None]:
def getStemmedReview(review):
    """Normalise a raw review into a space-separated string of stemmed tokens.

    The text is lower-cased, split into tokens with the module-level
    ``tokenizer``, filtered against ``englishStopwords`` and every surviving
    token is stemmed with the module-level stemmer ``ps``.

    Parameters
    ----------
    review : str
        Raw review text.

    Returns
    -------
    str
        The stemmed, stop-word-free tokens joined by single spaces
        (an empty string when no token survives the filtering).
    """
    # No whitespace clean-up is done before tokenising: the tokenizer only
    # emits word-character runs, so spacing never reaches the tokens.
    tokens = tokenizer.tokenize(review.lower())

    # Filter and stem in one pass; distinct names avoid shadowing the imported
    # `stopwords` module and the `tokens` list itself.
    stemmedTokens = [
        ps.stem(token) for token in tokens if token not in englishStopwords
    ]

    return " ".join(stemmedTokens)

In [None]:
# A two-line sample review used to eyeball the preprocessing pipeline.
sampleReview = """I loved this movie since I was 7 and I saw it on the opening day. It was so touching and beautiful. I strongly recommend seeing for all. It's a movie to watch with your family by far.
My MPAA rating: PG-13 for thematic elements, prolonged scenes of disastor, nudity/sexuality and some language."""

# Last expression of the cell, so the cleaned string is shown as the cell output.
getStemmedReview(sampleReview)

'love movi sinc 7 saw open day touch beauti strongli recommend see movi watch famili far mpaa rate pg 13 themat element prolong scene disastor nuditi sexual languag'

In [None]:
# Toy labelled corpus. Each review is paired with its sentiment label
# (1 = positive, 0 = negative) so a text and its label cannot drift out of
# step when rows are added, removed or reordered.
labelledReviews = [
    ("This was an awesome movie", 1),
    ("Great good movie! I liked it a lot", 1),
    ("Happy Ending! awesome acting by the hero", 1),
    ("loved it! truly great", 1),
    ("bad not upto the mark", 0),
    ("could have better", 0),
    ("did not like the movie", 0),
    ("Surely a Disappointing movie", 0),
]

# Parallel lists consumed by the rest of the notebook, derived from the pairs.
trainingData = [text for text, _ in labelledReviews]
targetSet = [label for _, label in labelledReviews]

In [None]:
# Run every training review through the stemming/stop-word pipeline.
cleanedTrainingData = [getStemmedReview(rawReview) for rawReview in trainingData]
print(cleanedTrainingData)

['awesom movi', 'great good movi like lot', 'happi end awesom act hero', 'love truli great', 'bad upto mark', 'could better', 'like movi', 'sure disappoint movi']


In [None]:
# Vectorisation: turn the cleaned reviews into bag-of-words count vectors.
from sklearn.feature_extraction.text import CountVectorizer
countVector = CountVectorizer()
# Densify the result so the small demo matrix prints in full.
vectorizedTrainingData = countVector.fit_transform(cleanedTrainingData).toarray()
print(vectorizedTrainingData)
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() replaces it. .tolist() keeps the printed form a
# plain list of str, one entry per matrix column.
print(countVector.get_feature_names_out().tolist())
print(vectorizedTrainingData.shape)
print(countVector.vocabulary_)

[[0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 1 0 0 0]
 [1 1 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1]
 [0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0]]
['act', 'awesom', 'bad', 'better', 'could', 'disappoint', 'end', 'good', 'great', 'happi', 'hero', 'like', 'lot', 'love', 'mark', 'movi', 'sure', 'truli', 'upto']
(8, 19)
{'awesom': 1, 'movi': 15, 'great': 8, 'good': 7, 'like': 11, 'lot': 12, 'happi': 9, 'end': 6, 'act': 0, 'hero': 10, 'love': 13, 'truli': 17, 'bad': 2, 'upto': 18, 'mark': 14, 'could': 4, 'better': 3, 'sure': 16, 'disappoint': 5}




In [None]:
# Applying Multinomial Naive Bayes
from sklearn.naive_bayes import MultinomialNB

testingReview = [
    "The movie I saw was bad",
    "I was happy and I loved the acting in the movie",
]

# Training: fit the classifier on the count vectors and their labels.
mnb = MultinomialNB()
mnb.fit(vectorizedTrainingData, targetSet)

# Prediction: unseen reviews go through the same cleaning step and the
# already-fitted vectorizer (transform only, no refit).
cleanTestingReviews = list(map(getStemmedReview, testingReview))
testVector = countVector.transform(cleanTestingReviews).toarray()

print(mnb.predict(testVector))

[0 1]


In [None]:
# Translate each numeric prediction (1 / anything else) into a readable verdict.
for prediction in mnb.predict(testVector):
    print("Positive Review\n" if prediction == 1 else "Negative Review\n")

Negative Review

Positive Review

