In [15]:
import joblib
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

In [30]:
# Load the raw app-review table. Forward slashes resolve on Windows, macOS and
# Linux alike, whereas a backslash-separated path only resolves on Windows.
data = pd.read_csv('./datasets/app_review.csv')

In [18]:
# Peek at the last five rows to confirm the file was read completely
data.tail(n=5)

Unnamed: 0,package_name,review,polarity
886,com.rovio.angrybirds,loved it i loooooooooooooovvved it because it...,1
887,com.rovio.angrybirds,all time legendary game the birthday party le...,1
888,com.rovio.angrybirds,ads are way to heavy listen to the bad review...,0
889,com.rovio.angrybirds,fun works perfectly well. ads aren't as annoy...,1
890,com.rovio.angrybirds,they're everywhere i see angry birds everywhe...,1


## Pre-process Data:
Drop the `package_name` column, since it is not relevant to the sentiment of a review. Then strip surrounding whitespace from each review and convert the text to lowercase.

In [19]:
def preprocess_data(data):
    """Return a cleaned copy of the review table, ready for vectorizing.

    The ``package_name`` column is dropped when present, and every value in
    the ``review`` column has surrounding whitespace stripped and is
    converted to lowercase.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing at least a ``review`` text column.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is left unmodified.
    """
    # errors='ignore' keeps the function idempotent: calling it again on an
    # already-cleaned frame (e.g. re-running the cell that reassigns `data`)
    # no longer raises KeyError for the missing column.
    cleaned = data.drop(columns='package_name', errors='ignore')

    # assign() returns a new frame instead of writing into `cleaned` in place
    return cleaned.assign(review=cleaned['review'].str.strip().str.lower())

In [20]:
# Replace the raw table with its cleaned version
data = preprocess_data(data=data)

In [21]:
# Show the first five cleaned rows
data.head(n=5)

Unnamed: 0,review,polarity
0,privacy at least put some option appear offlin...,0
1,"messenger issues ever since the last update, i...",0
2,profile any time my wife or anybody has more t...,0
3,the new features suck for those of us who don'...,0
4,forced reload on uploading pic on replying com...,0


## Splitting Data
First, separate the columns into the independent variable (the feature, `review`) and the dependent variable (the label, `polarity`). Then split both into a training set and a test set.

In [22]:
# Feature = review text, label = polarity
reviews = data['review']
polarity = data['polarity']

# Hold out 25% of the rows for testing; stratifying on the label keeps the
# class proportions the same in both subsets, and the fixed seed makes the
# split reproducible.
x, x_test, y, y_test = train_test_split(
    reviews,
    polarity,
    test_size=0.25,
    stratify=polarity,
    random_state=42,
)

Vectorize text reviews to numbers.

In [23]:
# Turn each review into a vector of word counts (English stop words removed).
# The vocabulary is learned from the training reviews only and then reused,
# unchanged, for the test reviews.
vec = CountVectorizer(stop_words='english')
train_counts = vec.fit_transform(x)
test_counts = vec.transform(x_test)

# Convert the count matrices to dense arrays
x, x_test = train_counts.toarray(), test_counts.toarray()

## Model Generation
After splitting the data and vectorizing the text reviews into numbers, we train a Multinomial Naive Bayes model on the training set and use it to make predictions on the test set features.

In [24]:
# Train a Multinomial Naive Bayes classifier on the word-count features.
# MultinomialNB is imported together with the other dependencies in the
# notebook's first cell, so every import lives in one place.
model = MultinomialNB()
model.fit(x, y)

MultinomialNB()

## Evaluating Model
After model generation, check the accuracy using actual and predicted values.

In [25]:
# Score the classifier on the held-out test split
model.score(X=x_test, y=y_test)

0.8565022421524664

In [28]:
sentiment = model.predict(vec.transform(['I am very happy and satisfied with the usage of your app']))
# predict() returns one label per input sentence; a single sentence was
# passed, so read element 0 rather than truth-testing the whole array.
# Labels follow the polarity column used for training: 1 -> positive,
# 0 -> negative.
label = {1: "Positive", 0: "Negative"}.get(int(sentiment[0]), "Unknown")
print("Sentence Overall Rated As", label)



Sentence Overall Rated As Positive


In [29]:
sentiment = model.predict(vec.transform(['I am dissatisfied with the usage of your app']))
# predict() returns one label per input sentence; a single sentence was
# passed, so read element 0 rather than truth-testing the whole array.
# Labels follow the polarity column used for training: 1 -> positive,
# 0 -> negative.
label = {1: "Positive", 0: "Negative"}.get(int(sentiment[0]), "Unknown")
print("Sentence Overall Rated As", label)

Sentence Overall Rated As Negative
