## Install Library

In [21]:
!pip install google-play-scraper



## Import Library

In [22]:
import time
from sklearn import svm
from sklearn.metrics import classification_report

In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [24]:
from google_play_scraper import app, Sort, reviews_all

In [25]:
import numpy as np
import pandas as pd

## Scraping Mobile Banking Review Data

Create empty dataframe

In [26]:
# Start with an empty frame holding only the three review fields kept by this notebook.
review_columns = ['userName', 'content', 'score']
df_review = pd.DataFrame(columns=review_columns)

Mobile Banking ID in Play Store (Using BNI, BCA, Mandiri, BRI, and BSI Review Data)

In [27]:
def getPlaystore(number):
  """Return the Play Store package ID for the app numbered `number`.

  Numbers 1 through 5 map to the five mobile-banking package IDs listed
  below; any other value yields None.
  """
  package_ids = (
    (1, 'src.com.bni'),
    (2, 'com.bca'),
    (3, 'id.bmri.livin'),
    (4, 'id.co.bri.brimo'),
    (5, 'com.bsm.activity2'),
  )
  for position, package_id in package_ids:
    if number == position:
      return package_id
  return None

Run Scraper and append to existing DataFrame

In [28]:
# Scrape every review of each app (numbers 1-5, see getPlaystore) and keep only
# the user name, review text and star score.
# The per-app frames are collected in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a loop
# copies all previously collected rows on every iteration.
scraped_frames = []
for apps in range(1, 6):
  link_apps = getPlaystore(apps)
  id_reviews = reviews_all(
    link_apps,
    sleep_milliseconds=0,
    country='id',
    sort=Sort.NEWEST,
  )
  # reviews_all returns a list of dicts; selecting the columns here also
  # yields a correctly-shaped empty frame when an app has no reviews.
  df_scraping = pd.DataFrame(id_reviews, columns=['userName', 'content', 'score'])
  scraped_frames.append(df_scraping)

# Append everything scraped in this run to the existing review frame.
df_review = pd.concat([df_review, *scraped_frames], ignore_index=True)

In [9]:
# Force the review text column to str before it is handed to the vectorizer.
content_as_text = df_review['content'].astype(str)
df_review['content'] = content_as_text

Check Amount of Review Data

In [44]:
# Report how many reviews were collected in total.
print(f'Amount of data: {len(df_review)}')

Amount of data: 101055


Labeling 'pos' and 'neg' based on star<br>

*   1-3 Star = Negative <br>
*   4-5 Star = Positive 

In [11]:
def labeling(score):
  """Map a star score to a sentiment label.

  Scores of 3 or lower are 'neg', scores above 3 are 'pos'. A value that
  satisfies neither comparison (e.g. NaN) yields None.
  """
  if score <= 3:
    return 'neg'
  return 'pos' if score > 3 else None

In [12]:
# Derive the sentiment label of every review from its star score.
df_review['label'] = df_review['score'].map(labeling)

In [13]:
# Preview the first five labeled reviews.
df_review.head(n=5)

Unnamed: 0,userName,content,score,label
0,jenar mahesa aji,siip...🤸‍♀️,4,pos
1,agung susetyo,easy use and help full..,5,pos
2,Bram Brahmantiyo,cukup membantu,5,pos
3,Ulfah SF,good,5,pos
4,An an Santana Ginanjar,good job,5,pos


## Sentiment Analysis

Split data (70% Training, 30% Test)

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the reviews for testing. The seed is fixed so the split,
# and therefore the reported metrics, are reproducible between runs.
train, test = train_test_split(df_review, test_size=0.3, random_state=42)

Vectorize review text using TFIDF

In [15]:
# TF-IDF settings: drop terms found in fewer than 5 documents or in more than
# 80% of them, and use sublinear term-frequency scaling.
tfidf_options = dict(
    min_df=5,
    max_df=0.8,
    sublinear_tf=True,
    use_idf=True,
)
vectorizer = TfidfVectorizer(**tfidf_options)

# Fit the vocabulary on the training text only, then reuse it for the test text.
train_vectors = vectorizer.fit_transform(train['content'])
test_vectors = vectorizer.transform(test['content'])

Classify reviews as positive/negative with a linear SVM

In [16]:
# Train a linear-kernel SVM on the TF-IDF vectors and evaluate it on the test split.
classifier_linear = svm.SVC(kernel='linear')

# perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
t0 = time.perf_counter()
classifier_linear.fit(train_vectors, train['label'])
t1 = time.perf_counter()
prediction_linear = classifier_linear.predict(test_vectors)
t2 = time.perf_counter()
time_linear_train = t1-t0
time_linear_predict = t2-t1

# results
print("Training time: %fs; Prediction time: %fs" % (time_linear_train, time_linear_predict))
# output_dict=True returns the per-class metrics as a dict keyed by label.
report = classification_report(test['label'], prediction_linear, output_dict=True)
print('positive: ', report['pos'])
print('negative: ', report['neg'])

Training time: 408.209574s; Prediction time: 70.502478s
positive:  {'precision': 0.9286439817166074, 'recall': 0.8912990494759931, 'f1-score': 0.9095883596567591, 'support': 16412}
negative:  {'precision': 0.8775145897699965, 'recall': 0.919165767709457, 'f1-score': 0.8978573937478048, 'support': 13905}


Testing the Model

In [34]:
#Test Negative Review
review = 'Bank XYZ selalu gangguan jaringan'
review_vector = vectorizer.transform([review]) # vectorizing
# predict() returns one label per input row; take the single label instead of
# concatenating a string with the whole array (which prints a list repr and
# only works for object-dtype label arrays).
predicted_label = classifier_linear.predict(review_vector)[0]
print("Prediction: " + str(predicted_label))

['Prediction: neg']


In [36]:
#Test Positive Review
review = 'Maju terus bank XYZ'
review_vector = vectorizer.transform([review]) # vectorizing
# predict() returns one label per input row; take the single label instead of
# concatenating a string with the whole array (which prints a list repr and
# only works for object-dtype label arrays).
predicted_label = classifier_linear.predict(review_vector)[0]
print("Prediction: " + str(predicted_label))

['Prediction: pos']


In [41]:
#Test Satire Review
review = 'Bangga menggunakan bank XYZ karena lambat sekali'
review_vector = vectorizer.transform([review]) # vectorizing
# predict() returns one label per input row; take the single label instead of
# concatenating a string with the whole array (which prints a list repr and
# only works for object-dtype label arrays).
predicted_label = classifier_linear.predict(review_vector)[0]
print("Prediction: " + str(predicted_label))

['Prediction: neg']
