# Scraper Data ELSA SPEAK

Proyek ini bertujuan untuk menganalisis sentimen pengguna ELSA SPEAK berdasarkan ulasan dan feedback yang diberikan oleh pengguna platform. Dengan menggunakan teknik analisis sentimen berbasis pembelajaran mesin, proyek ini akan mengidentifikasi perasaan positif, negatif, atau netral yang terkandung dalam ulasan pengguna mengenai pengalaman pengguna dalam menggunakan aplikasi untuk melatih kemampuan berbicara dalam bahasa Inggris.


In [1]:
!pip install google-play-scraper



## 1. Import Library

In [2]:
from google_play_scraper import reviews_all, reviews
from google_play_scraper import Sort
import pandas as pd
import numpy as np
from google_play_scraper import app


## 2. Scrape Data

In [3]:
# scrape semua review yang tersedia

app_id = 'us.nobarriers.elsa'

def get_reviews(app_id, lang='id', count = 10000, sort=Sort.NEWEST, filter_score_with=None, filter_device_with=None, continuation_token=None):
  try:
    result, continuation_token = reviews(
        app_id,
        lang=lang,
        country='id',
        count=count,
        sort=sort,
        filter_score_with=filter_score_with,
        filter_device_with=filter_device_with,
        continuation_token=continuation_token
    )
    return result, continuation_token
  except Exception as e:
    print("Error:", e)
    return None, None

reviews, continuation_token = get_reviews(app_id)

if reviews is not None:
    print("Total reviews:", len(reviews))
    if len(reviews) > 0:
        print("First review:", reviews[0])
else:
    print("No reviews found.")

Total reviews: 6101
First review: {'reviewId': '33226353-5e51-4cdb-a61a-43c0bebf3170', 'userName': 'Pengguna Google', 'userImage': 'https://play-lh.googleusercontent.com/EGemoI2NTXmTsBVtJqk8jxF9rh8ApRWfsIMQSt2uE4OcpQqbFu7f7NbTK05lx80nuSijCz7sc3a277R67g', 'content': 'sekarang berbayar semua', 'score': 1, 'thumbsUpCount': 0, 'reviewCreatedVersion': None, 'at': datetime.datetime(2025, 4, 24, 2, 52, 31), 'replyContent': 'Thank you for your feedback! Our pricing reflects the value and quality of our content and features. We strive to provide the best learning experience, and we frequently offer discounts for our learners. For more information on available discounts, please contact us at support@elsanow.io.', 'repliedAt': datetime.datetime(2025, 4, 24, 7, 0, 32), 'appVersion': None}


In [5]:
# Access the keys of the first review in the list
reviews[0].keys()

# If you want a list of all keys across all reviews, you can use:
all_keys = []
for review in reviews:
    all_keys.extend(review.keys())
unique_keys = list(set(all_keys)) # To get unique keys
print(unique_keys)

['userImage', 'reviewCreatedVersion', 'userName', 'thumbsUpCount', 'appVersion', 'replyContent', 'at', 'score', 'content', 'repliedAt', 'reviewId']


In [7]:
reviews = pd.DataFrame(reviews)
reviews.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion
0,33226353-5e51-4cdb-a61a-43c0bebf3170,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,sekarang berbayar semua,1,0,,2025-04-24 02:52:31,Thank you for your feedback! Our pricing refle...,2025-04-24 07:00:32,
1,bcb28cd3-cb68-4a05-9879-19979875560c,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"sejauh ini apknya baguss bagus ajaa, semoga ke...",3,0,7.7.7,2025-04-23 03:31:12,,NaT,7.7.7
2,a29c65d8-62b2-4943-bd5f-bdd8fddf88ae,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,good,5,0,7.7.5,2025-04-22 20:31:38,,NaT,7.7.5
3,472f0e79-01e8-4fe8-8da1-7cddc917004d,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,aplikasi yang sangat luar biasa bisa membantu ...,5,3,7.7.7,2025-04-21 14:49:28,,NaT,7.7.7
4,e39bbf3a-eef0-4d7f-9699-02ff1c33a83f,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,very good aplication,5,0,7.6.3,2025-04-21 06:11:32,,NaT,7.6.3


## 3. Export CSV File

In [14]:
import csv

def export_to_csv(reviews, file_name='reviews.csv'):
    with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
       # pilih kolom yang ingin di analisis sentimen
        fieldnames = ['userName', 'at', 'score', 'content', 'reviewId']

        with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for review in reviews.to_dict('records'): # Convert DataFrame to list of dictionaries
                writer.writerow({
                    'userName': review['userName'],
                    'at': review['at'],
                    'score': review['score'],
                    'content': review['content'],
                    'reviewId': review['reviewId']
                })
                print(f"Reviews berhasil diekspor ke {file_name}") # Indent to be inside the loop
            else:
                print("Tidak ada data untuk diekspor.") # Indent to be part of the loop's else block

export_to_csv(reviews)

Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil diekspor ke reviews.csv
Reviews berhasil