ranjeet867 · ranjeet867 · Feb 2, 2020 · Jan 24, 2019 · Feb 6, 2019 · Feb 6, 2019
diff --git a/README.md b/README.md
@@ -4,28 +4,28 @@ Python based Google Play Crawler
 
 ## Example of crawled data:
 
-App name:  Amazon Shopping <br>
-Installs Range:   50,000,000 - 100,000,000
-Rating Value:  4.268221378326416<br>
-Rating Count:  449554<br>
-Reviews Count:  449,554<br>
-Rating:   5 <br>
-Rating count:  281,323<br>
-Rating:   4 <br>
-Rating count:  86,924<br>
-Rating:   3 <br>
-Rating count:  34,203<br>
-Rating:   2 <br>
-Rating count:  14,772<br>
-Rating:   1 <br>
-Rating count:  32,332<br>
-Author Name:  michelle slaughter<br>
-Review Date:  2 June 2016<br>
-Reviewer Link:  /store/apps/details?id=com.amazon.mShop.android.shopping&reviewId=Z3A6QU9xcFRPSEdKRjFBeEJjVFNZMHdfLUptRnprTkhnOGpacWhaSzhUb1NOa29Ca3lYeEtxZi1PeXZZUXVtdVlieExuaG1wbmtSRF83emZLeE1iRXg3dVE<br>
-Reviewer Ratings:  3<br>
-Review Title:  Alright<br>
-Review Body:   Alright The app itself is great. This is my first user on Amazon after being a longtime user of Ebay. Just not too happy with delivery as promised. I ordered 2 instock items directly from Amazon on may 30th, says will ship today (june 2) and have it by the 6th..has yet to be shipped. Funny thing is the next day the 31st I ordered 2 items from a 3rd party on Amazon and they shipped yesterday. Sneaky part tho offering 30 days free of Prime, have to give credit card. I did, mine is expired so they cancelled free trial  Full Review <br> 
-Developer Reply:  <br>
+App name:  Flipkart Online Shopping App<br>
+Installs Range:  100,000,000+<br>
+Rating Value:  4.455704689025879<br>
+Reviews Count:  7689096<br>
+Rating:  5<br>
+Rating count:  5,003,740<br>
+Rating:  4<br>
+Rating count:  1,836,612<br>
+Rating:  3<br>
+Rating count:  465,871<br>
+Rating:  2<br>
+Rating count:  114,706<br>
+Rating:  1<br>
+Rating count:  268,167<br>
+<br>
+review：1<br>
+Author Name: <br>
+Review Date:<br>
+Reviewer Ratings:<br>
+Review Body: <br>
+Developer Reply: <br> 
+
 
 ## Requirements
 

diff --git a/crawl_play_store.py b/crawl_play_store.py
@@ -1,88 +1,99 @@
-import time
-from bs4 import BeautifulSoup
-import sys, io
-from selenium import webdriver
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.common.proxy import *
-
-# @author Ranjeet Singh <ranjeetsingh867@gmail.com>
-# Modify it according to your requirements
-
-no_of_reviews = 1000
-
-non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
-driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver')
-
-wait = WebDriverWait( driver, 10 )
-
-
-# Append your app store urls here
-urls = ["https://play.google.com/store/apps/details?id=com.flipkart.android&hl=en",
-        "https://play.google.com/store/apps/details?id=com.amazon.mShop.android.shopping"]
-
-for url in urls:
-
-    driver.get(url)
-    page = driver.page_source
-
-    soup_expatistan = BeautifulSoup(page, "html.parser")
-
-    expatistan_table = soup_expatistan.find("div", class_="id-app-title")
-
-    print("App name: ", expatistan_table.string)
-
-    expatistan_table = soup_expatistan.find("div", itemprop="numDownloads")
-
-    print("Installs Range: ", expatistan_table.string)
-
-    expatistan_table = soup_expatistan.find("meta", itemprop="ratingValue")
-
-    print("Rating Value: ", expatistan_table['content'])
-
-    expatistan_table = soup_expatistan.find("meta", itemprop="ratingCount")
-
-    print("Rating Count: ", expatistan_table['content'])
-
-    expatistan_table = soup_expatistan.find("span", class_="reviews-num")
-
-    print("Reviews Count: ", expatistan_table.string)
-
-    soup_histogram = soup_expatistan.find("div", class_="rating-histogram")
-
-    rating_bars = soup_histogram.find_all('div', class_="rating-bar-container")
-
-    for rating_bar in rating_bars:
-        print("Rating: ", rating_bar.find("span").text)
-        print("Rating count: ", rating_bar.find("span", class_="bar-number").string)
-
-    next_button = driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]')
-
-    for i in range(0,no_of_reviews):
-        try:
-            next_button.click()
-        except Exception:
-         time.sleep(5)
-
-    reviews_div = driver.find_element_by_xpath('//div[@data-load-more-section-id="reviews"]').get_attribute("innerHTML")
-    soup_expatistan = BeautifulSoup(reviews_div, "html.parser")
-
-    expand_pages = soup_expatistan.find_all("div", class_="single-review")
-
-    for expand_page in expand_pages:
-        print("Author Name: ", str(expand_page.find("span", class_="author-name").string.encode("utf-8")))
-        print("Review Date: ", expand_page.find("span", class_="review-date").string.encode("utf-8"))
-        print("Reviewer Link: ", expand_page.find("a", class_="reviews-permalink")['href'])
-        reviewer_ratings = expand_page.find("div", class_="review-info-star-rating").find_next()['aria-label'];
-        reviewer_ratings = ''.join(x for x in reviewer_ratings if x.isdigit())
-        print("Reviewer Ratings: ", reviewer_ratings)
-        print("Review Title: ", str(expand_page.find("span", class_="review-title").string))
-        print("Review Body: ", str(expand_page.find("div", class_="review-body").text.encode("utf-8")))
-        developer_reply = expand_page.find_parent().find("div", class_="developer-reply")
-        if hasattr(developer_reply, "text"):
-            print("Developer Reply: ", str(developer_reply.text.encode("utf-8")))
-        else:
-            print("Developer Reply: ", "")
-
-
-driver.quit()
-
+
+# coding: utf-8
+
+# In[2]:
+
+import time
+from bs4 import BeautifulSoup
+import sys, io
+from selenium import webdriver
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.proxy import *
+
+# @author Ranjeet Singh <ranjeetsingh867@gmail.com>
+# Modify it according to your requirements
+#
+# For every URL in `urls` this script prints the app name, installs range,
+# rating value, review count and rating histogram, then opens the
+# "all reviews" view and prints each review that has been loaded.
+
+# NOTE(review): not referenced below; the scroll count is fixed in the loop
+no_of_reviews = 1000
+
+non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
+driver = webdriver.Chrome(r"C:\Users\user\Anaconda3\Scripts\chromedriver.exe")
+
+wait = WebDriverWait( driver, 10 )
+
+# Append your app store urls here
+urls = ["https://play.google.com/store/apps/details?id=com.flipkart.android&hl=en"]
+
+# try/finally: close the browser even if a lookup below raises
+try:
+    for url in urls:
+        driver.get(url)
+        page = driver.page_source
+        soup_expatistan = BeautifulSoup(page, "html.parser")
+
+        expatistan_table = soup_expatistan.find("h1", class_="AHFaub")
+        print("App name: ", expatistan_table.string)
+
+        # NOTE(review): positional pick of the fifth "htlgb" span; confirm on the live page
+        expatistan_table = soup_expatistan.findAll("span", class_="htlgb")[4]
+        print("Installs Range: ", expatistan_table.string)
+
+        expatistan_table = soup_expatistan.find("meta", itemprop="ratingValue")
+        print("Rating Value: ", expatistan_table['content'])
+
+        expatistan_table = soup_expatistan.find("meta", itemprop="reviewCount")
+        print("Reviews Count: ", expatistan_table['content'])
+
+        soup_histogram = soup_expatistan.find("div", class_="VEF2C")
+        rating_bars = soup_histogram.find_all('div', class_="mMF0fd")
+        for rating_bar in rating_bars:
+            print("Rating: ", rating_bar.find("span").text)
+            print("Rating count: ", rating_bar.find("span", class_="L2o20d").get('title'))
+
+        # Open the "all reviews" view of the same app page
+        url = url+'&showAllReviews=true'
+        driver.get(url)
+        time.sleep(5)  # give the page time to render
+        for i in range(1,10):
+            # scroll to the bottom so that further reviews get loaded
+            driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
+            time.sleep(1)
+        page = driver.page_source
+
+        soup_expatistan = BeautifulSoup(page, "html.parser")
+        expand_pages = soup_expatistan.findAll("div", class_="d15Mdf")
+        counter = 1
+        for expand_page in expand_pages:
+            try:
+                print("\n===========\n")
+                print("review："+str(counter))
+                print("Author Name: ", str(expand_page.find("span", class_="X43Kjb").text))
+                print("Review Date: ", expand_page.find("span", class_="p2TkOb").text)
+                # The reviewer link is not printed: no matching element was found
+
+                # The rating is read from the aria-label of the tag that follows the
+                # "pf5lIe" div; only digits before any "(" are kept
+                reviewer_ratings = expand_page.find("div", class_="pf5lIe").find_next()['aria-label']
+                reviewer_ratings = reviewer_ratings.split('(')[0]
+                reviewer_ratings = ''.join(x for x in reviewer_ratings if x.isdigit())
+                print("Reviewer Ratings: ", reviewer_ratings)
+                # The review title is not printed: no matching element was found
+
+                print("Review Body: ", str(expand_page.find("div", class_="UD7Dzf").text))
+                developer_reply = expand_page.find_parent().find("div", class_="LVQB0b")
+                if developer_reply is not None:
+                    print("Developer Reply: "+"\n", str(developer_reply.text))
+                else:
+                    print("Developer Reply: ", "")
+                counter+=1
+            except Exception as err:
+                # Best effort: a review that cannot be parsed or printed is
+                # skipped, but the reason is reported instead of being hidden
+                print("Skipping review: ", err, file=sys.stderr)
+finally:
+    driver.quit()
+