"""Scrape up to 10 pages of Flipkart reviews for a user-supplied product URL.

Asks the user for a product URL, locates the product's "all reviews" page,
walks the pagination links, and prints up to 20 reviews to the console.
"""

from bs4 import BeautifulSoup
import requests

BASE_URL = "https://www.flipkart.com"
NUM_PAGES = 10    # the readme promises exactly 10 pages of reviews
MAX_PRINTED = 20  # how many reviews are echoed to the console

# NOTE(review): Flipkart often rejects the default python-requests
# User-Agent; a browser-like one is more reliable — confirm against live site.
REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0"}


def _get_soup(url):
    """GET *url* and return the parsed HTML document.

    Raises requests.HTTPError on a non-2xx response instead of silently
    parsing an error page.
    """
    response = requests.get(url, headers=REQUEST_HEADERS)
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def _texts(container, tag, css_class):
    """Return the text of every *tag* with *css_class* under *container*."""
    return [el.text for el in container.find_all(tag, {"class": css_class})]


def scrape_reviews(product_url, pages=NUM_PAGES):
    """Scrape up to *pages* pages of reviews for *product_url*.

    Returns five parallel lists: review headers, user names, detailed
    review bodies, star ratings, and like/dislike counts.

    Raises:
        ValueError: the "all reviews" link cannot be found on the product
            page (Flipkart changes its CSS class names frequently).
        requests.HTTPError: any page request fails.
    """
    product_soup = _get_soup(product_url)

    # Fail loudly with a clear message instead of the original unguarded
    # .find(...).find_parent() chain, which raised AttributeError on None.
    link = product_soup.find("div", {"class": "swINJg _3nrCtb"})
    if link is None or link.find_parent() is None:
        raise ValueError("could not locate the 'all reviews' link on the product page")
    review_url = BASE_URL + link.find_parent().get("href")

    headers, users, details, ratings, votes = [], [], [], [], []
    # Original looped `while count < 11` — 11 pages, though the readme
    # promises 10. Scrape exactly `pages` pages.
    for _ in range(pages):
        page_soup = _get_soup(review_url)
        for review in page_soup.find_all("div", {"class": "ooJZfD _2oZ8XT col-9-12"}):
            headers.extend(_texts(review, "p", "_2xg6Ul"))
            users.extend(_texts(review, "p", "_3LYOAd _3sxSiS"))
            details.extend(_texts(review, "div", "qwjRop"))
            ratings.extend(_texts(review, "div", "hGSR34 E_uFuv"))
            votes.extend(_texts(review, "span", "_1_BQL8"))

        # The last pagination anchor is the "next page" link. Stop early
        # when there is no further page instead of crashing on [-1].
        pagination_links = page_soup.find_all("a", {"class": "_3fVaIS"})
        if not pagination_links:
            break
        review_url = BASE_URL + pagination_links[-1].get("href")

    return headers, users, details, ratings, votes


def main():
    """Prompt for a product URL, scrape its reviews, and print them."""
    product_url = input("ENTER PRODUCT URL: ")
    headers, users, details, ratings, votes = scrape_reviews(product_url)

    # Original indexed blindly up to 20 and raised IndexError for products
    # with fewer reviews; clamp to the shortest collected list.
    printable = min(MAX_PRINTED, len(headers), len(users),
                    len(details), len(ratings), len(votes))
    for i in range(printable):
        print(str(i + 1) + ". " + users[i] + ":")
        print(ratings[i] + " stars")
        print(headers[i])
        print(details[i])
        print(votes[i])
        print("\n\n")


if __name__ == "__main__":
    main()