# In [5]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from datetime import date
from datetime import datetime


class MmtScraper(object):
    """Selenium/Chrome scraper for an MMT hotel listing page and its hotel pages.

    Typical use: construct with a listing URL, call ``load_mmt_url()``,
    collect hotel URLs with ``extract_hotel_links()``, call
    ``open_single_hotel_link(link)`` once per hotel to obtain one row of
    values, and finally ``quit()`` to close the browser.

    Attributes:
        dict_to_right: the record for the most recently scraped hotel, keyed
            by ``FIELD_NAMES``. It is reset and refilled in place on every
            ``open_single_hotel_link`` call.
        url: the listing URL given to the constructor.
        options: the ``ChromeOptions`` used to start the browser.
        driver: the Chrome WebDriver instance.
        delay: seconds the explicit waits allow for a page to become ready.
    """

    # Column names of one scraped record, in output order. Every value in a
    # record is a string; multi-valued fields are joined with " | ".
    FIELD_NAMES = (
        "unique_id", "crawl_date", "crawl_timestamp", "hotel_name",
        "hotel_type_&_speciality", "page_url", "mmt_assured",
        "safety_protocols_partner", "safety_and_hygiene_measures",
        "prepayment_required", "country", "state/_ut", "city",
        "hotel_address", "facility", "facilities_rating", "couple_friendly",
        "check-in_timing", "check-out_timing", "hotel_overview", "amenities",
        "hotel_star_rating", "host_name", "host_working_experience",
        "hosting_since", "interests", "property_highlights",
        "hotel_location_&_surroundings", "restaurants_at_property",
        "total_rooms", "room_categories", "room_amenities",
        "activities_nearby", "outdoor_sports/activities", "general_services",
        "transfers", "beauty_&_spa", "basic_facilities", "health_&_wellness",
        "food_&_drinks", "safety_&_hygiene", "average_rating",
        "average_review", "safety_&_hygiene_avg_rating",
        "child_friendliness_avg_rating", "location_avg_rating",
        "total_photos", "photos_urls", "review_keywords",
    )

    def __init__(self, url, executable_path="", delay=3):
        """Start an incognito Chrome session for scraping ``url``.

        Args:
            url: listing page to open in ``load_mmt_url``.
            executable_path: path to the chromedriver binary. When empty
                (the default) the argument is not passed at all, so
                Selenium resolves the driver on its own.
            delay: timeout in seconds for the explicit page-ready waits.
        """
        self.dict_to_right = dict.fromkeys(self.FIELD_NAMES, "")
        self.url = url
        self.options = webdriver.ChromeOptions()
        self.options.add_argument('--ignore-certificate-errors')
        self.options.add_argument('--incognito')
        if executable_path:
            self.driver = webdriver.Chrome(executable_path=executable_path,
                                           options=self.options)
        else:
            # An empty path can never point at a driver binary; omit it.
            self.driver = webdriver.Chrome(options=self.options)
        self.delay = delay

    def load_mmt_url(self):
        """Open the listing URL and wait up to ``delay`` seconds for results.

        The page counts as ready once an element with class
        ``deal-view-details`` is present. A timeout is reported on stdout
        and otherwise ignored.
        """
        self.driver.get(self.url)
        try:
            wait = WebDriverWait(self.driver, self.delay)
            wait.until(EC.presence_of_element_located((By.CLASS_NAME, "deal-view-details")))
            print("Page is ready")
        except TimeoutException:
            print("Loading took too much time! seems like no hotels")

    def extract_hotel_links(self):
        """Return the ``href`` of every link inside a ``listingRowOuter`` row.

        Returns:
            A list with one entry per matching ``<a>`` element; an entry is
            whatever ``get_attribute('href')`` yields for that element.
        """
        # "listingRowOuter a" is a descendant selector, not a class name, so
        # it is expressed as a CSS selector.
        anchors = self.driver.find_elements(By.CSS_SELECTOR, ".listingRowOuter a")
        return [anchor.get_attribute('href') for anchor in anchors]

    def open_single_hotel_link(self, link):
        """Open one hotel page and scrape it into ``dict_to_right``.

        The record is blanked before the page is loaded, so a hotel page
        that fails to load yields an all-blank row instead of repeating the
        previous hotel's data.

        Args:
            link: hotel detail page URL; its ``hotelId`` query value becomes
                the record's ``unique_id``.

        Returns:
            A list snapshot of the record's values in ``FIELD_NAMES`` order.
            A snapshot (rather than a live ``dict`` view) keeps rows
            collected earlier from changing when the next hotel is scraped.
        """
        # Reset in place so external references to dict_to_right stay valid.
        self.dict_to_right.update(dict.fromkeys(self.FIELD_NAMES, ""))
        self.driver.get(link)
        try:
            wait = WebDriverWait(self.driver, self.delay)
            # NOTE(review): Selenium's documentation advises against mixing
            # implicit and explicit waits; kept to preserve existing timing.
            self.driver.implicitly_wait(10)
            wait.until(EC.presence_of_element_located((By.ID, "detpg_hotel_name")))
            # Kept inside the try so a TimeoutException raised while reading
            # the page is reported the same way as before.
            self._fill_record(link)
        except TimeoutException:
            print("Loading took too much time! seems like no hotels")

        return list(self.dict_to_right.values())

    def quit(self):
        """Close the browser and end the WebDriver session."""
        self.driver.quit()

    def _fill_record(self, link):
        """Populate ``dict_to_right`` from the currently loaded hotel page."""
        record = self.dict_to_right
        # One clock reading so the date and the timestamp cannot disagree.
        now = datetime.now()

        record["unique_id"] = self._hotel_id_from_link(link)
        record["crawl_date"] = now.strftime("%m/%d/%y")
        record["crawl_timestamp"] = now.strftime("%d/%m/%Y %H:%M:%S")
        record["hotel_name"] = self.driver.find_element(By.ID, "detpg_hotel_name").text
        # Fixed value, not read from the page.
        record["hotel_type_&_speciality"] = "Premium | Luxurious Business Stays"
        record["page_url"] = link
        assured = self.driver.find_elements(By.CSS_SELECTOR, ".MMT_ASSURED")
        record["mmt_assured"] = 'Yes' if assured else ''

        # Concatenation of the first two matches; blank unless both exist.
        partner = self.driver.find_elements(By.CSS_SELECTOR, ".blackText font b")
        if len(partner) >= 2:
            record["safety_protocols_partner"] = partner[0].text + partner[1].text
        record["safety_and_hygiene_measures"] = self._joined_texts(
            ".covidMeasure__middle ul li span font")

        # Fixed values, not read from the page.
        record["prepayment_required"] = 'Yes'
        record["country"] = 'India'

        location = self.driver.find_element(By.ID, "detpg_hotel_location").text
        city, state = self._split_location(location)
        record["state/_ut"] = state
        record["city"] = city
        record["hotel_address"] = location

        facilities = self._joined_texts(".facilityList li div p")
        record["facility"] = facilities
        rating = self._text_at(".rating", 0)
        record["facilities_rating"] = rating
        # NOTE(review): same element as facilities_rating in the original
        # code; looks like a copy-paste placeholder -- confirm the selector.
        record["couple_friendly"] = rating
        record["check-in_timing"] = self._text_at(".appendBottom12 span", 0)
        record["check-out_timing"] = self._text_at(".appendBottom12 span", 2)
        record["hotel_overview"] = self._text_at(".appendBottom10 b", 0)
        record["amenities"] = self._text_at(".pptDtls__list li", 0)
        # Read from the same elements as "facility".
        record["property_highlights"] = facilities
        # Every remaining field is not scraped yet and stays blank from the
        # reset done in open_single_hotel_link.

    def _text_at(self, css_selector, index, default=""):
        """Return the text of the ``index``-th match, or ``default`` if absent."""
        elements = self.driver.find_elements(By.CSS_SELECTOR, css_selector)
        if 0 <= index < len(elements):
            return elements[index].text
        return default

    def _joined_texts(self, css_selector):
        """Return the texts of all matches joined with `` | ``."""
        elements = self.driver.find_elements(By.CSS_SELECTOR, css_selector)
        return self._join_values(element.text for element in elements)

    @staticmethod
    def _join_values(values):
        """Join the non-blank, stripped ``values`` with `` | ``."""
        return " | ".join(value.strip() for value in values if value and value.strip())

    @staticmethod
    def _hotel_id_from_link(link):
        """Return the ``hotelId`` query value of ``link``, or ``""`` if absent."""
        _, marker, tail = link.partition("hotelId=")
        if not marker:
            return ""
        return tail.split("&")[0]

    @staticmethod
    def _split_location(location_text):
        """Split a location line into ``(city, state)``.

        With commas, the last two comma-separated parts are the city and the
        state. Without commas the whole text is the state and the city falls
        back to the second-to-last word, or ``""`` for a single word.
        """
        parts = [part.strip() for part in location_text.split(",")]
        if len(parts) >= 2:
            return parts[-2], parts[-1]
        words = location_text.split()
        city = words[-2] if len(words) >= 2 else ""
        return city, parts[-1]
