In [5]:
# import dependencies
import os
import pandas as pd
#import requests
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist 
from selenium import webdriver
from selenium.common.exceptions import ElementClickInterceptedException, TimeoutException, ElementNotInteractableException, JavascriptException
from datetime import date, datetime, timedelta
import holidays
import time

import json

from functions import country_holidays, intHolidayClosures, allMondays, buildBrowser, createFilename

# makes it easier to see dataframes
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("<style>.output_result { max-width:100% !important; }</style>"))
pd.set_option('expand_frame_repr', False)



### Creates a list of trips starting on Mondays and lasting 17 days. International holidays closures and country holidays are taken into account.

In [6]:
int_stop_date = date(2022, 12, 31)
dom_stop_date = date(2022, 12, 31)


# function to generate list of trips from (Specific to Camper Travel USA)
def create_trips(int_stop_date, US):
    for pu_d in allMondays(int_stop_date):
        # drop off date is 17 days after today, average internation reservation is 17 days
        do_d = pu_d + timedelta(17)
        # for all mondays after today (weeks = 36, is the following year)
        if pu_d > date.today() + timedelta(10):
            # if monday is a holiday, change pickup to Tuesday
            if pu_d in intHolidayClosures(country_holidays, US):
                pu_d += timedelta(1)
            # if Thursday is a holiday, change drop off to Wednesday
            if do_d in intHolidayClosures(country_holidays, US):
                do_d -= timedelta(1)
            #We should use yield when we want to iterate over a sequence, but don’t want to store the entire sequence in memory. 
            yield {'start_date':pu_d.strftime('%Y-%m-%d'),
                    'start_day': pu_d.strftime('%d').lstrip('0'),
                    'start_month': pu_d.strftime('%b'),
                    'start_year': pu_d.strftime('%Y'),
                    'start_date_out':pu_d.strftime('%m-%d-%Y'),
                    'end_date':do_d.strftime('%Y-%m-%d'),
                    'end_day':do_d.strftime('%d').lstrip('0'),
                    'end_month':do_d.strftime('%b'),
                    'end_year':do_d.strftime('%Y')}

# create list of dictionaries to store cities that will be searched
cities = [{'city':'San Francisco','state':'CA', 'abbr':'SFO', 'country':'USA'},
          {'city':'Las Vegas','state':'NV', 'abbr':'LAS', 'country':'USA'},
          {'city':'Los Angeles','state':'CA', 'abbr':'LAX', 'country':'USA'},
          {'city':'Seattle','state':'WA', 'abbr':'SEA', 'country':'USA'},
          {'city':'Salt Lake City','state':'UT', 'abbr':'SLC', 'country':'USA'},
          {'city':'Denver','state':'CO', 'abbr':'DEN', 'country':'USA'},
          {'city':'Chicago','state':'IL', 'abbr':'CHI', 'country':'USA'},
          {'city':'Dallas','state':'TX', 'abbr':'DFW', 'country':'USA'},
          {'city':'Orlando','state':'FL', 'abbr':'MCO', 'country':'USA'}]

# add a 'done' key to each city and set it to False
for city in cities: city['done'] = False

In [11]:

#locates the chromedriver to be used in webdriver.chrome() selenium webapp creator function (so you can see the bot)
executable_path = 'C:\Program Files\Chromedriver\chromedriver.exe'
#site to be webscraped
url = 'https://www.campertravelusa.com/'


#must find the createFilename function to determine what this does, likely it creates a saved file in the working directory
log_file = createFilename('Camper Travel', True)

In [8]:

#javascript logic, not sure how this works
click_chat_js = """
                    var button = document.querySelector('button.olark-launch-button.olark-size-md');
                    function eventFire(elem, etype) {
                      if (elem.fireEvent) {
                        elem.fireEvent('on' + etype);
                      } 
                      else {
                        var evObj = document.createEvent('Events');
                        evObj.initEvent(etype, true, false);
                        elem.dispatchEvent(evObj);
                      }
                    }
                    eventFire(button, 'click');
                """

In [9]:
#if the file created by createFilename and stored in log_file variable exists,
#open the file, iterate though it and store it as json in the data variable

if os.path.exists(log_file):

    with open(log_file, 'r') as fin:

        #json.loads is a decoder
        data = [json.loads(x) for x in fin]

    #drils down into the json to find the last location and last pickup date, to define where the loop will stop
    last_city = data[-1]['Location'].split(',')[0]
    last_date = date[-1]['Pickup Date']
#iterate through the cities dictionary, stop the loop if equal to last city and else change city['done'] from False to True
    for city in cities:
        if city['city'] == last_city: break
        else: city['done'] = True
    # enumerate through the list of created trips, if pickup date out is equal to the last date iterate to the next index of the created trips dictionary and store it as the lt variable
    # THE ISSUE HERE IS pu_date_out is not key in the create trips dictionary, 
    # TWO OPTION HERES, EITHER IT IS MISNAMED AND IT SHOULD BE start_date_out, or ONE OF THE UNKNOWN FUNCTIONS IS ADDING A pu_date_out key
    lt = next((t for t, trip in enumerate(create_trips(dom_stop_date, True)) if trip['pu_date_out'] == last_date))

else:
    lt = -1

In [13]:
deals_df = pd.DataFrame()
# search dates for each city
for c, city in enumerate(cities):
    #this executes when city['done'] is equal to False
    if not city['done']:
        
        US = True if city['country'] == 'USA' else False
        browser = buildBrowser(executable_path)
        browser.get(url)

        # select city for pickup and dropoff inputs
        browser.find_by_xpath("//input[contains(@id, 'pickupLocation')]").click()
        browser.find_by_xpath(f"//div[@class = 'autocomplete-suggestion'][@data-val = '{city['city']}']").click()

        # choose international driver's license
        browser.find_by_css('input.form-control.X-CountryOfResidence.AutoCompleteSelectInput').click()
        browser.find_by_xpath("//div[@class = 'autocomplete-suggestion'][@data-val = 'International']").click()

        

        for t, trip in enumerate(create_trips(int_stop_date, US)):

            if t > lt:
                deals = []
            
                browser.find_by_xpath("//button[contains(@id, 'pickupDate')]").click()

                cal_month = browser.find_by_xpath("//div[@class = 'dr-cal-start']/div/div/div").find_by_css('button.dp-cal-month').text[0:3]

                while cal_month != trip['start_month']:
                    browser.find_by_xpath("//div[@class = 'dr-cal-end']/div/div/div/header/button[@class = 'dp-next']").click()
                    cal_month = browser.find_by_xpath("//div[@class = 'dr-cal-start']/div/div/div").find_by_css('button.dp-cal-month').text[0:3]

                pu_d = browser.find_by_xpath(f"//div[@class = 'dr-cal-start']/div/div/div/div/button[not(contains(@class, 'edge-day'))][text() = {trip['start_day']}]")
                pu_d.click()

                if int(trip['start_day']) < int(trip['end_day']):
                    do_d = browser.find_by_xpath(f"//div[@class = 'dr-cal-start']/div/div/div/div/button[not(contains(@class, 'edge-day'))][text() = {trip['end_day']}]")
                else:
                    do_d = browser.find_by_xpath(f"//div[@class = 'dr-cal-end']/div/div/div/div/button[not(contains(@class, 'edge-day'))][text() = {trip['end_day']}]")
                do_d.click()

                browser.find_by_css('button.btn.btn-success.btn-lg.btn-block.X-SearchButton').click()
                browser.windows.current = browser.windows[1]

                while browser.is_element_not_present_by_css('div.campervan-result'):
                    continue

                #browser.execute_script("window.scrollTo(500, $(document).height());")
                browser.execute_script("window.scrollTo(500, 0);")
                while True:
                    try:
                        browser.execute_script("document.querySelector('#hbl-live-chat-wrapper').style.display = 'none';")
                        break
                    except JavascriptException:
                        continue

                results = browser.find_by_css('div.campervan-result')
                
                for result in results:

                    browser.execute_script("document.querySelector('#hbl-live-chat-wrapper').style.display = 'none';")
                    
                    if len(result.find_by_css('span.X-VPrice-ConvertedFrom-Amount')) > 0:
                    
                        daily_rate = float(result.find_by_css('span.X-VPrice-ConvertedFrom-Amount').text)
                        pu_date = trip['start_date']
                        do_date = trip['end_date']
                        location = f'{city["city"]}, {city["state"]}'

                        if browser.find_by_css('div#olark-container').visible:
                            browser.execute_script(click_chat_js)

                        while True:
                            try:
                                result.find_by_text('Check Availability').click()
                                break
                            except ElementClickInterceptedException:
                                try:
                                    browser.execute_script("window.scrollTo(0, window.scrollY + 200);")
                                except JavascriptException:
                                    continue

                        browser.windows.current = browser.windows[2]
                        start = time.time()
                        while True:
                            try:
                                if time.time() > start + 30:
                                    browser.reload()
                                    # browser.windows.current.close()
                                    # browser.windows.current = browser.windows[1]
                                    # result.find_by_css('a.button-green.button-book').click()
                                    # browser.windows.current = browser.windows[2]
                                    start = time.time()
                            except TimeoutException:
                                continue
                            try:
                                company = browser.find_by_css('img.vehicle-image')['src'].split('/')[-2].replace('-', ' ').lower().rstrip('us').strip().title()
                                print(company)
                                break
                            except ElementDoesNotExist:
                                continue
                        rv_class = browser.find_by_css('div.vehicle-name').text
                        try:
                            section = browser.find_by_css('span.X-VPrice-ConvertedFrom.v-price-total.v-price-converted-from').last
                            total = float(section.find_by_css('span.X-VPrice-ConvertedFrom-Amount').text)
                            print(total)
                        except ElementDoesNotExist:
                            total = 'Not Listed'

                        browser.windows.current.close()
                        browser.windows.current = browser.windows[1]

                        deals.append({'Pickup Date':pu_date,'Dropoff Date':do_date,'Class':rv_class,'Company':company,
                                    'Daily Rate':daily_rate,'Price':total,'Location':location})

                        deals_df = deals_df.append(deals, ignore_index=True)
                
                browser.windows.current.close()

        browser.quit()
        

AttributeError: 'WebDriver' object has no attribute 'find_by_xpath'

In [115]:
print(deals_df)

     Pickup Date Dropoff Date                             Class             Company  Daily Rate    Price           Location
0     2022-05-16   2022-06-02                     Station Wagon  Travellersautobarn       99.31  1787.50  San Francisco, CA
1     2022-05-16   2022-06-02                     Station Wagon  Travellersautobarn       99.31  1787.50  San Francisco, CA
2     2022-05-16   2022-06-02               Class C 23-25ft (M)            Roadbear      162.49  4320.38  San Francisco, CA
3     2022-05-16   2022-06-02                     Station Wagon  Travellersautobarn       99.31  1787.50  San Francisco, CA
4     2022-05-16   2022-06-02               Class C 23-25ft (M)            Roadbear      162.49  4320.38  San Francisco, CA
...          ...          ...                               ...                 ...         ...      ...                ...
6609  2022-12-27   2023-01-12               C30 Large Motorhome      Cruise America       79.35  2707.02        Orlando, FL
6610  20

In [116]:
deals_df.to_csv('camper_travel_data_us.csv', index=False)