# Getting in through the login page

In [18]:
# Import needed packages
from selenium import webdriver
from bs4 import BeautifulSoup as bs
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By 
from selenium.webdriver.chrome.service import Service
import time
import pandas as pd
import config

In [19]:
# Retrieve the username and password from the separate config module
bardough_username, bardough_password = config.username, config.password

In [21]:
# Start Chrome through the local chromedriver binary and open the login page.
path = '/Users/sigfus/Desktop/Lífið/Github/Bardough/Crawling/chromedriver-mac-x64/chromedriver'
s = Service(path)
driver = webdriver.Chrome(service=s)
# Let find_element poll for up to 10 seconds before giving up, so element
# lookups that directly follow a click or a page load do not race the browser.
driver.implicitly_wait(10)
driver.get('https://www.toasttab.com/login')

In [22]:
# Look up the username input field by its element id
username = driver.find_element(by=By.ID, value='username')

In [23]:
# Sign in with email first: type the username read from config into the
# username field located above
userid = bardough_username
username.send_keys(userid)

In [24]:
# Click the sign in button to prompt for the password.
# The button is addressed by an absolute XPath, so this locator depends on
# the exact layout of the login page.
sign_in_button = driver.find_element(By.XPATH, '/html/body/div[2]/main/section/div/div/div/div/div/form/div[2]/button')
sign_in_button.click()

In [25]:
# Look up the password input field by its element id
password = driver.find_element(by=By.ID, value='password')

In [26]:
# Sign in with password second: type the password read from config into the
# password field located above
key = bardough_password
password.send_keys(key)

In [27]:
# Click the sign in button to submit the password and enter the site.
# The button is addressed by an absolute XPath, so this locator depends on
# the exact layout of the password page.
sign_in_button = driver.find_element(By.XPATH, '/html/body/div[2]/main/section/div/div/div/form/div[3]/button')
sign_in_button.click()

In [28]:
# Go to the Time Entries page. The URL was copied manually from the browser
# (under Reports --> Employee Performance).
url = 'https://www.toasttab.com/restaurants/admin/reports/home#selection-details'
driver.get(url)

# Crawling Time Entries data for December 2022 - November 2023

## Code for crawling 2023 time entries data

### Month changed for each iteration and total combined at end

In [89]:
# Crawl the time entries for one month, one results page at a time.
# Each nov_* list receives one inner list per crawled page; a later step
# flattens the pages into single columns.

nov_employee = []
nov_job_title = []
nov_in_date = []
nov_out_date = []
nov_total_hours = []
nov_unpaid_break = []
nov_paid_break = []
nov_payable_hours = []

# Per-page targets, in the same order as the cells of a table row
nov_columns = [nov_employee, nov_job_title, nov_in_date, nov_out_date,
               nov_total_hours, nov_unpaid_break, nov_paid_break,
               nov_payable_hours]

# Change page count based on number of pages
num_pages = 3

page = 0
while page < num_pages:

    # Parse the page currently shown in the browser. Naming the parser keeps
    # the result independent of which optional parsers are installed and
    # avoids bs4's "no parser specified" warning.
    soup = bs(driver.page_source, 'html.parser')

    # Collect the cell text (kept as strings) of every element marked 'odd'
    # or 'even', in the order the rows appear on the page.
    page_rows = []
    for row in soup.find_all(class_=['odd', 'even']):
        cell_texts = [cell.get_text() for cell in row.find_all('td')]
        # A row without exactly one cell per column (for example a message or
        # placeholder row) would shift every later value into the wrong
        # column, so it is skipped.
        if len(cell_texts) == len(nov_columns):
            page_rows.append(cell_texts)

    # Turn this page's rows into one list per column and store each of them
    for index, column in enumerate(nov_columns):
        column.append([cells[index] for cells in page_rows])

    page += 1

    # Move on to the next page, unless the last requested page was just read
    if page < num_pages:
        time.sleep(4)
        # Scroll down to the bottom of the page
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Wait for page to be loaded
        time.sleep(4)
        # Find and click the 'next' button.
        # NOTE: the li[...] index has been edited by hand before (an li[4]
        # variant was used earlier); check it when the page count changes.
        next_page_button = driver.find_element(By.XPATH, '//*[@id="labor-time-entries-table_wrapper"]/div[3]/div/div/ul/li[5]/a')
        next_page_button.click()
        time.sleep(4)

In [92]:
# Check for duplicate pages (a sign that crawling or page switching failed).
# Every page's employee list is compared once with each later page's list.
for i, current_page in enumerate(nov_employee):
    for j, later_page in enumerate(nov_employee[i + 1:], start=i + 1):
        if current_page == later_page:
            # Report which two page indexes hold identical data
            print('The lists at index {} and {} are equal'.format(i, j))

In [90]:
# Combine the per-page nested lists into one flat list per column
def _flatten_pages(pages):
    """Return the items of every inner list of *pages* as one flat list."""
    flat = []
    for page_items in pages:
        for item in page_items:
            flat.append(item)
    return flat


comp_nov_employee = _flatten_pages(nov_employee)
comp_nov_job_title = _flatten_pages(nov_job_title)
comp_nov_in_date = _flatten_pages(nov_in_date)
comp_nov_out_date = _flatten_pages(nov_out_date)
comp_nov_total_hours = _flatten_pages(nov_total_hours)
comp_nov_unpaid_break = _flatten_pages(nov_unpaid_break)
comp_nov_paid_break = _flatten_pages(nov_paid_break)
comp_nov_payable_hours = _flatten_pages(nov_payable_hours)

In [91]:
# Create data frame for month: one column per flattened list
nov_time_entries = {
    'Employee': comp_nov_employee,
    'Job Title': comp_nov_job_title,
    'In Date': comp_nov_in_date,
    'Out Date': comp_nov_out_date,
    'Total Hours': comp_nov_total_hours,
    'Unpaid Break Time': comp_nov_unpaid_break,
    'Paid Break Time': comp_nov_paid_break,
    'Payable Hours': comp_nov_payable_hours,
}

nov_time_entries = pd.DataFrame(nov_time_entries)
# sort_values returns a new frame rather than sorting in place, so the result
# must be assigned back or the sort is lost. The index is renumbered so it
# follows the sorted order.
# NOTE(review): 'In Date' holds the scraped cell text, so this is a string
# sort - confirm it matches chronological order for the site's date format.
nov_time_entries = nov_time_entries.sort_values(by="In Date").reset_index(drop=True)
nov_time_entries.shape

(222, 8)

In [93]:
# Write the month's data frame to a comma-separated, UTF-8 encoded CSV file
nov_time_entries.to_csv(
    '/Users/sigfus/Desktop/nov_time_entries.csv',
    sep=',',
    encoding='utf-8',
)

# Combining monthly data frames into one

In [94]:
# Collect the 12 monthly data frames in one list, ordered from December 2022
# through November 2023
time_entries_seperate = [
    dec_22_time_entries,
    jan_time_entries,
    feb_time_entries,
    mar_time_entries,
    apr_time_entries,
    may_time_entries,
    jun_time_entries,
    jul_time_entries,
    aug_time_entries,
    sep_time_entries,
    oct_time_entries,
    nov_time_entries,
]

In [96]:
# Stack the monthly frames into one data frame; ignore_index gives the result
# a single fresh index instead of repeating each month's own index
time_entries_full = pd.concat(time_entries_seperate, ignore_index=True)
time_entries_full.shape

(2293, 8)

In [97]:
# Write the combined data frame to a comma-separated, UTF-8 encoded CSV file
time_entries_full.to_csv(
    '/Users/sigfus/Desktop/time_entries_full.csv',
    sep=',',
    encoding='utf-8',
)