In [1]:
import os
import re
import time  # for adding explicit waits
import traceback
from datetime import datetime
from getpass import getpass

import pandas as pd
import requests
# pip install lxml
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Configuration: URLs used by this cell.
LOGIN_URL = "https://app.courtreserve.com/Online/Account/LogIn/10688"
RESERVATIONS_URL = "https://app.courtreserve.com/Online/Reservations/Index/10688"

# Initialize the WebDriver and set the window size
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.set_window_size(1903, 1015)  # Set the window size as per your IDE script

# Step 1: Open the login page
driver.get(LOGIN_URL)

# Step 2: Log in only if the login form is present (i.e., not already logged in).
# Only a missing form element is treated as "already logged in"; any other
# WebDriver failure is allowed to surface instead of being silently swallowed.
try:
    username_field = driver.find_element(By.ID, "UserNameOrEmail")
    password_field = driver.find_element(By.ID, "Password")
    login_button = driver.find_element(By.CSS_SELECTOR, ".btn-log")
except NoSuchElementException:
    print("Login form not found, assuming already logged in...")
else:
    # Credentials must never be stored in the notebook: read them from the
    # environment, falling back to an interactive prompt when unset.
    username = os.environ.get("COURTRESERVE_USERNAME") or input("CourtReserve username/email: ")
    password = os.environ.get("COURTRESERVE_PASSWORD") or getpass("CourtReserve password: ")

    username_field.send_keys(username)
    password_field.send_keys(password)
    login_button.click()
    del password  # do not keep the secret alive in the kernel namespace

    print("Login form found, logging in...")

    # Wait for login to complete
    time.sleep(10)

# Step 3: Navigate directly to the reservations page
driver.get(RESERVATIONS_URL)

# Give the page time to finish rendering before taking the snapshot
time.sleep(5)
page_source = driver.page_source

# Keep a copy of the raw HTML on disk for offline inspection
with open('page_source.html', 'w', encoding='utf-8') as f:
    f.write(page_source)

# Step 4: Collect every "details" anchor; the next cell visits each one
soup = BeautifulSoup(page_source, 'lxml')
details_links = soup.find_all('a', class_='btn-scheduler-details')
base_url = "https://app.courtreserve.com"  # hrefs on the page are site-relative

total_links = len(details_links)
print(f"Total number of details links: {total_links}")

Login form found, logging in...
Total number of details links: 22


In [2]:
# Accumulates one dict per scraped event; later cells append to it and export it.
data_list = []
visited_urls = set()

# Remember the page the browser is on now so it can be restored after the
# loop even if an iteration fails before reaching driver.back().
listing_url = driver.current_url

# Visit each details link once and extract the event fields from its page
for i, link in enumerate(details_links):
    # Bound before the try block so the error handler can always report it
    full_url = None
    try:
        # Step 1: Get href and construct full URL
        href = link.get('href')
        if not href:
            print(f"Skipping link without href: {link}")
            continue
        full_url = base_url + href
        print(f"Processing: {href}")

        # The same event can be linked several times; scrape it only once
        if full_url in visited_urls:
            print(f"Skipping already visited URL: {full_url}")
            continue
        visited_urls.add(full_url)

        # Navigate to the details page and wait for it to load fully
        driver.get(full_url)
        time.sleep(5)

        # Parse the rendered page
        detail_soup = BeautifulSoup(driver.page_source, 'lxml')

        # 1. Title (first h4 tag); a page without one raises and is reported below
        title = detail_soup.find('h4').get_text(strip=True)

        # 2. The 'title-part' spans hold, in order: date, time, fees, participant
        #    info, and optionally further details
        title_parts = [
            part.get_text(strip=True)
            for part in detail_soup.find_all('span', class_='title-part')
        ]
        date = title_parts[0] if len(title_parts) > 0 else 'N/A'
        time_info = title_parts[1] if len(title_parts) > 1 else 'N/A'
        fees = title_parts[2] if len(title_parts) > 2 else 'N/A'
        participant_count = title_parts[3] if len(title_parts) > 3 else 'N/A'
        additional_info = " ".join(title_parts[4:]) if len(title_parts) > 4 else 'N/A'

        # Coach name: the <p> following the "Instructor(s)" label
        instructor_label = detail_soup.find('p', string='Instructor(s)')
        coach = instructor_label.find_next_sibling('p').get_text(strip=True) if instructor_label else 'N/A'

        # Court: the <p> following the "Court(s)" label
        court_label = detail_soup.find('p', string='Court(s)')
        court = court_label.find_next_sibling('p').get_text(strip=True) if court_label else 'N/A'

        # Registrants tab label. BeautifulSoup may pass None to the callback
        # (a tag's .string is None when it has several children), so guard
        # against it instead of raising TypeError and losing the whole record.
        registrants_element = detail_soup.find(
            'span',
            class_='k-link',
            string=lambda text: text is not None and 'REGISTRANTS' in text,
        )
        registrants_text = registrants_element.get_text(strip=True) if registrants_element else 'N/A'

        # The count is the number in parentheses inside the label text
        registrants_match = re.search(r'\((\d+)\)', registrants_text)
        registrants_count = registrants_match.group(1) if registrants_match else 'N/A'

        # Append the data to the list
        data_list.append({
            'Title': title,
            'Date': date,
            'Time': time_info,
            'Fees': fees,
            'Info': participant_count,
            'AdditionalInfo': additional_info,
            'Coach': coach,
            'Court': court,
            'Registrants': registrants_count
        })

        # Go back to the reservations page to process the next link
        driver.back()
        time.sleep(4)

        # Print progress
        print(f"Completed {i + 1}/{total_links}")

    except Exception as e:
        # Report the failure and carry on with the next link
        print(f"Error occurred: {e}, {full_url}")
        traceback.print_exc()
        continue

# A failed iteration skips driver.back(), which would leave the browser on a
# details page. Restore the starting page so later cells read the right one.
if driver.current_url != listing_url:
    driver.get(listing_url)
    time.sleep(5)

Processing: /Online/Events/Details/10688/VPQWM9Q10688283
Completed 1/22
Processing: /Online/Events/Details/10688/8TMTFQ210688290
Completed 2/22
Processing: /Online/Events/Details/10688/A9RZLET10688852
Completed 3/22
Processing: /Online/Events/Details/10688/95QBDCK10688130
Completed 4/22
Processing: /Online/Events/Details/10688/L6XGIW210688322
Completed 5/22
Processing: /Online/Events/Details/10688/D8QDNKT10688547
Completed 6/22
Processing: /Online/Events/Details/10688/8TMTFQ210688290
Skipping already visited URL: https://app.courtreserve.com/Online/Events/Details/10688/8TMTFQ210688290
Processing: /Online/Events/Details/10688/L6XGIW210688322
Skipping already visited URL: https://app.courtreserve.com/Online/Events/Details/10688/L6XGIW210688322
Processing: /Online/Events/Details/10688/8TMTFQ210688290
Skipping already visited URL: https://app.courtreserve.com/Online/Events/Details/10688/8TMTFQ210688290
Processing: /Online/Events/Details/10688/L6XGIW210688322
Skipping already visited URL: h

In [3]:
 # Regular expressions for extracting date and time
date_regex = r'([A-Za-z]{3} \d{1,2})'  # month abbreviation plus day, e.g. "Aug 21"
time_regex = r'(\d{2}:\d{2})'  # 24-hour clock time, e.g. "16:00"


def _short_hour(hhmm):
    """Turn a 24-hour "HH:MM" string into compact 12-hour form, e.g. "16:00" -> "4p"."""
    parsed = datetime.strptime(hhmm, '%H:%M')
    return parsed.strftime('%I').lstrip('0') + parsed.strftime('%p').lower()[0]


# Snapshot whatever page the browser is currently showing and find every
# button whose text is exactly "Reserve"
page_source = driver.page_source
soup = BeautifulSoup(page_source, 'lxml')
reserve_buttons = soup.find_all('button', string="Reserve")

for button in reserve_buttons:
    try:
        # Buttons carrying the 'hide' class are not recorded
        if 'hide' in button.get('class', []):
            continue

        # The slot boundaries are stored as attributes on the button
        slot_start = button.get('start')
        slot_end = button.get('end')

        # Fall back to "N/A" unless both attributes are present and parseable
        date = "N/A"
        time_range = "N/A"
        if slot_start and slot_end:
            found_date = re.search(date_regex, slot_start)
            if found_date:
                date = found_date.group(0)

            found_start = re.search(time_regex, slot_start)
            found_end = re.search(time_regex, slot_end)
            if found_start and found_end:
                time_range = f"{_short_hour(found_start.group(0))}-{_short_hour(found_end.group(0))}"

        # An open court is recorded with title "Open" and zero fees,
        # participants and registrants
        data_list.append({
            'Title': "Open",
            'Date': date,
            'Time': time_range,
            'Fees': 0,
            'Info': 0,
            'AdditionalInfo': "None",
            'Coach': "None",
            'Court': button.get('courtlabel'),
            'Registrants': 0
        })

    except Exception as e:
        # Report the problem and move on to the next button
        print(f"Error processing button: {e}")
        continue

# Step 7: Convert everything collected so far into a DataFrame.
# The browser is deliberately left open here; it is closed by the export cell.
df = pd.DataFrame(data_list)

In [4]:
# Export the collected rows to a dated CSV file and always close the browser.
try:
    # Build the frame in one call. This also works when data_list is empty,
    # where concatenating per-row frames would raise
    # "ValueError: No objects to concatenate".
    df = pd.DataFrame(data_list)

    # Get the current date in the desired format
    current_date = datetime.now().strftime('%Y-%m-%d')

    # Construct the filename with the date
    filename = f'reservation_data_{current_date}.csv'

    if df.empty:
        # Do not overwrite an earlier export from today with an empty file
        print(f"No reservation data collected; {filename} was not written.")
    else:
        # Save the DataFrame to CSV with the date in the filename
        df.to_csv(filename, index=False)
finally:
    # Close the browser even if building or writing the frame fails
    driver.quit()