In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from io import StringIO

import time

# Season-stats page whose rendered HTML is captured below.
url = "https://www.dgpt.com/season-stats/?season=2024&league=dgpt"

# Start a Chrome session; ChromeDriverManager().install() supplies the
# chromedriver path handed to the Selenium Service.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

try:
    # Navigate to the stats page.
    driver.get(url)

    # Fixed pause so the page has time to finish rendering before the
    # source is captured.
    # NOTE(review): a fixed 10 s sleep is a guess; an explicit wait on the
    # table element would be more reliable — confirm the right selector first.
    time.sleep(10)

    # Snapshot of the DOM after the pause.
    html_content = driver.page_source
finally:
    # Always shut the browser down, even when loading the page fails, so no
    # Chrome/chromedriver process is left running.
    driver.quit()

# pandas emits a FutureWarning when handed literal HTML as a plain string,
# so expose the page source through a file-like object instead.
html_io = StringIO(html_content)

# Parse the tables in the page with the lxml parser and keep the first one.
scraped_tables = pd.read_html(io=html_io, flavor='lxml')
df = scraped_tables[0]

# Persist the raw scraped table (row index omitted).
csv_file = "dgpt_2024_stats.csv"
df.to_csv(path_or_buf=csv_file, index=False)

# Quick sanity check of what was scraped.
print(df.head())

In [None]:
# Read the stats table to be cleaned.
# NOTE(review): no cell visible here writes "dgpt_2024_stats_cleaned.csv"
# (the scrape step saves "dgpt_2024_stats.csv") — presumably it comes from a
# manual cleanup step; confirm before relying on Restart & Run All.
csv_file = "dgpt_2024_stats_cleaned.csv"
df = pd.read_csv(filepath_or_buffer=csv_file)

# Columns whose cells carry a percentage followed by an ordinal suffix.
# The names must match the CSV header exactly (including the lowercase "x"
# in "Circle 1x Putting %").
percentage_columns = [
    "Circle 1 in Reg %",
    "Circle 2 in Reg %",
    "Scramble %",
    "Circle 1x Putting %",
    "Circle 2 Putting %",
    "Fairway Hits %",
    "Parked %",
]

# Function to clean the percentage values by removing ordinal suffixes
def clean_percentage(value):
    """Return the first whitespace-separated token of a string cell.

    Used to drop the ordinal suffix that follows the number in the
    percentage columns.

    Args:
        value: A cell value. Strings are trimmed to their first token;
            anything else (e.g. a float or NaN) is passed through.

    Returns:
        The first token as a string, ``None`` when the string is empty or
        only whitespace, or ``value`` itself when it is not a string.
    """
    if not isinstance(value, str):
        return value

    tokens = value.split()
    # An empty / whitespace-only cell has no tokens; indexing [0] would raise
    # IndexError, so report it as missing instead (pd.to_numeric with
    # errors='coerce' turns None into NaN).
    return tokens[0] if tokens else None

# For every percentage column: strip the ordinal suffix, then cast to a
# numeric dtype. Values that still fail to parse become NaN (errors='coerce').
for column_name in percentage_columns:
    suffix_free = df[column_name].apply(clean_percentage)
    df[column_name] = pd.to_numeric(suffix_free, errors='coerce')

# Write the numeric version of the table to its own file (row index omitted).
cleaned_csv_file = "dgpt_2024_stats_cleaned_final.csv"
df.to_csv(path_or_buf=cleaned_csv_file, index=False)

# Quick sanity check of the cleaned frame.
print(df.head())

In [None]:

# Read the table whose "Scores" column is converted below.
# NOTE(review): no cell visible here writes this file (the cleaning step
# saves "dgpt_2024_stats_cleaned_final.csv") — presumably it was prepared by
# hand; confirm its provenance.
df = pd.read_csv(
    filepath_or_buffer="dgpt_2024_stats_no_percentage_no_stringscore.csv",
)

# Function to calculate average score from the Scores column
def calculate_avg_score(scores_str):
    """Collapse a four-value score breakdown into one weighted average.

    The input is expected to hold four whitespace-separated percentages,
    read in order as birdie, par, +1 and +2. They are weighted -1, 0, +1 and
    +2 respectively and the sum is divided by 100.
    NOTE(review): presumably this is the average strokes relative to par per
    hole — confirm how the source buckets eagles and scores worse than +2.

    Args:
        scores_str: String such as ``"40 40 15 5"``. A trailing ``%`` on any
            value is tolerated. A missing cell (``None`` / NaN) is accepted.

    Returns:
        The weighted average rounded to 6 decimals, or NaN for a missing
        cell.

    Raises:
        ValueError: If the cell does not contain exactly four values, or a
            value is not numeric.
    """
    # Missing cells arrive as NaN/None from read_csv; there is nothing to
    # parse, so propagate "missing" instead of failing on .split().
    if pd.isna(scores_str):
        return float("nan")

    # Tolerate a trailing percent sign on each value.
    tokens = [token.rstrip("%") for token in str(scores_str).split()]
    if len(tokens) != 4:
        raise ValueError(
            f"expected 4 score percentages (birdie, par, +1, +2), "
            f"got {len(tokens)}: {scores_str!r}"
        )

    birdie, par, plus1, plus2 = map(float, tokens)
    # Weight each bucket by its strokes relative to par; /100 converts the
    # percentages to fractions.
    avg = (birdie * -1 + par * 0 + plus1 * 1 + plus2 * 2) / 100
    return round(avg, 6)

# Replace each "Scores" breakdown string with its single averaged value.
averaged_scores = df["Scores"].apply(calculate_avg_score)
df["Scores"] = averaged_scores

# Write the result out (row index omitted).
df.to_csv(path_or_buf="updated_dgpt_2024_stats.csv", index=False)