In [2]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd


In [3]:


# URL of the page you want to scrape
url = "https://sports.yahoo.com/golf/european-tour/2025/masters-tournament/"

# Fetch the page's HTML
response = requests.get(url)
html_content = response.text


In [4]:
# Find the position of the "competitors" key in the HTML
start_index = html_content.find('"leaderboard":')

if start_index == -1:
    print("Competitors data not found")
else:
    # Find the position of the opening bracket '['
    start_index = html_content.find('{', start_index)
    # Find the position of the closing bracket ']'
    end_index = html_content.find('}', start_index) + 1

    # Extract the string representing the competitors data
    competitors_str = html_content[start_index:end_index]
    print(competitors_str)


{"0":{"display_name":"Rory McIlroy","position":"1","home_url":"\u002Fgolf\u002Fplayers\u002FRory+Mcilroy\u002F8016\u002F","last_name":"McIlroy","results":{"0":{"round":{"cut_reason":null,"made_cut":"true","strokes":"72","tee_time":"2025-04-10 17:12:00","to_par":"E"}


In [5]:
# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Find the table in the HTML
table = soup.find('table')

if table:
    # Extract table headers
    headers = [header.text.strip() for header in table.find_all('th')]

    # Extract table rows
    rows = []
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if cells:
            rows.append([cell.text.strip() for cell in cells])

    # Create a DataFrame from the table data
    df_table = pd.DataFrame(rows, columns=headers)
    print(df_table)
else:
    print("No table found on the page")

                            Pos                 Name  R1  R2  R3  R4 Today  \
0             Rory McIlroy    1         Rory McIlroy  72  66  66        -6   
1        Bryson DeChambeau    2    Bryson DeChambeau  69  68  69        -3   
2            Corey Conners    3        Corey Conners  68  70  70        -2   
3             Patrick Reed   T4         Patrick Reed  71  70  69        -3   
4             Ludvig Aberg   T4         Ludvig Aberg  68  73  69        -3   
.. ..                  ...  ...                  ...  ..  ..  ..  ..   ...   
90     Jose Luis Ballester  T89  Jose Luis Ballester  76  MC  MC  MC     -   
91           Angel Cabrera  T92        Angel Cabrera  75  MC  MC  MC     -   
92       Thriston Lawrence  T92    Thriston Lawrence  79  MC  MC  MC     -   
93               Noah Kent  T92            Noah Kent  79  MC  MC  MC     -   
94             Nick Dunlap   95          Nick Dunlap  90  MC  MC  MC     -   

   Thru Total Strokes  
0     F   -12     204  
1     F   -10  

In [22]:
# Convert the 'Total' column to integer type
df_table['Total'] = pd.to_numeric(df_table['Total'], errors='coerce')

# Display the updated DataFrame
print(df_table)

                            Pos                 Name  R1  R2  R3  R4 Today  \
0             Rory McIlroy    1         Rory McIlroy  72  66  66        -6   
1        Bryson DeChambeau    2    Bryson DeChambeau  69  68  69        -3   
2            Corey Conners    3        Corey Conners  68  70  70        -2   
3             Patrick Reed   T4         Patrick Reed  71  70  69        -3   
4             Ludvig Aberg   T4         Ludvig Aberg  68  73  69        -3   
.. ..                  ...  ...                  ...  ..  ..  ..  ..   ...   
90     Jose Luis Ballester  T89  Jose Luis Ballester  76  MC  MC  MC     -   
91           Angel Cabrera  T92        Angel Cabrera  75  MC  MC  MC     -   
92       Thriston Lawrence  T92    Thriston Lawrence  79  MC  MC  MC     -   
93               Noah Kent  T92            Noah Kent  79  MC  MC  MC     -   
94             Nick Dunlap   95          Nick Dunlap  90  MC  MC  MC     -   

   Thru  Total Strokes  
0     F  -12.0     204  
1     F  -10.

In [23]:
# Define the players for each team
adam_team_players = [
    "Scottie Scheffler", "Justin Thomas", "Brooks Koepka", "Patrick Cantlay",
    "Tommy Fleetwood", "Will Zalatoris", "Dustin Johnson", "Patrick Reed"
]
phil_team_players = [
    "Rory McIlroy", "Ludvig Aberg", "Hideki Matsuyama", "Akshay Bhatia",
    "Min Woo Lee", "Robert Macintyre", "Tony Finau", "Wyndham Clark"
]
jordan_team_players = [
    "Collin Morikawa", "Xander Schauffele", "Cameron Smith", "Sepp Straka",
    "Tyrrell Hatton", "Russell Henley", "Nicolai Hojgaard", "Sahith Theegala"
]
steve_team_players = [
    "Jon Rahm", "Bryson DeChambeau", "Joaquin Niemann", "Jordan Spieth",
    "Shane Lowry", "Viktor Hovland", "Corey Conners", "Jason Day"
]

# Filter the data for each team
adam_team = df_table[df_table['Name'].isin(adam_team_players)]
phil_team = df_table[df_table['Name'].isin(phil_team_players)]
jordan_team = df_table[df_table['Name'].isin(jordan_team_players)]
steve_team = df_table[df_table['Name'].isin(steve_team_players)]

# Display the data frames
print("Adam Team:")
print(adam_team)


Adam Team:
                         Pos               Name  R1  R2  R3  R4 Today Thru  \
3          Patrick Reed   T4       Patrick Reed  71  70  69        -3    F   
8     Scottie Scheffler   T6  Scottie Scheffler  68  71  72         E    F   
33      Tommy Fleetwood  T30    Tommy Fleetwood  73  69  75         3    F   
46      Patrick Cantlay  T42    Patrick Cantlay  74  72  73         1    F   
47        Justin Thomas  T48      Justin Thomas  73  71  76         4    F   
57       Dustin Johnson  T54     Dustin Johnson  74  MC  MC  MC     -    -   
69        Brooks Koepka  T67      Brooks Koepka  74  MC  MC  MC     -    -   
84       Will Zalatoris  T82     Will Zalatoris  74  MC  MC  MC     -    -   

    Total Strokes  
3    -6.0     210  
8    -5.0     211  
33    1.0     217  
46    3.0     219  
47    4.0     220  
57    3.0     147  
69    5.0     149  
84    8.0     152  


In [24]:
print("\nPhil Team:")
print(phil_team)



Phil Team:
                        Pos              Name  R1  R2  R3  R4 Today Thru  \
0         Rory McIlroy    1      Rory McIlroy  72  66  66        -6    F   
4         Ludvig Aberg   T4      Ludvig Aberg  68  73  69        -3    F   
43       Wyndham Clark  T42     Wyndham Clark  76  68  75         3    F   
48    Hideki Matsuyama  T48  Hideki Matsuyama  73  68  79         7    F   
50         Min Woo Lee  T48       Min Woo Lee  71  72  77         5    F   
52       Akshay Bhatia  T52     Akshay Bhatia  70  76  75         3    F   
61          Tony Finau  T59        Tony Finau  75  MC  MC  MC     -    -   
75    Robert Macintyre  T73  Robert Macintyre  75  MC  MC  MC     -    -   

    Total Strokes  
0   -12.0     204  
4    -6.0     210  
43    3.0     219  
48    4.0     220  
50    4.0     220  
52    5.0     221  
61    4.0     148  
75    6.0     150  


In [25]:
print("\nJordan Team:")
print(jordan_team)



Jordan Team:
                         Pos               Name  R1  R2  R3  R4 Today Thru  \
12    Xander Schauffele  T10  Xander Schauffele  73  69  70        -2    F   
14      Collin Morikawa  T14    Collin Morikawa  72  69  72         E    F   
16       Tyrrell Hatton  T17     Tyrrell Hatton  69  70  75         3    F   
31      Sahith Theegala  T30    Sahith Theegala  72  72  73         1    F   
55       Russell Henley  T54     Russell Henley  79  MC  MC  MC     -    -   
70        Cameron Smith  T67      Cameron Smith  71  MC  MC  MC     -    -   
71          Sepp Straka  T67        Sepp Straka  78  MC  MC  MC     -    -   
74     Nicolai Hojgaard  T73   Nicolai Hojgaard  76  MC  MC  MC     -    -   

    Total Strokes  
12   -4.0     212  
14   -3.0     213  
16   -2.0     214  
31    1.0     217  
55    3.0     147  
70    5.0     149  
71    5.0     149  
74    6.0     150  


In [26]:
print("\nSteve Team:")
print(steve_team)


Steve Team:
                         Pos               Name  R1  R2  R3 R4 Today Thru  \
1     Bryson DeChambeau    2  Bryson DeChambeau  69  68  69       -3    F   
2         Corey Conners    3      Corey Conners  68  70  70       -2    F   
6             Jason Day   T6          Jason Day  70  70  71       -1    F   
7           Shane Lowry   T6        Shane Lowry  71  68  72        E    F   
15       Viktor Hovland  T14     Viktor Hovland  71  69  73        1    F   
23        Jordan Spieth  T21      Jordan Spieth  73  73  69       -3    F   
27             Jon Rahm  T25           Jon Rahm  75  71  70       -2    F   
28      Joaquin Niemann  T25    Joaquin Niemann  72  74  70       -2    F   

    Total Strokes  
1   -10.0     206  
2    -8.0     208  
6    -5.0     211  
7    -5.0     211  
15   -3.0     213  
23   -1.0     215  
27    NaN     216  
28    NaN     216  


In [29]:
# Calculate the total of the 5 lowest scores for each team
adam_total = adam_team['Total'].nsmallest(5).sum()
phil_total = phil_team['Total'].nsmallest(5).sum()
jordan_total = jordan_team['Total'].nsmallest(5).sum()
steve_total = steve_team['Total'].nsmallest(5).sum()

# Create a new DataFrame with the results
team_totals = pd.DataFrame({
    'Name': ['Adam', 'Phil', 'Jordan', 'Steve'],
    'Total Score': [adam_total, phil_total, jordan_total, steve_total]
})

# Sort the DataFrame by Total Score in ascending order
team_totals = team_totals.sort_values(by='Total Score', ascending=True).reset_index(drop=True)

# Display the new DataFrame
print(team_totals)

     Name  Total Score
0   Steve        -31.0
1    Phil         -7.0
2  Jordan         -5.0
3    Adam         -4.0


In [30]:
# Save the dataframes as HTML files
team_totals.to_html("team_totals.html", index=False)
adam_team.to_html("adam_team.html", index=False)
phil_team.to_html("phil_team.html", index=False)
jordan_team.to_html("jordan_team.html", index=False)
steve_team.to_html("steve_team.html", index=False)

print("HTML files have been created. Upload them to a GitHub repository and enable GitHub Pages to display them.")

HTML files have been created. Upload them to a GitHub repository and enable GitHub Pages to display them.
