In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import json
import requests
import numpy as np
import re

### Masters

In [12]:
# from code 
def scrape_data():
    # Replace 'url' with the URL of the webpage containing the table
    url = 'https://www.pgatour.com/tournaments/2024/masters-tournament/R2024014/past-results'
    response = requests.get(url)

    print("HTTP Response Status Code:", response.status_code)

    soup = BeautifulSoup(response.content, 'html.parser')

    # Print the first 200 characters of the HTML to check if it's what you expect
    # print("HTML Content (first 200 characters):", soup.prettify()[:200])

    table = soup.find('table', class_='chakra-table')

    # Check if the table was found
    # if table:
    #   print("**** Table found")

    data = []
    headers = []

    for row in table.find_all('tr'):
        cols = row.find_all(['th', 'td'])
        if not headers:
            headers = [header.text.strip() for header in cols]
        else:
            row_data = [col.text.strip() for col in cols]
            data.append(row_data)


    df = pd.DataFrame(data, columns=headers)

    # Your existing code for data cleaning and formatting
    df.drop(columns=['FedExCup Pts', 'Official Money'], inplace=True)
    df['Index'] = df.index
    df.set_index('Index', inplace=True)
    df['Player'] = df['Player'].str.replace('\(a\)', '', regex=True)
    df = df[df['Player'] != 'None']
    df = df.dropna(subset=['Player'], how='any')
    df = df[df['Player'].astype(str).str.strip() != '']  # Drop rows with empty strings in 'Player' column
    def is_non_numeric_or_blank(s):
        return not bool(re.match(r'^\+?-?\d*\.?\d*$', str(s)))

    columns_to_check = ['R1', 'R2', 'R3', 'R4', 'To Par']
    df[columns_to_check] = df[columns_to_check].fillna('0')

    for col in columns_to_check:
        df[col] = df[col].apply(lambda x: '0' if pd.isna(x) or is_non_numeric_or_blank(x) else x)

    # Updated section to handle empty strings and '-' cases
    for index, row in df.iterrows():
        for col in columns_to_check:
            val = row[col]
            if isinstance(val, str) and val.strip():  # Check for non-empty strings
                if val.startswith('+'):
                    val = int(val[1:])  # Convert to positive integer
                elif val.startswith('-') and val[1:]:  # Check for non-empty string after '-'
                    val = -int(val[1:])  # Convert to negative integer
                elif val.isdigit() or (val[1:].isdigit() and val[0] == '-'):
                    val = int(val)  # Convert to integer if it's a digit or negative digit

                # Update the value back to the DataFrame
                df.at[index, col] = val
            else:
                df.at[index, col] = 0  # Replace empty strings with 0

    return df
df = scrape_data()
df.head()

HTTP Response Status Code: 200


Unnamed: 0_level_0,Pos,Player,R1,R2,R3,R4,To Par
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1,Jon Rahm,-7,-3,1,-3,-12
1,T2,Phil Mickelson,-1,-3,3,-7,-8
2,T2,Brooks Koepka,-7,-5,1,3,-8
3,T4,Jordan Spieth,-3,-2,4,-6,-7
4,T4,Patrick Reed,-1,-2,0,-4,-7


In [38]:
# original
def scrape_data():
    
    url = 'https://www.pgatour.com/tournaments/2024/masters-tournament/R2024014/past-results'
    response = requests.get(url)

    print("HTTP Response Status Code:", response.status_code)

    soup = BeautifulSoup(response.content, 'html.parser')

    print("HTML Content (first 200 characters):", soup.prettify())

    table = soup.find('table', class_='chakra-table')
    # Check if the table was found
    if table:
        print("Table found!")

    data = []
    headers = []

    for row in table.find_all('tr'):
        cols = row.find_all(['th', 'td'])
        if not headers:
            headers = [header.text.strip() for header in cols]
        else:
            row_data = [col.text.strip() for col in cols]
            data.append(row_data)

    # Print the headers and a sample row to verify the data
    print("Headers:", headers)
    if data:
        print("Sample Row:", data[0])

    df = pd.DataFrame(data, columns=headers)

    return df

In [39]:
df = scrape_data()

HTTP Response Status Code: 200
HTML Content (first 200 characters): <!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width" name="viewport"/>
  <title>
   Masters Tournament 2024 Golf Leaderboard - PGA TOUR - Past Results
  </title>
  <meta content="Masters Tournament 2024 Golf Leaderboard - PGA TOUR - Past Results" name="title" property="og:title"/>
  <meta content="website" name="type" property="og:type"/>
  <meta content="PGA TOUR Tournament Past Results 2024 Masters Tournament, Augusta - Golf Scores and Results" name="description" property="og:description"/>
  <meta content="Masters Tournament 2024 Golf Leaderboard - PGA TOUR - Past Results" name="parsely-title"/>
  <meta content="PGA TOUR" name="parsely-author"/>
  <meta content="Leaderboard, Past Results" name="parsely-tags"/>
  <meta content="Leaderboard - Past Results" name="parsely-section"/>
  <meta content="non-post" name="parsely-type"/>
  <link href="/apple-touch-icon.png?v=4

Headers: ['Pos', 'Player', 'R1', 'R2', 'R3', 'R4', 'To Par', 'FedExCup Pts', 'Official Money']
Sample Row: ['1', 'Jon Rahm', '-7', '-3', '+1', '-3', '-12', '600.00', '$3,240,000']


In [44]:
df.head(50)

Unnamed: 0,Pos,Player,R1,R2,R3,R4,To Par,FedExCup Pts,Official Money
0,1,Jon Rahm,-7,-3,+1,-3,-12,600.0,"$3,240,000"
1,T2,Phil Mickelson,-1,-3,+3,-7,-8,0.0,"$1,584,000"
2,T2,Brooks Koepka,-7,-5,+1,+3,-8,0.0,"$1,584,000"
3,T4,Jordan Spieth,-3,-2,+4,-6,-7,126.667,"$744,000"
4,T4,Patrick Reed,-1,-2,E,-4,-7,0.0,"$744,000"
5,,,,,,,,,
6,T4,Russell Henley,+1,-5,-1,-2,-7,126.667,"$744,000"
7,T7,Cameron Young,-5,E,+3,-4,-6,97.0,"$580,500"
8,T7,Viktor Hovland,-7,+1,-2,+2,-6,97.0,"$580,500"
9,9,Sahith Theegala,+1,-2,+1,-5,-5,88.0,"$522,000"


In [45]:
import pandas as pd
import re

# Assuming 'df' is your DataFrame

# Your existing code for data cleaning and formatting
df.drop(columns=['FedExCup Pts', 'Official Money'], inplace=True)
df['Index'] = df.index
df.set_index('Index', inplace=True)
df['Player'] = df['Player'].str.replace(' \(a\)', '', regex=True)

def is_non_numeric_or_blank(s):
    return not bool(re.match(r'^\+?-?\d*\.?\d*$', str(s)))
def exact_plus_or_minus(s):
    return not bool(re.match(r'^[+-]$', str(s)))

columns_to_check = ['R1', 'R2', 'R3', 'R4', 'To Par']
df[columns_to_check] = df[columns_to_check].fillna('0')

for col in columns_to_check:
    df[col] = df[col].apply(lambda x: '0' if pd.isna(x) or is_non_numeric_or_blank(x) else x)
    df[col] = df[col].apply(lambda x: '0' if pd.isna(x) or exact_plus_or_minus(x) else x)

# Updated section to handle empty strings and '-' cases
for index, row in df.iterrows():
    for col in columns_to_check:
        val = row[col]
        if isinstance(val, str) and val.strip():  # Check for non-empty strings
            if val.startswith('+'):
                val = int(val[1:])  # Convert to positive integer
            elif val.startswith('-') and val[1:]:  # Check for non-empty string after '-'
                val = -int(val[1:])  # Convert to negative integer
            elif val.isdigit() or (val[1:].isdigit() and val[0] == '-'):
                val = int(val)  # Convert to integer if it's a digit or negative digit

            # Update the value back to the DataFrame
            df.at[index, col] = val
        else:
            df.at[index, col] = 0  # Replace empty strings with 0

In [46]:
df.head(100)

Unnamed: 0_level_0,Pos,Player,R1,R2,R3,R4,To Par
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1,Jon Rahm,0,0,0,0,0
1,T2,Phil Mickelson,0,0,0,0,0
2,T2,Brooks Koepka,0,0,0,0,0
3,T4,Jordan Spieth,0,0,0,0,0
4,T4,Patrick Reed,0,0,0,0,0
...,...,...,...,...,...,...,...
85,CUT,Sandy Lyle,0,0,0,0,0
86,W/D,Kevin Na,-,0,0,0,0
87,W/D,Will Zalatoris,-,0,0,0,0
88,W/D,Louis Oosthuizen,0,0,0,0,0


In [34]:
df.head(100)

Unnamed: 0_level_0,Pos,Player,R1,R2,R3,R4,To Par
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1,Jon Rahm,-7,-3,0,-3,-12
1,T2,Phil Mickelson,-1,-3,0,-7,-8
2,T2,Brooks Koepka,-7,-5,0,0,-8
3,T4,Jordan Spieth,-3,-2,0,-6,-7
4,T4,Patrick Reed,-1,-2,0,-4,-7
...,...,...,...,...,...,...,...
85,CUT,Sandy Lyle,0,0,0,0,0
86,W/D,Kevin Na,0,0,0,0,0
87,W/D,Will Zalatoris,0,0,0,0,0
88,W/D,Louis Oosthuizen,0,0,0,0,0


In [28]:
##df.replace('', np.nan, inplace=True)
#df.dropna(axis=0, how='any', inplace=True)
df['Player'] = df['Player'].str.replace(' \(a\)', '', regex=True)
#df = df[~df['Pos'].str.contains('CUT')]
columns_to_replace = ['R1', 'R2', 'R3', 'R4', 'To Par']
df[columns_to_replace] = df[columns_to_replace].replace('E', '0')
columns_to_replace = ['R1', 'R2', 'R3', 'R4', 'To Par']
df[columns_to_replace] = df[columns_to_replace].fillna('0')

Unnamed: 0_level_0,Pos,Player,R1,R2,R3,R4,To Par
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1,Jon Rahm,-7,-3,+1,-3,-12
1,T2,Phil Mickelson,-1,-3,+3,-7,-8
2,T2,Brooks Koepka,-7,-5,+1,+3,-8
3,T4,Jordan Spieth,-3,-2,+4,-6,-7
4,T4,Patrick Reed,-1,-2,0,-4,-7
...,...,...,...,...,...,...,...
85,CUT,Sandy Lyle,+9,+11,0,0,+20
86,W/D,Kevin Na,-,0,0,0,
87,W/D,Will Zalatoris,-,0,0,0,
88,W/D,Louis Oosthuizen,+4,0,0,0,+4


In [30]:
players = df
players['id'] = players['Player'].str.replace(' ', '').str.upper()
df.drop(columns=['Pos','R1','R2','R3','R4','To Par'], inplace=True)
players.head()
players.to_csv("players.csv")

SyntaxError: EOF while scanning triple-quoted string literal (390595961.py, line 6)

In [4]:
players = pd.read_csv('app/players_tiered.csv')
players.head()

Unnamed: 0.1,Unnamed: 0,Golfer,Rank,Odds,Tier
0,2,Scottie Scheffler,1,7/1,1
1,3,Rory McIlroy,2,7/1,1
2,5,Viktor Hovland,4,35/1,1
3,6,Patrick Cantlay,5,18/1,1
4,14,Jordan Spieth,14,16/1,1


In [22]:
# Sort the DataFrame by the 'Rank' column in ascending order
df = df.sort_values(by='Rank')

# Calculate the number of players in each tier
num_players = len(df)
players_per_tier = num_players // 6

# Create a Tier column
df['Tier'] = (df.index // players_per_tier) + 1

# Reset the index to start from 1
df.index = df.index + 1

df = df.sort_values(by='Tier', ascending=True)

# Display the DataFrame
df.head(30)

Unnamed: 0,Golfer,Rank,Odds,Tier
2,Scottie Scheffler,1,7/1,1
3,Rory McIlroy,2,7/1,1
5,Viktor Hovland,4,35/1,1
6,Patrick Cantlay,5,18/1,1
14,Jordan Spieth,14,16/1,1
7,Xander Schauffele,6,22/1,1
4,Jon Rahm,3,9/1,1
12,Collin Morikawa,12,22/1,1
10,Brian Harman,9,130/1,1
13,Tyrrell Hatton,13,50/1,1


In [19]:


# Reset the index if you want to reindex the DataFrame
#df = df.reset_index(drop=True)

In [20]:
df.head(100)

Unnamed: 0,Golfer,Rank,Odds,Tier
1,Scottie Scheffler,1,7/1,1
3,Jon Rahm,3,9/1,1
2,Rory McIlroy,2,7/1,1
5,Patrick Cantlay,5,18/1,1
14,Tommy Fleetwood,15,50/1,1
...,...,...,...,...
82,Larry Mize,>200,5000/1,6
74,Aldrich Potgieter,>200,2500/1,6
72,Gordon Sargent,>200,1000/1,6
73,Sam Bennett,>200,2000/1,6


In [24]:
df.to_csv("app/static/players_tiered.csv")

### Sentry

In [14]:
def scrape_data():

    

    return df

#df = scrape_data()

In [4]:
API_BASE_URL = "https://live-golf-data.p.rapidapi.com"

API_DEFAULT_HEADERS = {
    "x-rapidapi-host": "live-golf-data.p.rapidapi.com",
    "x-rapidapi-key": "a0ad1bcf9amsh91cc2112fc6cc5bp10c0dajsne93b3dec7c82",
}

url = "{}/leaderboard".format(API_BASE_URL)
query_string = {
    orgId: '1',
    tournId: '016',
    year: '2022'
}

    

resp = requests.get(url=url, headers=API_DEFAULT_HEADERS, params=query_string)
print(resp)

NameError: name 'tournId' is not defined

### Sentry Leaderboard Prep

In [10]:
import requests
import re

url = "https://live-golf-data.p.rapidapi.com/leaderboard"

leaderboard_scores = {"orgId":"1","tournId":"016","year":"2022"}

headers = {
"X-RapidAPI-Key": "a0ad1bcf9amsh91cc2112fc6cc5bp10c0dajsne93b3dec7c82",
"X-RapidAPI-Host": "live-golf-data.p.rapidapi.com"
}

response = requests.get(url, headers=headers, params=leaderboard_scores)

print(response.json())

{'_id': {'$oid': '61db2b8ec8ea597c24001db6'}, 'tournId': '016', 'roundId': {'$numberInt': '4'}, 'roundStatus': 'Official', 'year': '2022', 'cutLines': [{'cutCount': None, 'cutScore': 'E'}], 'leaderboardRows': [{'lastName': 'Smith', 'firstName': 'Cameron', 'playerId': '35891', 'courseId': '656', 'status': 'active', 'total': '-34', 'currentRoundScore': '-8', 'position': '1', 'totalStrokesFromCompletedRounds': '258', 'roundComplete': True, 'rounds': [{'scoreToPar': '-8', 'roundId': {'$numberInt': '1'}, 'strokes': {'$numberInt': '65'}, 'courseId': '656', 'courseName': 'Plantation Course at Kapalua'}, {'scoreToPar': '-9', 'roundId': {'$numberInt': '2'}, 'strokes': {'$numberInt': '64'}, 'courseId': '656', 'courseName': 'Plantation Course at Kapalua'}, {'scoreToPar': '-9', 'roundId': {'$numberInt': '3'}, 'strokes': {'$numberInt': '64'}, 'courseId': '656', 'courseName': 'Plantation Course at Kapalua'}, {'scoreToPar': '-8', 'roundId': {'$numberInt': '4'}, 'strokes': {'$numberInt': '65'}, 'cours

In [11]:
def process_leaderboard_data(response):
    # Parse the JSON string
    json_data = response.json()

    # Extracting the 'leaderboardRows' data
    leaderboard_data = json_data.get('leaderboardRows', [])

    # Function to cleanse score values
    def cleanse_score(score):
        if score == 'E':
            return 0
        elif score is None:
            return None
        else:
            return int(score)

    # Creating lists to hold extracted data
    player_data = []

    # Extracting required data elements from the leaderboard data
    for player in leaderboard_data:
        player_info = {
            'Pos': player.get('position'),
            'Player': player.get('firstName') + ' ' + player.get('lastName'),
            'playerId': player.get('playerId'),
            'R1': cleanse_score(next((round_data['scoreToPar'] for round_data in player['rounds'] if round_data['roundId']['$numberInt'] == '1'), None)),
            'R2': cleanse_score(next((round_data['scoreToPar'] for round_data in player['rounds'] if round_data['roundId']['$numberInt'] == '2'), None)),
            'R3': cleanse_score(next((round_data['scoreToPar'] for round_data in player['rounds'] if round_data['roundId']['$numberInt'] == '3'), None)),
            'R4': cleanse_score(next((round_data['scoreToPar'] for round_data in player['rounds'] if round_data['roundId']['$numberInt'] == '4'), None)),
            'To Par': cleanse_score(player.get('total'))
        }
        player_data.append(player_info)

    # Create DataFrame from extracted data
    player_df = pd.DataFrame(player_data)

    return player_df


leaderboard_data = process_leaderboard_data(response)

'''
def generate_player_key(name):
    name_without_special_chars = re.sub(r'[^A-Za-z0-9]+', '', name)
    return name_without_special_chars.upper()

# Apply the function to create the 'PlayerKey' column
df['PlayerKey'] = df['Player'].apply(generate_player_key)
'''
leaderboard_data.head(100)

Unnamed: 0,Pos,Player,playerId,R1,R2,R3,R4,To Par,PlayerKey
0,1,Cameron Smith,35891,-8,-9,-9,-8,-34,CAMERONSMITH
1,2,Jon Rahm,46970,-7,-7,-12,-7,-33,JONRAHM
2,3,Matt Jones,26300,-3,-6,-11,-12,-32,MATTJONES
3,4,Patrick Cantlay,35450,-7,-6,-7,-6,-26,PATRICKCANTLAY
4,T5,Collin Morikawa,50525,-5,-3,-6,-11,-25,COLLINMORIKAWA
5,T5,Justin Thomas,33448,1,-6,-12,-8,-25,JUSTINTHOMAS
6,T5,Daniel Berger,40026,-7,-7,-7,-4,-25,DANIELBERGER
7,T8,Kevin Kisner,29478,-4,-5,-7,-8,-24,KEVINKISNER
8,T8,Sungjae Im,39971,-6,-6,-8,-4,-24,SUNGJAEIM
9,T10,Cam Davis,45157,-4,-5,-7,-7,-23,CAMDAVIS


### Sentry Player Prep - for drafting purposes

In [2]:
import requests
import re

url = "https://live-golf-data.p.rapidapi.com/tournament"

qualifiers = {"orgId":"1","tournId":"016","year":"2022"}

headers = {
	"X-RapidAPI-Key": "a0ad1bcf9amsh91cc2112fc6cc5bp10c0dajsne93b3dec7c82",
	"X-RapidAPI-Host": "live-golf-data.p.rapidapi.com"
}

qualifiers = requests.get(url, headers=headers, params=qualifiers)

print(qualifiers.json())

{'_id': {'$oid': '61afc70ac2ab0fbcbc8e7327'}, 'tournId': '016', 'name': 'Sentry Tournament of Champions', 'date': {'weekNumber': '1', 'start': {'$date': {'$numberLong': '1641427200000'}}, 'end': {'$date': {'$numberLong': '1641686400000'}}}, 'format': 'stroke', 'courses': [{'host': 'Yes', 'location': {'state': 'HI', 'city': 'Kapalua, Maui', 'country': 'USA'}, 'courseName': 'Plantation Course at Kapalua', 'courseId': '656', 'parFrontNine': '36', 'parBackNine': '37', 'parTotal': '73', 'holes': [{'holeId': {'$numberInt': '1'}, 'rounds': [{'roundId': {'$numberInt': '1'}, 'par': '4', 'distance': '506', 'stimp': '10.5'}, {'roundId': {'$numberInt': '2'}, 'par': '4', 'distance': '524', 'stimp': '10.5'}, {'roundId': {'$numberInt': '3'}, 'par': '4', 'distance': '532', 'stimp': '10.5'}, {'roundId': {'$numberInt': '4'}, 'par': '4', 'distance': '508', 'stimp': '10.5'}]}, {'holeId': {'$numberInt': '2'}, 'rounds': [{'roundId': {'$numberInt': '1'}, 'par': '3', 'distance': '227', 'stimp': '10.5'}, {'rou

In [3]:
def process_qualifier_data(response):
    json_data = response.json()
    player_data = []
    
    players = json_data.get('players', [])
    for player in players:
        player_info = {
            'Player': player.get('firstName') + ' ' + player.get('lastName'),
            'playerId': player.get('playerId'),
        }
        player_data.append(player_info)
    # Creating a DataFrame from the player_data list
    df = pd.DataFrame(player_data)

    return df


qualifiers = process_qualifier_data(qualifiers)

qualifiers.head(100)

Unnamed: 0,Player,playerId
0,Cameron Smith,35891
1,Jon Rahm,46970
2,Matt Jones,26300
3,Patrick Cantlay,35450
4,Collin Morikawa,50525
5,Justin Thomas,33448
6,Daniel Berger,40026
7,Kevin Kisner,29478
8,Sungjae Im,39971
9,Cam Davis,45157


In [4]:
import requests

url = "https://live-golf-data.p.rapidapi.com/stats"

world_ranking = {"year":"2022","statId":"186"}

headers = {
	"X-RapidAPI-Key": "a0ad1bcf9amsh91cc2112fc6cc5bp10c0dajsne93b3dec7c82",
	"X-RapidAPI-Host": "live-golf-data.p.rapidapi.com"
}

world_ranking = requests.get(url, headers=headers, params=world_ranking)

print(world_ranking.json())

{'_id': {'$oid': '61c3b0f422585bef71f02a37'}, 'name': 'Official World Golf Ranking', 'year': '2022', 'weekNum': {'$numberInt': '52'}, 'rankings': [{'lastName': 'McIlroy', 'firstName': 'Rory', 'rank': {'$numberInt': '1'}, 'previousRank': {'$numberInt': '1'}, 'events': {'$numberInt': '45'}, 'totalPoints': {'$numberDouble': '394.24055781934874'}, 'avgPoints': {'$numberDouble': '8.7609'}, 'pointsLost': {'$numberDouble': '-32.947125201798386'}, 'pointsGained': {'$numberDouble': '29.49024292884998'}, 'playerId': '28237'}, {'lastName': 'Scheffler', 'firstName': 'Scottie', 'rank': {'$numberInt': '2'}, 'previousRank': {'$numberInt': '2'}, 'events': {'$numberInt': '51'}, 'totalPoints': {'$numberDouble': '410.69666391772483'}, 'avgPoints': {'$numberDouble': '8.0529'}, 'pointsLost': {'$numberDouble': '-38.25331870090833'}, 'pointsGained': {'$numberDouble': '11.776456171117736'}, 'playerId': '46046'}, {'lastName': 'Rahm', 'firstName': 'Jon', 'rank': {'$numberInt': '3'}, 'previousRank': {'$numberInt

In [5]:
def process_world_rankings(response):
    json_data = response.json()
    player_data = []
    
    players = json_data.get('rankings', [])
    for player in players:
        rank_value = int(player.get('rank', {}).get('$numberInt', '0'))
        player_info = {
            ##'Player': player.get('firstName') + ' ' + player.get('lastName'),
            'playerId': player.get('playerId'),
            'Rank': rank_value

        }
        player_data.append(player_info)
    # Creating a DataFrame from the player_data list
    df = pd.DataFrame(player_data)

    return df


# Process the JSON response
world_ranking = process_world_rankings(world_ranking)

world_ranking.head(100)

Unnamed: 0,playerId,Rank
0,28237,1
1,46046,2
2,46970,3
3,35891,4
4,35450,5
...,...,...
95,48018,96
96,31646,97
97,46340,98
98,,99


### join tournament qualifiers to rankings, and make tiers - for draft purposes 

In [6]:
players_for_draft = pd.merge(qualifiers, world_ranking, on='playerId', how='left')
players_for_draft.head()

Unnamed: 0,Player,playerId,Rank
0,Cameron Smith,35891,4
1,Jon Rahm,46970,3
2,Matt Jones,26300,120
3,Patrick Cantlay,35450,5
4,Collin Morikawa,50525,7


In [12]:
players_for_draft = players_for_draft.sort_values(by='Rank')

# Calculate the number of players in each tier
num_players = len(players_for_draft)
players_per_tier = num_players // 6

# Create a Tier column based on the ascending 'Rank'
players_for_draft['Tier'] = ((players_for_draft['Rank'].rank(method='dense') - 1) // players_per_tier) + 1

# Convert the 'Tier' column to integers
players_for_draft['Tier'] = players_for_draft['Tier'].astype(int)

# Reset the index to start from 1
players_for_draft.index = players_for_draft.index + 1
players_for_draft.reset_index(drop=True, inplace=True)
players_for_draft.head(100)

Unnamed: 0,Player,playerId,Rank,Tier
0,Jon Rahm,46970,3,1
1,Cameron Smith,35891,4,1
2,Patrick Cantlay,35450,5,1
3,Xander Schauffele,48081,6,1
4,Collin Morikawa,50525,7,1
5,Justin Thomas,33448,9,1
6,Viktor Hovland,46717,11,2
7,Max Homa,39977,12,2
8,Tony Finau,29725,13,2
9,Sam Burns,47504,15,2
