In [None]:
import requests
import pandas as pd
import json
import time
import os

In [None]:
## Scrape all games and possible outcomes for 2024 men and women.
## For every competition two CSV files are written to the working directory:
##   {comp}_games.csv          - the propositions returned by the API (minus 'possibleOutcomes')
##   {comp}_games_outcomes.csv - one row per possible outcome, left-joined to its proposition

# Browser-like headers sent with the propositions request.
# Two captured values were intentionally changed:
#   * 'If-None-Match' (a stale ETag) was dropped: a conditional GET may be
#     answered with "304 Not Modified" and an empty body, which cannot be parsed.
#   * 'br' was dropped from 'Accept-Encoding': Requests only decodes Brotli when
#     an optional Brotli library is installed; gzip/deflate are always decoded.
prop_headers = {
    'method': 'GET',
    'scheme': 'https',
    'authority': 'gambit-api.fantasy.espn.com',
    'Accept': 'application/json',
    'Sec-Fetch-Site': 'same-site',
    'Accept-Encoding': 'gzip, deflate',
    'Sec-Fetch-Mode': 'cors',
    'Accept-Language': 'en-US,en;q=0.9',
    'Origin': 'https://fantasy.espn.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3.1 Safari/605.1.15',
    'Referer': 'https://fantasy.espn.com/',
    'Connection': 'keep-alive',
    'Host': 'gambit-api.fantasy.espn.com',
    'Sec-Fetch-Dest': 'empty'
}

# competitions = [240, 241]
competitions = [240] # DLS

# Upper bound on the number of propositions flattened into outcomes. Mirrors the
# original hard-coded range(0, 63) -- presumably the 63 games of a 64-team
# bracket (TODO confirm). Slicing, unlike indexing, tolerates shorter responses.
MAX_GAMES = 63
# Seconds to wait for the API before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

OUTCOME_COLUMNS = [
    'id', 'outcome_id', 'regionCompetitorId', 'matchupPosition',
    'percentage', 'scoringFormatId', 'seed', 'ranking',
]

for competition in competitions:
    # 240 is treated as the men's challenge; anything else as the women's.
    comp = 'mens' if competition == 240 else 'womens'

    response = requests.get(
        f"https://gambit-api.fantasy.espn.com/apis/v1/propositions?challengeId={competition}",
        headers=prop_headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on HTTP errors rather than trying to parse an error page.
    response.raise_for_status()
    prop = response.text
    # Parse the payload once; every later step reuses this list.
    propositions = json.loads(prop)

    games = pd.DataFrame(propositions).drop(columns=['possibleOutcomes'])
    games.to_csv(f'{comp}_games.csv', index=False)

    # One record per possible outcome. Building whole records keeps every column
    # the same length even when an outcome has no SEED or RANKING mapping
    # (those cells are simply left empty).
    outcome_rows = []
    for proposition in propositions[:MAX_GAMES]:
        for outcome in proposition['possibleOutcomes']:
            mapping_values = {
                item['type']: item.get('value') for item in outcome['mappings']
            }
            # Only the first choice counter is recorded.
            counter = outcome['choiceCounters'][0]
            outcome_rows.append({
                'id': proposition['id'],
                'outcome_id': outcome['id'],
                'regionCompetitorId': outcome['regionCompetitorId'],
                'matchupPosition': outcome['matchupPosition'],
                'percentage': counter['percentage'],
                'scoringFormatId': counter['scoringFormatId'],
                'seed': mapping_values.get('SEED'),
                'ranking': mapping_values.get('RANKING'),
            })

    # Explicit columns keep the schema stable even if no outcomes were found.
    game_outcomes = pd.DataFrame(outcome_rows, columns=OUTCOME_COLUMNS)

    game_outcomes = pd.merge(game_outcomes, games, on='id', how='left')
    game_outcomes.to_csv(f'{comp}_games_outcomes.csv', index=False)

In [None]:
## Collect every group ID (the brackets live inside groups) for 2024 men and women.
## Output per competition: {comp}_group_ids.csv with id, visibility, name and size.
for competition in competitions:
    # 240 maps to the men's files; every other id maps to the women's files.
    comp = 'mens' if competition == 240 else 'womens'

    url = f"https://gambit-api.fantasy.espn.com/apis/v1/challenges/{competition}/groups?platform=chui&view=chui_default_group"
    # The 'gambit-filter' header carries the paging/sort request as JSON text.
    group_headers = {
            'Accept': 'application/json',
            'gambit-filter': f"{{\"limit\":100000,\"offset\":0,\"sortRank\":{{\"sortAsc\":true,\"sortPriority\":1}}}}",
            'Origin': 'https://fantasy.espn.com',
            'Referer': 'https://fantasy.espn.com/',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site'}

    response = requests.get(url, headers = group_headers)
    groups = json.loads(response.text)

    # Pull each field of interest out of the group records, column by column.
    gids = [group['groupId'] for group in groups]
    gid_public = [group['groupSettings']['public'] for group in groups]
    gid_name = [group['groupSettings']['name'] for group in groups]
    gid_size = [group['size'] for group in groups]

    groups_df = pd.DataFrame({
        'group_id': gids,
        'gid_public': gid_public,
        'gid_name': gid_name,
        'gid_size': gid_size
        })

    groups_df.to_csv(f'{comp}_group_ids.csv', index=False)


In [None]:
## Scrape all brackets for 2024 men and women.
## Step 1: page through every public group below the size cap and record
##         (bracket_id, group_id) pairs in {comp}_bracket_ids.csv.
## Step 2: download each bracket and write its picks to {comp}_picks/{bracket_id}.csv.

REQUEST_TIMEOUT = 30    # seconds; without a timeout a stalled connection hangs the scrape
PAGE_LIMIT = 200        # entries requested per group page
MAX_GROUP_SIZE = 5000   # groups with this many members or more are skipped

API_ROOT = "https://gambit-api.fantasy.espn.com/apis/v1/challenges"
# Challenge slug used in the group and entry URLs of each competition.
CHALLENGE_SLUGS = {
    'mens': 'tournament-challenge-bracket-2024',
    'womens': 'tournament-challenge-bracket-women-2024',
}
# Failures that mean "this response is unusable": network/HTTP errors, invalid
# JSON (json.JSONDecodeError is a ValueError) and unexpected payload shapes.
# Deliberately narrower than a bare `except:` so Ctrl-C still stops the run.
FETCH_ERRORS = (requests.exceptions.RequestException, ValueError, KeyError, TypeError)


def _group_page_headers(limit, offset):
    """Return the request headers asking for one page of a group's entries.

    `limit` and `offset` are embedded in the JSON text of the 'gambit-filter' header.
    """
    return {
        'Accept': 'application/json',
        'gambit-filter': f"{{\"filterSortId\":{{\"value\":null}},\"limit\":{limit},\"offset\":{offset},\"sortRank\":{{\"sortAsc\":true,\"sortPriority\":1}}}}",
        'Origin': 'https://fantasy.espn.com',
        'Referer': 'https://fantasy.espn.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site'}


def _iter_group_entry_ids(url):
    """Yield the id of every entry in the group at `url`, page by page.

    The group size reported by the first page decides how many pages are
    requested; paging also stops as soon as a page comes back empty.
    Implemented as a generator so ids from pages that were already fetched are
    kept by the caller even if a later page fails.

    Raises one of FETCH_ERRORS when a page cannot be fetched or parsed.
    """
    offset = 0
    gsize = None
    while True:
        response = requests.get(url,
                                headers=_group_page_headers(PAGE_LIMIT, offset),
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        page = json.loads(response.text)
        if gsize is None:
            gsize = page['size']
        entries = page['entries']
        for entry in entries:
            yield entry['id']
        if not entries or gsize <= offset + PAGE_LIMIT:
            return
        time.sleep(2)  # pause between pages of the same group
        offset += PAGE_LIMIT


def _picks_frame(bracket, bid):
    """Flatten one parsed bracket payload into a one-row-per-pick DataFrame.

    Missing member or pick fields are recorded as the string "NA".
    Raises KeyError/TypeError when the payload has no usable 'picks' list.
    """
    picks = bracket['picks']

    # The member is the same for every pick, so resolve it once.
    member = bracket.get('member')
    if not isinstance(member, dict):
        member = {}
    name = member.get('displayName', "NA")
    user_id = member.get('id', "NA")

    names = []
    ids = []
    bids = []
    proposition_ids = []
    outcome_ids = []
    for pick in picks:
        names.append(name)
        ids.append(user_id)
        bids.append(bid)
        try:
            outcome_ids.append(pick['outcomesPicked'][0]['outcomeId'])
        except (KeyError, IndexError, TypeError):
            outcome_ids.append("NA")
        try:
            proposition_ids.append(pick['propositionId'])
        except (KeyError, TypeError):
            proposition_ids.append("NA")

    return pd.DataFrame({
        'name': names,
        'user_id': ids,
        'bracket_id': bids,
        'game_id': proposition_ids,
        'pick_id': outcome_ids
    })


for competition in competitions:
    # 240 is treated as the men's challenge; anything else as the women's.
    comp = 'mens' if competition == 240 else 'womens'
    challenge_url = f"{API_ROOT}/{CHALLENGE_SLUGS[comp]}"

    # exist_ok lets the cell be re-run without failing on the existing folder.
    os.makedirs(f"{comp}_picks", exist_ok=True)

    groups = pd.read_csv(f'{comp}_group_ids.csv')
    # Filter to gid_public TRUE and gid_size below the cap
    groups = groups[(groups['gid_public'] == True) & (groups['gid_size'] < MAX_GROUP_SIZE)]
    # Small groups first
    groups = groups.sort_values(by='gid_size')
    gids = groups['group_id']
    entry_ids = []
    group_ids = []

    for gid in gids:
        time.sleep(1)
        url = f"{challenge_url}/groups/{gid}?view=chui_default_group&platform=chui"
        try:
            for entry_id in _iter_group_entry_ids(url):
                entry_ids.append(entry_id)
                group_ids.append(gid)
        except FETCH_ERRORS as err:
            # One bad group must not throw away the ids collected so far.
            print(f"Skipping remainder of group {gid}: {err!r}")

    group_df = pd.DataFrame({
        'bracket_id': entry_ids,
        'group_id': group_ids
    })

    group_df.to_csv(f'{comp}_bracket_ids.csv', index=False)

    brackets = pd.read_csv(f'{comp}_bracket_ids.csv')
    # A bracket that belongs to several groups appears once per group; fetching
    # it once is enough because the output file name depends only on its id.
    bracket_ids = list(dict.fromkeys(brackets['bracket_id']))

    for bid in bracket_ids:
        time.sleep(1)
        try:
            # Use this competition's challenge slug (the men's slug was
            # previously hard-coded here for both competitions).
            response = requests.get(
                f"{challenge_url}/entries/{bid}?platform=chui&view=chui_default",
                timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            bracket = json.loads(response.text)  # parsed once per bracket
            bdf = _picks_frame(bracket, bid)
            bdf.to_csv(f'{comp}_picks/{bid}.csv', index=False)
        except FETCH_ERRORS + (OSError,) as err:
            # Best effort: report the failure and move on to the next bracket.
            print(f"Skipping bracket {bid}: {err!r}")
            continue
