In [3]:
# 1. Import and install requirements
import logging
from util.database import Database
from util.scraper import Scraper
import json
import itertools
import chromedriver_autoinstaller

# Handle to the SQLite database addressed by the URL below (path is relative
# to the notebook's working directory).
database = Database("sqlite:///data/pdga_data.db")
# Shared scraper instance; the scraping cells below call its methods.
scraper = Scraper()

# NOTE(review): presumably fetches a chromedriver build matching the local
# Chrome so the scraper can drive a browser. If Scraper() already launches a
# browser in its constructor, this call may need to run first — confirm.
chromedriver_autoinstaller.install()

In [None]:
# 2. Build list of courses and write to JSON
# Fetch the course keys from the scraper, then map each key to the readable
# name the scraper reports for it.
courses = scraper.get_courses_from_dgscene()
course_names = dict(
    (course_key, scraper.get_readable_course_name(course_key))
    for course_key in courses
)

# Persist the mapping; later cells reload it from this file.
with open('data/course_names.json', 'w') as names_file:
    names_file.write(json.dumps(course_names, indent=4))

In [None]:
# 3. Build list of events and write to JSON
# Resumable: courses already present in data/course_events.json are skipped,
# and the file is rewritten after every newly fetched course.
with open('data/course_names.json') as f:
    course_names: dict = json.load(f)

# On the very first run no checkpoint file exists yet; start from an empty
# mapping instead of failing with FileNotFoundError.
try:
    with open('data/course_events.json') as f:
        course_events: dict = json.load(f)
except FileNotFoundError:
    course_events = {}

# Count from 1 so progress reads 1/N .. N/N rather than 0/N .. (N-1)/N.
for i, course in enumerate(course_names, start=1):
    if course in course_events:
        logging.info(f'Skipping {course} (already scraped)...')
        continue

    logging.info(f'Fetching event {i}/{len(course_names)}')
    course_events[course] = scraper.get_all_sanctioned_events(course)
    logging.info(course_events[course])

    # Checkpoint the events gathered so far after each course, so an
    # interrupted run can resume without re-fetching.
    with open('data/course_events.json', 'w') as f:
        json.dump(course_events, f, indent=4)

logging.info("Done")

In [None]:
# 4. Fetch ratings for each event and load into DB
# For every course in data/course_events.json, fetch round ratings for each
# event not yet in the database, then insert the course record with the
# rounds gathered in this run.
try:
    logging.info('Fetching ratings...')

    with open('data/course_names.json') as f:
        course_names: dict = json.load(f)
    with open('data/course_events.json') as f:
        course_events: dict = json.load(f)

    for i, course in enumerate(course_events):
        events = course_events[course]
        rounds = []

        for j, event in enumerate(events):
            event_id = event['event_id']
            # Events already stored are skipped so the cell can be re-run.
            if database.event_exists(event_id):
                logging.info(f'Skipping {event_id} (already scraped)...')
                continue

            course_ratings = scraper.get_round_ratings_for_tournament(event_id)
            rounds.extend(course_ratings)
            logging.info(f'Event {j+1}/{len(events)} - Course {i+1}/{len(course_events)}')

        data = {
            'course_name': course,
            'readable_course_name': course_names[course],
            'events': course_events[course],
            'rounds': rounds
        }
        database.insert_course_data(data)

except KeyboardInterrupt:
    # Must be listed before the general handler; interrupting the kernel is
    # the normal way to stop this long-running cell.
    logging.warning('Error fetching ratings: interrupted by user')
except Exception:
    # logging.exception records the message at ERROR level together with the
    # active traceback (the original called e.with_traceback() without its
    # required argument, which itself raised TypeError).
    logging.exception('Error fetching ratings')
finally:
    # Always release the scraper's resources, even if a handler fails.
    scraper.cleanup()

logging.info("Done")

In [None]:
from models.round import group_comparable_rounds
from util.database import Round, Database
from fuzzywuzzy import process, fuzz
import numpy as np

# Ad-hoc check of comparable-round grouping for a single course and layout.
# NOTE(review): the next cell repeats this analysis but calls
# Database.query_rounds_for_course instead of query_all_course_rounds —
# confirm which method is current and drop the stale cell.
course_name = 'Ship Rock DGC'
layout_name = 'Gold'
database = Database("sqlite:///data/pdga_data.db")
rounds = database.query_all_course_rounds(readable_course_name=course_name)
all_layout_names = {rnd.layout_name for rnd in rounds}

# Ten best fuzzy matches with no score cutoff. Not used below; presumably kept
# so the (name, score) pairs can be inspected interactively.
scored_layouts: list[tuple[str, int]] = process.extractBests(layout_name, all_layout_names, scorer=fuzz.partial_token_sort_ratio, score_cutoff=0, limit=10)
# Layout names scoring at least 75 against the requested layout are kept.
matching_layout_names = [layout for layout, _ in process.extractBests(layout_name, all_layout_names, scorer=fuzz.partial_token_sort_ratio, score_cutoff=75, limit=100)]
# `rnd` rather than `round` so the builtin round() is not shadowed.
matching_rounds = [rnd for rnd in rounds if rnd.layout_name in matching_layout_names]
grouped_rounds = group_comparable_rounds(matching_rounds, threshold=1)

print(f"Rounds: {len(matching_rounds)}")
print(f"Clusters: {len(grouped_rounds)}")
# Per cluster: mean total distance, hole distances, each round's total
# distance, and the number of rounds in the cluster.
for layout in grouped_rounds:
    print(int(np.mean([r.layout_total_distance for r in layout.rounds_used])))
    print(layout.layout_hole_distances)
    print([r.layout_total_distance for r in layout.rounds_used])
    print(len(layout.rounds_used))


In [None]:
# Comparable round grouping testing
from models.round import group_comparable_rounds
from util.database import Round, Database
from fuzzywuzzy import process, fuzz
import numpy as np

# NOTE(review): the previous cell runs the same analysis through
# Database.query_all_course_rounds — confirm which method is current and
# drop the stale cell.
course_name = 'Ship Rock DGC'
layout_name = 'Gold'
database = Database("sqlite:///data/pdga_data.db")
rounds = database.query_rounds_for_course(readable_course_name=course_name)
all_layout_names = {rnd.layout_name for rnd in rounds}

# Ten best fuzzy matches with no score cutoff. Not used below; presumably kept
# so the (name, score) pairs can be inspected interactively.
scored_layouts: list[tuple[str, int]] = process.extractBests(layout_name, all_layout_names, scorer=fuzz.partial_token_sort_ratio, score_cutoff=0, limit=10)
# Layout names scoring at least 75 against the requested layout are kept.
matching_layout_names = [layout for layout, _ in process.extractBests(layout_name, all_layout_names, scorer=fuzz.partial_token_sort_ratio, score_cutoff=75, limit=100)]
# `rnd` rather than `round` so the builtin round() is not shadowed.
matching_rounds = [rnd for rnd in rounds if rnd.layout_name in matching_layout_names]
grouped_rounds = group_comparable_rounds(matching_rounds, threshold=1)

print(f"Rounds: {len(matching_rounds)}")
print(f"Clusters: {len(grouped_rounds)}")
# Per cluster: mean total distance, hole distances, each round's total
# distance, and the number of rounds in the cluster.
for layout in grouped_rounds:
    print(int(np.mean([r.layout_total_distance for r in layout.rounds_used])))
    print(layout.layout_hole_distances)
    print([r.layout_total_distance for r in layout.rounds_used])
    print(len(layout.rounds_used))


In [4]:
from scripts.scrape_events import scrape_events

# Read the scraper settings from config/scraper.json and pass the parsed
# dict straight to the event-scraping script.
with open('config/scraper.json') as config_file:
    config: dict = json.loads(config_file.read())

scrape_events(config)

[29/Nov/2024 21:28:45] INFO - Scraping events...
[29/Nov/2024 21:28:46] INFO - Fetching events for 22 Birdwalk...
[29/Nov/2024 21:28:46] INFO - Progress: 1/7365 = 0.01%, Elapsed time: 00:00:00 seconds
[29/Nov/2024 21:28:46] INFO - 
[29/Nov/2024 21:28:46] INFO - Fetching events for 4 Wing Campground...
[29/Nov/2024 21:28:46] INFO - Progress: 2/7365 = 0.03%, Elapsed time: 00:00:01 seconds
[29/Nov/2024 21:28:46] INFO - 
[29/Nov/2024 21:28:46] INFO - Fetching events for Alexander Hamilton Park...
[29/Nov/2024 21:28:47] INFO - Progress: 3/7365 = 0.04%, Elapsed time: 00:00:01 seconds
[29/Nov/2024 21:28:47] INFO - 
[29/Nov/2024 21:28:47] INFO - Fetching events for Anderson Park...
[29/Nov/2024 21:28:47] INFO - Progress: 4/7365 = 0.05%, Elapsed time: 00:00:01 seconds
[29/Nov/2024 21:28:47] INFO - 
[29/Nov/2024 21:28:47] INFO - Fetching events for Bear Lake Open...
[29/Nov/2024 21:28:47] INFO - Progress: 5/7365 = 0.07%, Elapsed time: 00:00:01 seconds
[29/Nov/2024 21:28:47] INFO - 
[29/Nov/2024 

In [None]:
from scripts.scrape_ratings import scrape_ratings

# Read the scraper settings from config/scraper.json and pass the parsed
# dict straight to the ratings-scraping script.
with open('config/scraper.json') as config_file:
    config: dict = json.loads(config_file.read())

scrape_ratings(config)