# Setup/Imports

In [106]:
from __future__ import annotations

import typing

import yaml
import numpy as np

# Load the tournament results file. The encoding is pinned so that parsing does
# not depend on the platform's default locale encoding (school names may contain
# non-ASCII characters).
with open("data/2023-02-04_solon_invitational_c.yaml", 'r', encoding="utf-8") as file:
    data = yaml.safe_load(file)


## Event Scoring

In [107]:
# Teams sorted by team number, so that list index == team number - 1 (0-indexed).
# NOTE(review): later cells look teams up with `teams[number - 1]`, which assumes
# team numbers run 1..N without gaps — confirm against the data file.
team_list: list[dict[str, str | int]] = sorted(data.get("Teams"), key=lambda x: x['number'])

# Display name is "<school> <suffix>". The trailing strip keeps teams that have no
# suffix from ending up with a dangling space in their name.
teams = [f"{team['school']} {team.get('suffix', '')}".rstrip() for team in team_list]

# Event names in file order (0-indexed)
events = [event['name'] for event in data.get('Events')]


In [108]:
# Names of the trial events; placings in these events are excluded from scoring.
trial_events = [d['name'] for d in data.get('Events') if d.get('trial', False)]
total_scores: dict[str, int] = {t: 0 for t in teams}  # team name : total score (sum)
full_scores: dict[str, list[int]] = {t: [] for t in teams}  # team name : every counted placing

for placement in data.get('Placings'):
    if placement['event'] in trial_events:
        continue  # trial events never contribute to a team's score

    team_name = teams[placement['team'] - 1]
    # A placing without a 'place' entry is scored as len(teams), i.e. last place.
    place = placement.get('place', len(teams))
    total_scores[team_name] += place
    full_scores[team_name].append(place)
    
# print(total_scores)
# print(full_scores)


## Average Score Per School

In [109]:
# Average placing per team: the team's total score divided by the number of
# non-trial events (all events minus the trial ones).
averages = dict(
    (name, total_scores[name] / (len(data.get('Events')) - len(trial_events)))
    for name in teams
)
print(averages)

{'Adlai E. Stevenson High School Gold': 8.08695652173913, 'Adlai E. Stevenson High School Green': 15.826086956521738, 'Archbishop Hoban High School Blue': 40.21739130434783, 'Archbishop Hoban High School Gold': 66.26086956521739, 'Athens Area High School ': 134.0, 'Beachwood High School Gold': 19.869565217391305, 'Beachwood High School White': 50.26086956521739, 'Boyceville High School JV': 34.69565217391305, 'Boyceville High School Varsity': 53.95652173913044, 'Brecksville-Broadview Heights High School ': 44.26086956521739, 'Canal Winchester High School ': 43.69565217391305, 'Centerville High School Black': 13.304347826086957, 'Centerville High School Gold': 24.130434782608695, 'Centerville High School White': 24.52173913043478, 'Chardon High School ': 28.130434782608695, 'Cleveland Heights High School ': 55.95652173913044, 'Columbus Academy ': 59.30434782608695, 'Cumberland Valley High School X': 11.0, 'Cumberland Valley High School Y': 17.608695652173914, 'duPont Manual High School 

# Distinguishing "Bombed" Events
### Using the following methods to determine "bombed" events:
##### These values become the upper fence for the data
- $\alpha \times \overline{X} $
- $\alpha \times (Q_3 - Q_1) + Q_3$ &nbsp; (IQR)
- $\alpha \times \sigma + \overline{X} $&nbsp;&nbsp;&nbsp; (Standard Deviation)

where $\alpha$ is a multiplicative constant


### Mean Method (alpha = 2)

In [110]:
bombed_events = {t: [] for t in teams}

def mean(alpha: int = 2):
    """Flag every placing that is worse than ``alpha`` times the team's average placing.

    Reads the notebook-level ``data``, ``teams``, ``averages`` and ``trial_events``.
    Flagged event names are appended to the notebook-level ``bombed_events`` (this
    function does not clear it first), then the mapping and the mean number of
    flagged events per team are printed. Returns nothing.

    alpha: multiplier applied to the team's average to obtain the upper fence.
    """
    for entry in data.get('Placings'):
        # A placing without a 'place' value counts as len(teams), i.e. last place.
        result = entry.get('place', len(teams))
        school = teams[entry['team']-1]
        if result > averages[school] * alpha and entry['event'] not in trial_events:
            bombed_events[school].append(entry['event'])

    print("\"Bombed\" events by school:", bombed_events, "\n")
    print("Average \"bombed events\": ", sum(len(flagged) for flagged in bombed_events.values())/len(teams))

mean()

"Bombed" events by school: {'Adlai E. Stevenson High School Gold': ['Experimental Design', 'Flight'], 'Adlai E. Stevenson High School Green': ['Scrambler', 'Trajectory', 'Write It Do It'], 'Archbishop Hoban High School Blue': [], 'Archbishop Hoban High School Gold': [], 'Athens Area High School ': [], 'Beachwood High School Gold': ['Astronomy', 'Experimental Design', 'Flight', 'Write It Do It'], 'Beachwood High School White': [], 'Boyceville High School JV': [], 'Boyceville High School Varsity': [], 'Brecksville-Broadview Heights High School ': [], 'Canal Winchester High School ': [], 'Centerville High School Black': ['Cell Biology', 'Write It Do It'], 'Centerville High School Gold': ['Write It Do It'], 'Centerville High School White': [], 'Chardon High School ': [], 'Cleveland Heights High School ': [], 'Columbus Academy ': [], 'Cumberland Valley High School X': ['Write It Do It'], 'Cumberland Valley High School Y': ['Forensics', 'Remote Sensing'], 'duPont Manual High School Red': ['F

### IQR Method (alpha = 1.5, standard outlier formula)

In [111]:
bombed_events = {t: [] for t in teams}

def iqr(alpha: float = 1.5):
    """Flag every placing above the team's IQR upper fence, ``Q3 + alpha * (Q3 - Q1)``.

    Reads the notebook-level ``data``, ``teams``, ``full_scores`` and ``trial_events``.
    Flagged event names are appended to the notebook-level ``bombed_events`` (this
    function does not clear it first), then the mapping and the mean number of
    flagged events per team are printed. Returns nothing.

    alpha: multiplier applied to the interquartile range (1.5 is the standard
        outlier rule).
    """
    # The fence depends only on the team, so compute it once per team instead of
    # re-running the quantile computation for every single placing.
    fences = {}
    for placement in data.get('Placings'):
        team_name = teams[placement['team'] - 1]
        if team_name not in fences:
            q1, q3 = np.quantile(full_scores[team_name], [0.25, 0.75])
            fences[team_name] = (q3 - q1) * alpha + q3

        # A placing without a 'place' value counts as len(teams), i.e. last place.
        if placement.get('place', len(teams)) > fences[team_name] and placement['event'] not in trial_events:
            bombed_events[team_name].append(placement['event'])

    print("\"Bombed\" events by school:", bombed_events, "\n")
    print("Average \"bombed events\": ", sum(len(flagged) for flagged in bombed_events.values())/len(teams))

iqr()

"Bombed" events by school: {'Adlai E. Stevenson High School Gold': ['Flight'], 'Adlai E. Stevenson High School Green': ['Trajectory'], 'Archbishop Hoban High School Blue': [], 'Archbishop Hoban High School Gold': [], 'Athens Area High School ': [], 'Beachwood High School Gold': ['Write It Do It'], 'Beachwood High School White': [], 'Boyceville High School JV': [], 'Boyceville High School Varsity': [], 'Brecksville-Broadview Heights High School ': ['Detector Building', 'Environmental Chemistry', 'Scrambler', 'Trajectory'], 'Canal Winchester High School ': [], 'Centerville High School Black': ['Write It Do It'], 'Centerville High School Gold': ['Write It Do It'], 'Centerville High School White': [], 'Chardon High School ': [], 'Cleveland Heights High School ': [], 'Columbus Academy ': [], 'Cumberland Valley High School X': ['Write It Do It'], 'Cumberland Valley High School Y': [], 'duPont Manual High School Red': ['Fermi Questions'], 'duPont Manual High School White': ['Forensics'], 'Gra

In [112]:
bombed_events = {t: [] for t in teams}

def std_deviation(alpha: float = 1.5):
    """Flag every placing above ``mean + alpha * std`` of the team's counted placings.

    Reads the notebook-level ``data``, ``teams``, ``full_scores`` and ``trial_events``.
    Flagged event names are appended to the notebook-level ``bombed_events`` (this
    function does not clear it first), then the mapping and the mean number of
    flagged events per team are printed. Returns nothing.

    alpha: number of standard deviations above the team's mean that a placing
        must exceed to be flagged. ``np.std`` is called with its defaults.
    """
    # The fence depends only on the team, so compute it once per team instead of
    # recomputing the mean and standard deviation for every single placing.
    fences = {}
    for placement in data.get('Placings'):
        team_name = teams[placement['team'] - 1]
        if team_name not in fences:
            team_scores = full_scores[team_name]
            # Named team_mean so it does not shadow the notebook's mean() function.
            team_mean = np.mean(team_scores)
            fences[team_name] = team_mean + alpha * np.std(team_scores)

        # A placing without a 'place' value counts as len(teams), i.e. last place.
        if placement.get('place', len(teams)) > fences[team_name] and placement['event'] not in trial_events:
            bombed_events[team_name].append(placement['event'])

    print("\"Bombed\" events by school:", bombed_events, "\n")
    print("Average \"bombed events\": ", sum(len(flagged) for flagged in bombed_events.values())/len(teams))

std_deviation()

"Bombed" events by school: {'Adlai E. Stevenson High School Gold': ['Experimental Design', 'Flight'], 'Adlai E. Stevenson High School Green': ['Scrambler', 'Trajectory', 'Write It Do It'], 'Archbishop Hoban High School Blue': [], 'Archbishop Hoban High School Gold': [], 'Athens Area High School ': [], 'Beachwood High School Gold': ['Flight', 'Write It Do It'], 'Beachwood High School White': [], 'Boyceville High School JV': ['Environmental Chemistry'], 'Boyceville High School Varsity': [], 'Brecksville-Broadview Heights High School ': ['Detector Building', 'Environmental Chemistry', 'Scrambler', 'Trajectory'], 'Canal Winchester High School ': ['Detector Building', 'Environmental Chemistry', 'Forensics'], 'Centerville High School Black': ['Write It Do It'], 'Centerville High School Gold': ['Remote Sensing', 'Write It Do It'], 'Centerville High School White': ['Astronomy', 'Trajectory'], 'Chardon High School ': ['Codebusters'], 'Cleveland Heights High School ': [], 'Columbus Academy ': []

## Consistent Flaws
All of these methods are too sensitive for "top" teams, while they do not account nearly as well for lower-ranked teams. A possible solution would be a scalar multiplier based on the team's rank. This would better account for bombs even when a team's mean/median place is lower.

# Recalculating scores with "bombed" events automatically dropped


In [113]:
# Detection method used to decide how many scores every team gets to drop.
method: typing.Callable = std_deviation

# run method
bombed_events = {t: [] for t in teams} # clear map

alpha = 2
method(alpha)

# Copy every team's score list too: dict.copy() is shallow, so removing items
# from the copied dict's lists would also delete them from full_scores and make
# this cell give different results each time it is re-run.
score_copy = {team_name: list(scores) for team_name, scores in full_scores.items()}

# Every team drops the same number of scores: the average number of "bombed"
# events per team, rounded to an integer.
drops = round(sum(len(flagged) for flagged in bombed_events.values())/len(teams))

for team_scores in score_copy.values():
    # Drop the worst (highest) placings; never try to drop more scores than the
    # team actually has, which would make max() fail on an empty list.
    for _ in range(min(drops, len(team_scores))):
        team_scores.remove(max(team_scores))


"Bombed" events by school: {'Adlai E. Stevenson High School Gold': ['Flight'], 'Adlai E. Stevenson High School Green': ['Trajectory'], 'Archbishop Hoban High School Blue': [], 'Archbishop Hoban High School Gold': [], 'Athens Area High School ': [], 'Beachwood High School Gold': ['Write It Do It'], 'Beachwood High School White': [], 'Boyceville High School JV': [], 'Boyceville High School Varsity': [], 'Brecksville-Broadview Heights High School ': [], 'Canal Winchester High School ': [], 'Centerville High School Black': ['Write It Do It'], 'Centerville High School Gold': ['Write It Do It'], 'Centerville High School White': [], 'Chardon High School ': ['Codebusters'], 'Cleveland Heights High School ': [], 'Columbus Academy ': [], 'Cumberland Valley High School X': ['Write It Do It'], 'Cumberland Valley High School Y': ['Remote Sensing'], 'duPont Manual High School Red': ['Fermi Questions', 'Forestry'], 'duPont Manual High School White': ['Forensics'], 'Granville High School ': [], 'Hawke

## Recalculate Ranking

In [114]:
# Total of each team's remaining scores once the drops have been removed.
score_with_drops = dict((name, sum(remaining)) for name, remaining in score_copy.items())

# Show the teams in ascending order of total (lowest total first).
print(dict(sorted(score_with_drops.items(), key=lambda pair: pair[1])))

{'Solon High School A': 107, 'Adlai E. Stevenson High School Gold': 148, 'New Trier High School Bass Fishing': 197, 'Cumberland Valley High School X': 209, 'Mason High School Green': 221, 'duPont Manual High School Red': 243, 'Centerville High School Black': 264, 'Mason High School White': 274, 'Pioneer High School ': 313, 'Solon High School B': 319, 'Adlai E. Stevenson High School Green': 321, 'New Albany High School Varsity': 333, 'Cumberland Valley High School Y': 366, 'Beachwood High School Gold': 399, 'Mayfield High School Green': 415, 'Saline High School Blue': 425, 'Westlake High School Green': 443, 'duPont Manual High School White': 493, 'Mentor High School Gray': 494, 'Solon High School C': 498, 'Centerville High School Gold': 499, 'Kenston High School Blue': 516, 'Centerville High School White': 519, 'Hudson High School Blue': 523, 'New Trier High School Bowling': 524, 'International Academy Central ': 580, 'Kenston High School Bombers': 589, 'Chardon High School ': 591, 'Ole