# Setup/Imports

In [37]:
from __future__ import annotations

import typing

import yaml
import numpy as np

import utils

# Load the tournament results; later cells read the "Teams", "Events" and
# "Placings" entries of this mapping.
# The encoding is pinned to UTF-8 so parsing does not depend on the platform's
# default locale encoding (which differs between operating systems).
with open("data/2023-05-20_nationals_c.yaml", "r", encoding="utf-8") as file:
    data = yaml.safe_load(file)


## Event Scoring

In [38]:
# Team entries sorted by team number (list index == team number - 1 only if
# the numbers run 1..N without gaps).
team_list: list[dict[str, str | int]] = sorted(data.get("Teams"), key=lambda x: x['number'])

# team number : display name ("<school> <suffix>", or just "<school>").
# Joining only the non-empty parts avoids a trailing space when a team has no
# suffix and avoids a TypeError when the suffix key is present but null.
teams: dict[int, str] = {
    team['number']: " ".join(part for part in (team['school'], team.get('suffix')) if part)
    for team in team_list
}

# Event names in the order they appear in the data file
events = [event['name'] for event in data.get('Events')]


In [39]:
# Names of events flagged as trial; these are excluded from all scoring below.
trial_events = [d['name'] for d in data.get('Events') if d.get('trial', False)]
total_scores: dict[int, int] = {t: 0 for t in teams}  # team number : total score (sum)
full_scores: dict[int, list[int]] = {t: [] for t in teams}  # team number : every scored placing

# A placing without a 'place' value is scored as last place (number of teams).
last_place = len(teams)

for placement in data.get('Placings'):
    if placement['event'] in trial_events:
        continue
    place = placement.get('place', last_place)
    total_scores[placement['team']] += place
    full_scores[placement['team']].append(place)


## Average Score Per School

In [40]:
# Average placing per team over the scored (non-trial) events, shown from the
# lowest average to the highest.
scored_event_count = len(data.get('Events')) - len(trial_events)
averages = {number: total_scores[number] / scored_event_count for number in teams}
print(utils.pretty_print(
    teams,
    {number: averages[number] for number in sorted(averages, key=averages.get)},
))

{'Adlai E. Stevenson High School ': 9.043478260869565, 'Solon High School ': 9.91304347826087, 'Mason High School ': 10.130434782608695, 'Troy High School ': 12.130434782608695, 'Harriton High School ': 12.73913043478261, 'Acton-Boxborough Regional High School ': 12.956521739130435, 'Castro Valley High School ': 13.956521739130435, 'Seven Lakes High School ': 14.173913043478262, 'New Trier High School ': 14.478260869565217, 'William G. Enloe High School ': 15.956521739130435, 'Marquette University High School ': 16.08695652173913, 'Syosset High School ': 16.565217391304348, 'Grand Haven High School ': 17.608695652173914, 'Lower Merion High School ': 18.130434782608695, 'West Windsor-Plainsboro High School North ': 18.391304347826086, 'William P. Clements High School ': 20.08695652173913, 'North Carolina School of Science and Mathematics ': 20.434782608695652, 'Carmel High School ': 20.434782608695652, 'Ward Melville High School ': 20.73913043478261, 'F.W. Buchholz High School ': 21.739

# Distinguishing "Bombed" Events
### Using the following methods to determine "bombed" events:
##### These values become the upper fence for the data
- $\alpha \times \overline{X} $
- $\alpha \times (Q_3 - Q_1) + Q_3$ &nbsp; (IQR)
- $\alpha \times \sigma + \overline{X} $&nbsp;&nbsp;&nbsp; (Standard Deviation)

where $\alpha$ is a multiplicative constant


### Mean Method (alpha = 2)

In [41]:
bombed_events = {t: [] for t in teams}

def mean(alpha: float = 2):
    """Flag placings that exceed ``alpha`` times the team's average placing.

    For every non-trial placing whose place is strictly greater than
    ``averages[team] * alpha``, the event name is appended to the notebook-level
    ``bombed_events`` entry for that team. A placing with no ``place`` value is
    treated as ``len(teams)``. The per-school result and the average number of
    flagged events per team are printed.

    Entries are appended to ``bombed_events``; reset it before calling again.

    Args:
        alpha: Multiplier applied to the team's average placing.
    """
    for entry in data.get('Placings'):
        place = entry.get('place', len(teams))
        fence = averages[entry['team']] * alpha
        if not place > fence:
            continue
        if entry['event'] in trial_events:
            continue
        bombed_events[entry['team']].append(entry['event'])

    flagged_total = sum(len(flagged) for flagged in bombed_events.values())
    print("\"Bombed\" events by school:", utils.pretty_print(teams, bombed_events), "\n")
    print("Average \"bombed events\": ", flagged_total/len(teams))

mean()

"Bombed" events by school: {'Syosset High School ': ['Bridge', 'Flight', 'Remote Sensing'], 'Ward Melville High School ': [], 'Troy High School ': ['Chemistry Lab', 'Forensics', 'WiFi Lab'], 'Castro Valley High School ': ['Scrambler', 'Write It Do It'], 'William G. Enloe High School ': ['Astronomy', 'Scrambler'], 'North Carolina School of Science and Mathematics ': ['Environmental Chemistry', 'Forensics'], 'Grand Haven High School ': ['Cell Biology', 'Chemistry Lab', 'Detector Building'], 'Pioneer High School ': ['Write It Do It'], 'Harriton High School ': ['Dynamic Planet', 'Fermi Questions'], 'Lower Merion High School ': ['Remote Sensing'], 'Adlai E. Stevenson High School ': ['Experimental Design', 'Trajectory'], 'New Trier High School ': ['Experimental Design', 'Scrambler', 'Trajectory'], 'F.W. Buchholz High School ': ['Experimental Design', 'Write It Do It'], 'Boca Raton Community High School ': [], 'Solon High School ': ['Experimental Design', 'Remote Sensing'], 'Mason High School

### IQR Method (alpha = 1.5, standard outlier formula)

In [42]:
bombed_events = {t: [] for t in teams}

def iqr(alpha: float = 1.5):
    """Flag placings above each team's IQR upper fence as "bombed" events.

    A team's fence is ``Q3 + alpha * (Q3 - Q1)``, computed over that team's
    scores in ``full_scores``. For every non-trial placing strictly above the
    fence, the event name is appended to the notebook-level ``bombed_events``
    entry for that team. A placing with no ``place`` value is treated as
    ``len(teams)``. The per-school result and the average number of flagged
    events per team are printed.

    Entries are appended to ``bombed_events``; reset it before calling again.

    Args:
        alpha: Multiplier applied to the interquartile range.
    """
    # The fence depends only on the team, so compute it once per team instead
    # of recomputing both quartiles for every single placing.
    fences = {}
    for placement in data.get('Placings'):
        team = placement['team']
        if team not in fences:
            q1, q3 = np.quantile(full_scores[team], [0.25, 0.75])
            fences[team] = (q3 - q1) * alpha + q3
        if placement.get('place', len(teams)) > fences[team] and placement['event'] not in trial_events:
            bombed_events[team].append(placement['event'])

    print("\"Bombed\" events by school:", utils.pretty_print(teams, bombed_events), "\n")
    print("Average \"bombed events\": ", sum(len(flagged) for flagged in bombed_events.values())/len(teams))

iqr()

"Bombed" events by school: {'Syosset High School ': ['Flight'], 'Ward Melville High School ': [], 'Troy High School ': [], 'Castro Valley High School ': ['Scrambler'], 'William G. Enloe High School ': [], 'North Carolina School of Science and Mathematics ': [], 'Grand Haven High School ': [], 'Pioneer High School ': [], 'Harriton High School ': ['Fermi Questions'], 'Lower Merion High School ': ['Remote Sensing'], 'Adlai E. Stevenson High School ': ['Experimental Design', 'Trajectory'], 'New Trier High School ': ['Scrambler'], 'F.W. Buchholz High School ': ['Experimental Design', 'Write It Do It'], 'Boca Raton Community High School ': [], 'Solon High School ': ['Experimental Design', 'Remote Sensing'], 'Mason High School ': ['Trajectory'], 'Seven Lakes High School ': [], 'William P. Clements High School ': [], 'Fulton Science Academy ': [], 'Brookwood High School ': ['Fermi Questions'], 'Thomas Jefferson High School for Science and Technology ': ['Write It Do It'], 'Marquette University

In [43]:
bombed_events = {t: [] for t in teams}

def std_deviation(alpha: float = 1.5):
    """Flag placings more than ``alpha`` standard deviations above the team mean.

    A team's upper limit is ``mean + alpha * std`` over that team's scores in
    ``full_scores`` (``np.std`` with its default settings). For every non-trial
    placing strictly above the limit, the event name is appended to the
    notebook-level ``bombed_events`` entry for that team. A placing with no
    ``place`` value is treated as ``len(teams)``. The per-school result and the
    average number of flagged events per team are printed.

    Entries are appended to ``bombed_events``; reset it before calling again.

    Args:
        alpha: Number of standard deviations above the mean used as the limit.
    """
    # The limit depends only on the team, so compute it once per team instead
    # of recomputing mean and std for every single placing.
    limits = {}
    for placement in data.get('Placings'):
        team = placement['team']
        if team not in limits:
            scores = full_scores[team]
            # Named team_mean so it does not shadow the notebook's mean() function.
            team_mean = np.mean(scores)
            limits[team] = team_mean + alpha * np.std(scores)
        if placement.get('place', len(teams)) > limits[team] and placement['event'] not in trial_events:
            bombed_events[team].append(placement['event'])

    print("\"Bombed\" events by school:", utils.pretty_print(teams, bombed_events), "\n")
    print("Average \"bombed events\": ", sum(len(flagged) for flagged in bombed_events.values())/len(teams))

std_deviation()

"Bombed" events by school: {'Syosset High School ': ['Bridge', 'Flight', 'Remote Sensing'], 'Ward Melville High School ': ['Environmental Chemistry'], 'Troy High School ': ['Chemistry Lab', 'Forensics', 'WiFi Lab', 'Write It Do It'], 'Castro Valley High School ': ['Scrambler'], 'William G. Enloe High School ': ['Astronomy', 'Scrambler'], 'North Carolina School of Science and Mathematics ': ['Environmental Chemistry', 'Forensics'], 'Grand Haven High School ': ['Chemistry Lab', 'Detector Building'], 'Pioneer High School ': ['Anatomy and Physiology', 'Write It Do It'], 'Harriton High School ': ['Detector Building', 'Dynamic Planet', 'Fermi Questions'], 'Lower Merion High School ': ['Remote Sensing'], 'Adlai E. Stevenson High School ': ['Experimental Design', 'Trajectory'], 'New Trier High School ': ['Experimental Design', 'Scrambler'], 'F.W. Buchholz High School ': ['Experimental Design', 'Write It Do It'], 'Boca Raton Community High School ': ['Environmental Chemistry'], 'Solon High Scho

## Consistent Flaws
All of these methods are too sensitive for "top" teams, while they do not account nearly as well for lower-ranked teams. A possible solution would be a scalar multiplier based on the team's rank. This would account better for bombs even when a team's mean/median placement is lower in the rankings.

# Recalculating scores with automatically dropped "bombed" events


In [44]:
# Detection method used to decide how many events every team gets to drop.
method: typing.Callable = std_deviation

# run method
bombed_events = {t: [] for t in teams} # clear map

alpha = 1.5
method(alpha)

# Copy every team's score list, not just the outer dict: dict.copy() is shallow,
# so removing items from its lists would also remove them from full_scores and
# this cell would drop additional events each time it is re-run.
score_copy = {team: list(scores) for team, scores in full_scores.items()}

# Number of drops per team = average number of bombed events per team,
# rounded to an integer with the built-in round().
drops = round(sum(len(flagged) for flagged in bombed_events.values())/len(teams))

# Remove each team's `drops` highest (worst) placings; never try to remove
# more scores than the team actually has.
for scores in score_copy.values():
    for _ in range(min(drops, len(scores))):
        scores.remove(max(scores))


"Bombed" events by school: {'Syosset High School ': ['Bridge', 'Flight', 'Remote Sensing'], 'Ward Melville High School ': ['Environmental Chemistry'], 'Troy High School ': ['Chemistry Lab', 'Forensics', 'WiFi Lab', 'Write It Do It'], 'Castro Valley High School ': ['Scrambler'], 'William G. Enloe High School ': ['Astronomy', 'Scrambler'], 'North Carolina School of Science and Mathematics ': ['Environmental Chemistry', 'Forensics'], 'Grand Haven High School ': ['Chemistry Lab', 'Detector Building'], 'Pioneer High School ': ['Anatomy and Physiology', 'Write It Do It'], 'Harriton High School ': ['Detector Building', 'Dynamic Planet', 'Fermi Questions'], 'Lower Merion High School ': ['Remote Sensing'], 'Adlai E. Stevenson High School ': ['Experimental Design', 'Trajectory'], 'New Trier High School ': ['Experimental Design', 'Scrambler'], 'F.W. Buchholz High School ': ['Experimental Design', 'Write It Do It'], 'Boca Raton Community High School ': ['Environmental Chemistry'], 'Solon High Scho

## Recalculate Ranking

In [45]:
# Total each team's remaining placings and rank the teams from the lowest
# total to the highest.
score_with_drops = {number: sum(kept) for number, kept in score_copy.items()}

sorted_scores = {
    number: score_with_drops[number]
    for number in sorted(score_with_drops, key=score_with_drops.get)
}
print("After dropping {} events: \n".format(drops))
print(utils.pretty_print(teams, sorted_scores))


After dropping 1 events: 

{'Adlai E. Stevenson High School ': 184, 'Solon High School ': 192, 'Mason High School ': 193, 'Acton-Boxborough Regional High School ': 248, 'Troy High School ': 252, 'Harriton High School ': 262, 'Castro Valley High School ': 270, 'Seven Lakes High School ': 281, 'New Trier High School ': 283, 'Syosset High School ': 328, 'William G. Enloe High School ': 330, 'Marquette University High School ': 337, 'Grand Haven High School ': 360, 'Lower Merion High School ': 376, 'West Windsor-Plainsboro High School North ': 383, 'William P. Clements High School ': 414, 'North Carolina School of Science and Mathematics ': 416, 'Carmel High School ': 419, 'Ward Melville High School ': 438, 'F.W. Buchholz High School ': 449, 'duPont Manual High School ': 454, 'Pioneer High School ': 487, 'Ladue Horton Watkins High School ': 490, 'Centennial High School ': 490, 'Brookwood High School ': 514, 'Thomas Jefferson High School for Science and Technology ': 518, 'Iolani School ': 