# Setup/Imports

In [29]:
from __future__ import annotations

import typing

import yaml
import numpy as np

with open("data/2023-05-20_nationals_c.yaml", 'r') as file:
    data = yaml.safe_load(file)

# Average Event Score Metric

In [30]:
# Team List in order index of team number - 1 (0-indexed)
teams = [team['school'] for team in data.get('Teams')]

# Event Order (0-indexed)
events = [event['name'] for event in data.get('Events')]


In [31]:
trial_events = [d['name'] for d in data.get('Events') if d.get('trial', False)]
total_scores: dict[str, int] = {t: 0 for t in teams} # team number : total score (sum)
full_scores: dict[str, list[int]] = {t: [] for t in teams} # full array of scores for each team

for placement in data.get('Placings'):
    total_scores[teams[placement['team']-1]] += placement.get('place', len(teams)) if not placement['event'] in trial_events else 0
    full_scores[teams[placement['team']-1]].append(placement.get('place', len(teams))) if not placement['event'] in trial_events else 0
    
print(total_scores)
print(full_scores)


{'Syosset High School': 381, 'Ward Melville High School': 477, 'Troy High School': 279, 'Castro Valley High School': 321, 'William G. Enloe High School': 367, 'North Carolina School of Science and Mathematics': 470, 'Grand Haven High School': 405, 'Pioneer High School': 543, 'Harriton High School': 293, 'Lower Merion High School': 417, 'Adlai E. Stevenson High School': 208, 'New Trier High School': 333, 'F.W. Buchholz High School': 500, 'Boca Raton Community High School': 608, 'Solon High School': 228, 'Mason High School': 233, 'Seven Lakes High School': 326, 'William P. Clements High School': 462, 'Fulton Science Academy': 673, 'Brookwood High School': 562, 'Thomas Jefferson High School for Science and Technology': 577, 'Marquette University High School': 370, 'Ladue Horton Watkins High School': 535, 'West Windsor-Plainsboro High School North': 423, 'Centennial High School': 538, 'Carmel High School': 470, 'Acton-Boxborough Regional High School': 298, 'Mounds View High School': 688, '

In [32]:
averages = {t: total_scores[t]/(len(data.get('Events'))-len(trial_events)) for t in teams}
print(averages)

{'Syosset High School': 16.565217391304348, 'Ward Melville High School': 20.73913043478261, 'Troy High School': 12.130434782608695, 'Castro Valley High School': 13.956521739130435, 'William G. Enloe High School': 15.956521739130435, 'North Carolina School of Science and Mathematics': 20.434782608695652, 'Grand Haven High School': 17.608695652173914, 'Pioneer High School': 23.608695652173914, 'Harriton High School': 12.73913043478261, 'Lower Merion High School': 18.130434782608695, 'Adlai E. Stevenson High School': 9.043478260869565, 'New Trier High School': 14.478260869565217, 'F.W. Buchholz High School': 21.73913043478261, 'Boca Raton Community High School': 26.434782608695652, 'Solon High School': 9.91304347826087, 'Mason High School': 10.130434782608695, 'Seven Lakes High School': 14.173913043478262, 'William P. Clements High School': 20.08695652173913, 'Fulton Science Academy': 29.26086956521739, 'Brookwood High School': 24.434782608695652, 'Thomas Jefferson High School for Science

# Distinguishing "Bombed" Events
### Using the following methods to determine "bombed" events:
##### These values become the upper fence for the data
- $\alpha \times \overline{X} $
- $\alpha \times (Q_3 - Q_1) + Q_3$ &nbsp; (IQR)
- $\alpha \times \sigma + \overline{X} $&nbsp;&nbsp;&nbsp; (Standard Deviation)

where $\alpha$ is a multiplicative constant


### Mean Method (alpha = 2)

In [33]:
bombed_events = {t: [] for t in teams}

def mean(alpha: int = 2):
    for placement in data.get('Placings'):
        if placement.get('place', len(teams)) > averages[teams[placement['team']-1]] * alpha and not placement['event'] in trial_events:
            bombed_events[teams[placement['team']-1]].append(placement['event'])
            
    print("\"Bombed\" events by school:", bombed_events, "\n")
    print("Average \"bombed events\": ", sum([len(bombed_events[t]) for t in bombed_events])/len(teams))

mean()

"Bombed" events by school: {'Syosset High School': ['Bridge', 'Flight', 'Remote Sensing'], 'Ward Melville High School': [], 'Troy High School': ['Chemistry Lab', 'Forensics', 'WiFi Lab'], 'Castro Valley High School': ['Scrambler', 'Write It Do It'], 'William G. Enloe High School': ['Astronomy', 'Scrambler'], 'North Carolina School of Science and Mathematics': ['Environmental Chemistry', 'Forensics'], 'Grand Haven High School': ['Cell Biology', 'Chemistry Lab', 'Detector Building'], 'Pioneer High School': ['Write It Do It'], 'Harriton High School': ['Dynamic Planet', 'Fermi Questions'], 'Lower Merion High School': ['Remote Sensing'], 'Adlai E. Stevenson High School': ['Experimental Design', 'Trajectory'], 'New Trier High School': ['Experimental Design', 'Scrambler', 'Trajectory'], 'F.W. Buchholz High School': ['Experimental Design', 'Write It Do It'], 'Boca Raton Community High School': [], 'Solon High School': ['Experimental Design', 'Remote Sensing'], 'Mason High School': ['Trajectory

### IQR Method (alpha = 1.5, standard outlier formula)

In [34]:
bombed_events = {t: [] for t in teams}

def iqr(alpha: int = 1.5):
    for placement in data.get('Placings'):
        q1 = np.quantile(full_scores[teams[placement['team']-1]], 0.25)
        q3 = np.quantile(full_scores[teams[placement['team']-1]], 0.75)
        if placement.get('place', len(teams)) > ((q3 - q1) * alpha + q3) and not placement['event'] in trial_events:
            bombed_events[teams[placement['team']-1]].append(placement['event'])
            
    print("\"Bombed\" events by school:", bombed_events, "\n")
    print("Average \"bombed events\": ", sum([len(bombed_events[t]) for t in bombed_events])/len(teams))
    
iqr()

"Bombed" events by school: {'Syosset High School': ['Flight'], 'Ward Melville High School': [], 'Troy High School': [], 'Castro Valley High School': ['Scrambler'], 'William G. Enloe High School': [], 'North Carolina School of Science and Mathematics': [], 'Grand Haven High School': [], 'Pioneer High School': [], 'Harriton High School': ['Fermi Questions'], 'Lower Merion High School': ['Remote Sensing'], 'Adlai E. Stevenson High School': ['Experimental Design', 'Trajectory'], 'New Trier High School': ['Scrambler'], 'F.W. Buchholz High School': ['Experimental Design', 'Write It Do It'], 'Boca Raton Community High School': [], 'Solon High School': ['Experimental Design', 'Remote Sensing'], 'Mason High School': ['Trajectory'], 'Seven Lakes High School': [], 'William P. Clements High School': [], 'Fulton Science Academy': [], 'Brookwood High School': ['Fermi Questions'], 'Thomas Jefferson High School for Science and Technology': ['Write It Do It'], 'Marquette University High School': [], 'L

In [35]:
bombed_events = {t: [] for t in teams}

def std_deviation(alpha: int = 1.5):
    for placement in data.get('Placings'):
        mean = np.mean(full_scores[teams[placement['team']-1]])
        std = np.std(full_scores[teams[placement['team']-1]])
        if placement.get('place', len(teams)) > (mean + alpha * std) and not placement['event'] in trial_events:
            bombed_events[teams[placement['team']-1]].append(placement['event'])
            
    print("\"Bombed\" events by school:", bombed_events, "\n")
    print("Average \"bombed events\": ", sum([len(bombed_events[t]) for t in bombed_events])/len(teams))
    
std_deviation()

"Bombed" events by school: {'Syosset High School': ['Bridge', 'Flight', 'Remote Sensing'], 'Ward Melville High School': ['Environmental Chemistry'], 'Troy High School': ['Chemistry Lab', 'Forensics', 'WiFi Lab', 'Write It Do It'], 'Castro Valley High School': ['Scrambler'], 'William G. Enloe High School': ['Astronomy', 'Scrambler'], 'North Carolina School of Science and Mathematics': ['Environmental Chemistry', 'Forensics'], 'Grand Haven High School': ['Chemistry Lab', 'Detector Building'], 'Pioneer High School': ['Anatomy and Physiology', 'Write It Do It'], 'Harriton High School': ['Detector Building', 'Dynamic Planet', 'Fermi Questions'], 'Lower Merion High School': ['Remote Sensing'], 'Adlai E. Stevenson High School': ['Experimental Design', 'Trajectory'], 'New Trier High School': ['Experimental Design', 'Scrambler'], 'F.W. Buchholz High School': ['Experimental Design', 'Write It Do It'], 'Boca Raton Community High School': ['Environmental Chemistry'], 'Solon High School': ['Experim

## Consistent Flaws
All of these methods are too sensitive to "top" teams while do not account nearly as well for lower ranked teams. A possible solution would be a scalar multiplier based on the team's rank. This would have a greater account for bombs even when the mean/median place on a team is lower. 

# Recalculating scores with auto drop "bombed" events


In [36]:
method: typing.Callable = std_deviation

# run method
alpha = 2
method(alpha)

score_copy = full_scores.copy()
drops = round(sum([len(bombed_events[t]) for t in bombed_events])/len(teams))

for _ in range(drops):
    for team in score_copy:
        score_copy[team].remove(max(score_copy[team]))


"Bombed" events by school: {'Syosset High School': ['Bridge', 'Flight', 'Remote Sensing', 'Flight'], 'Ward Melville High School': ['Environmental Chemistry'], 'Troy High School': ['Chemistry Lab', 'Forensics', 'WiFi Lab', 'Write It Do It'], 'Castro Valley High School': ['Scrambler', 'Scrambler'], 'William G. Enloe High School': ['Astronomy', 'Scrambler', 'Scrambler'], 'North Carolina School of Science and Mathematics': ['Environmental Chemistry', 'Forensics', 'Environmental Chemistry', 'Forensics'], 'Grand Haven High School': ['Chemistry Lab', 'Detector Building', 'Detector Building'], 'Pioneer High School': ['Anatomy and Physiology', 'Write It Do It', 'Write It Do It'], 'Harriton High School': ['Detector Building', 'Dynamic Planet', 'Fermi Questions', 'Fermi Questions'], 'Lower Merion High School': ['Remote Sensing', 'Remote Sensing'], 'Adlai E. Stevenson High School': ['Experimental Design', 'Trajectory', 'Experimental Design', 'Trajectory'], 'New Trier High School': ['Experimental D

## Recalculate Ranking

In [37]:
score_with_drops = {t: sum(score_copy[t]) for t in score_copy}

print(dict(sorted(score_with_drops.items(), key=lambda item: item[1])))

{'Solon High School': 160, 'Adlai E. Stevenson High School': 161, 'Mason High School': 171, 'Acton-Boxborough Regional High School': 217, 'Troy High School': 225, 'Harriton High School': 236, 'Castro Valley High School': 242, 'New Trier High School': 247, 'Seven Lakes High School': 254, 'Syosset High School': 291, 'William G. Enloe High School': 298, 'Marquette University High School': 304, 'Grand Haven High School': 319, 'West Windsor-Plainsboro High School North': 343, 'Lower Merion High School': 346, 'North Carolina School of Science and Mathematics': 369, 'William P. Clements High School': 372, 'Carmel High School': 373, 'F.W. Buchholz High School': 403, 'Ward Melville High School': 404, 'duPont Manual High School': 404, 'Pioneer High School': 440, 'Centennial High School': 452, 'Ladue Horton Watkins High School': 454, 'Brookwood High School': 470, 'Thomas Jefferson High School for Science and Technology': 472, 'Iolani School': 487, 'Tesla STEM High School': 502, 'Boca Raton Commun