# Setup/Imports

In [129]:
from __future__ import annotations

import typing

import yaml
import numpy as np

import utils

# Load the tournament results file. The encoding is pinned explicitly because
# open() otherwise uses a platform-dependent locale encoding; the file is
# assumed to be UTF-8 (TODO confirm if it was produced by another tool).
with open("data/2019-02-02_solon_invitational_c.yaml", 'r', encoding="utf-8") as file:
    # safe_load only constructs plain Python objects (dicts, lists, scalars).
    data = yaml.safe_load(file)


## Event Scoring

In [130]:
# Team records ordered by team number (index = team number - 1, assuming the
# numbers run 1..N without gaps)
team_list: list[dict[str, str | int]] = sorted(data.get("Teams"), key=lambda x: x['number'])

# team number : team name ("<school> <suffix>"). rstrip() removes the dangling
# space that the concatenation leaves behind for teams without a suffix.
teams: dict[int, str] = {
    team['number']: (team['school'] + " " + team.get('suffix', "")).rstrip()
    for team in team_list
}

# Event names in the order they appear in the data file (0-indexed)
events = [event['name'] for event in data.get('Events')]


In [131]:
# Names of the events flagged as trial; their placings are excluded from all scores.
trial_events = [d['name'] for d in data.get('Events') if d.get('trial', False)]
total_scores: dict[int, int] = {t: 0 for t in teams}  # team number : total score (sum)
full_scores: dict[int, list[int]] = {t: [] for t in teams}  # team number : every counted placing

for placement in data.get('Placings'):
    if placement['event'] in trial_events:
        continue
    # A placing without a 'place' value is scored as last place (number of teams).
    place = placement.get('place', len(teams))
    total_scores[placement['team']] += place
    full_scores[placement['team']].append(place)


## Average Score Per School

In [132]:
# Mean placing per team over the events that count (trial events are excluded).
scored_event_count = len(data.get('Events')) - len(trial_events)
averages = {t: total_scores[t] / scored_event_count for t in teams}

# Display the teams in ascending order of average placing.
ranked_averages = sorted(averages.items(), key=lambda item: item[1])
print(utils.pretty_print(teams, dict(ranked_averages)))

{'Solon High School ': 7.434782608695652, 'New Trier High School A': 8.434782608695652, 'Adlai E. Stevenson High School A': 9.826086956521738, 'Adlai E. Stevenson High School B': 9.91304347826087, 'Mason High School A': 10.0, 'Mason High School B': 10.217391304347826, 'Bayard Rustin High School A': 11.565217391304348, 'Northville High School B': 12.26086956521739, 'Mentor High School A': 15.91304347826087, 'Mentor High School B': 15.956521739130435, 'International Academy Central A': 16.608695652173914, 'Shady Side Academy Senior School A': 17.0, 'Beachwood High School A': 17.130434782608695, 'duPont Manual High School A': 17.304347826086957, 'Mayfield High School A': 20.82608695652174, 'New Trier High School B': 21.217391304347824, 'Hudson High School A': 21.695652173913043, 'St. Ignatius High School A': 22.17391304347826, 'Northville High School A': 23.0, 'Westlake High School A': 26.0, 'Solon High School Trial A': 26.434782608695652, 'duPont Manual High School B': 27.608695652173914

# Distinguishing "Bombed" Events
### Using the following methods to determine "bombed" events:
##### These values become the upper fence for the data
- $\alpha \times \overline{X} $ &nbsp; (Mean)
- $\alpha \times (Q_3 - Q_1) + Q_3$ &nbsp; (IQR)
- $\alpha \times \sigma + \overline{X} $&nbsp;&nbsp;&nbsp; (Standard Deviation)

where $\alpha$ is a multiplicative constant


### Mean Method (alpha = 2)

In [133]:
bombed_events = {t: [] for t in teams}

def mean(alpha: float = 2):
    """Flag "bombed" events using a multiple of each team's average placing.

    A non-trial placing is flagged when it is strictly greater than
    ``alpha * averages[team]``. The flagged event names are stored in the
    module-level ``bombed_events`` mapping (team number : list of event names)
    and a summary is printed.

    The mapping is rebuilt on every call, so calling the function repeatedly
    does not accumulate duplicate entries.

    Args:
        alpha: Multiplier applied to the team's average placing to obtain the
            upper fence.
    """
    global bombed_events
    bombed_events = {t: [] for t in teams}

    for placement in data.get('Placings'):
        if placement['event'] in trial_events:
            continue
        # A placing without a 'place' value counts as last place (number of teams).
        place = placement.get('place', len(teams))
        if place > averages[placement['team']] * alpha:
            bombed_events[placement['team']].append(placement['event'])

    print("\"Bombed\" events by school:", utils.pretty_print(teams, bombed_events), "\n")
    print("Average \"bombed events\": ", sum([len(bombed_events[t]) for t in bombed_events])/len(teams))

mean()

"Bombed" events by school: {'Adlai E. Stevenson High School A': ['Fermi Questions', 'Mission Possible', 'Wright Stuff'], 'Adlai E. Stevenson High School B': ['Herpetology', 'Wright Stuff', 'Write It Do It'], 'Archbishop Hoban High School A': [], 'Archbishop Hoban High School B': [], 'Aurora High School ': [], 'Avon Lake High School ': [], 'Bayard Rustin High School A': ['Designer Genes'], 'Bayard Rustin High School B': [], 'Beachwood High School A': ['Anatomy and Physiology', 'Geologic Mapping', 'Mission Possible', 'Wright Stuff'], 'Beachwood High School B': ['Fermi Questions'], 'Bio-Med Science Academy ': [], 'Boyceville High School A': [], 'Boyceville High School B': [], 'Brecksville-Broadview Heights High School ': [], 'Charles F. Brush High School ': [], 'Chagrin Falls High School A': [], 'Chagrin Falls High School B': [], 'Chardon High School A': [], 'Chardon High School B': [], 'Columbus Academy ': [], 'duPont Manual High School A': ['Mousetrap Vehicle'], 'duPont Manual High Scho

### IQR Method (alpha = 1.5, standard outlier formula)

In [134]:
bombed_events = {t: [] for t in teams}

def iqr(alpha: float = 1.5):
    """Flag "bombed" events using an interquartile-range upper fence.

    For each team the fence is ``Q3 + alpha * (Q3 - Q1)``, with the quartiles
    taken over that team's entries in ``full_scores``. A non-trial placing is
    flagged when it is strictly greater than the fence. The flagged event
    names are stored in the module-level ``bombed_events`` mapping
    (team number : list of event names) and a summary is printed.

    The mapping is rebuilt on every call, so calling the function repeatedly
    does not accumulate duplicate entries.

    Args:
        alpha: Multiplier applied to the interquartile range.
    """
    global bombed_events
    bombed_events = {t: [] for t in teams}

    # team number : upper fence. Filled lazily so the quartiles are computed
    # once per team instead of once per placing.
    fences = {}

    for placement in data.get('Placings'):
        if placement['event'] in trial_events:
            continue
        team = placement['team']
        if team not in fences:
            q1, q3 = np.quantile(full_scores[team], [0.25, 0.75])
            fences[team] = (q3 - q1) * alpha + q3
        # A placing without a 'place' value counts as last place (number of teams).
        if placement.get('place', len(teams)) > fences[team]:
            bombed_events[team].append(placement['event'])

    print("\"Bombed\" events by school:", utils.pretty_print(teams, bombed_events), "\n")
    print("Average \"bombed events\": ", sum([len(bombed_events[t]) for t in bombed_events])/len(teams))

iqr()

"Bombed" events by school: {'Adlai E. Stevenson High School A': ['Mission Possible', 'Wright Stuff'], 'Adlai E. Stevenson High School B': ['Write It Do It'], 'Archbishop Hoban High School A': [], 'Archbishop Hoban High School B': [], 'Aurora High School ': [], 'Avon Lake High School ': [], 'Bayard Rustin High School A': [], 'Bayard Rustin High School B': [], 'Beachwood High School A': ['Geologic Mapping'], 'Beachwood High School B': ['Fermi Questions'], 'Bio-Med Science Academy ': [], 'Boyceville High School A': [], 'Boyceville High School B': [], 'Brecksville-Broadview Heights High School ': [], 'Charles F. Brush High School ': [], 'Chagrin Falls High School A': [], 'Chagrin Falls High School B': [], 'Chardon High School A': [], 'Chardon High School B': [], 'Columbus Academy ': [], 'duPont Manual High School A': ['Mousetrap Vehicle'], 'duPont Manual High School B': [], 'Horizon Science Academy Cleveland High School ': [], 'Hudson High School A': [], 'Hudson High School B': [], 'Intern

In [135]:
bombed_events = {t: [] for t in teams}

def std_deviation(alpha: float = 1.5):
    """Flag "bombed" events using a standard-deviation upper fence.

    For each team the fence is ``mean + alpha * std``, computed with
    ``np.mean`` and ``np.std`` (default arguments) over that team's entries in
    ``full_scores``. A non-trial placing is flagged when it is strictly
    greater than the fence. The flagged event names are stored in the
    module-level ``bombed_events`` mapping (team number : list of event names)
    and a summary is printed.

    The mapping is rebuilt on every call, so calling the function repeatedly
    does not accumulate duplicate entries.

    Args:
        alpha: Number of standard deviations above the mean at which the
            fence is placed.
    """
    global bombed_events
    bombed_events = {t: [] for t in teams}

    # team number : upper fence. Filled lazily so the statistics are computed
    # once per team instead of once per placing.
    fences = {}

    for placement in data.get('Placings'):
        if placement['event'] in trial_events:
            continue
        team = placement['team']
        if team not in fences:
            # Named team_mean/team_std so the sibling function mean() is not shadowed.
            team_mean = np.mean(full_scores[team])
            team_std = np.std(full_scores[team])
            fences[team] = team_mean + alpha * team_std
        # A placing without a 'place' value counts as last place (number of teams).
        if placement.get('place', len(teams)) > fences[team]:
            bombed_events[team].append(placement['event'])

    print("\"Bombed\" events by school:", utils.pretty_print(teams, bombed_events), "\n")
    print("Average \"bombed events\": ", sum([len(bombed_events[t]) for t in bombed_events])/len(teams))

std_deviation()

"Bombed" events by school: {'Adlai E. Stevenson High School A': ['Mission Possible', 'Wright Stuff'], 'Adlai E. Stevenson High School B': ['Wright Stuff', 'Write It Do It'], 'Archbishop Hoban High School A': ['Mousetrap Vehicle'], 'Archbishop Hoban High School B': [], 'Aurora High School ': [], 'Avon Lake High School ': [], 'Bayard Rustin High School A': ['Designer Genes'], 'Bayard Rustin High School B': [], 'Beachwood High School A': ['Anatomy and Physiology', 'Geologic Mapping', 'Mission Possible'], 'Beachwood High School B': ['Astronomy', 'Chemistry Lab', 'Fermi Questions'], 'Bio-Med Science Academy ': [], 'Boyceville High School A': ['Thermodynamics'], 'Boyceville High School B': [], 'Brecksville-Broadview Heights High School ': ['Circuit Lab', 'Designer Genes', 'Thermodynamics'], 'Charles F. Brush High School ': [], 'Chagrin Falls High School A': ['Experimental Design', 'Mission Possible', 'Wright Stuff'], 'Chagrin Falls High School B': [], 'Chardon High School A': ['Astronomy', '

## Consistent Flaws
All of these methods are too sensitive for "top" teams, while they do not account nearly as well for lower-ranked teams. A possible solution would be a scalar multiplier based on the team's rank. This would better account for bombs even when a team's mean/median placement is lower.

# Recalculating scores with automatically dropped "bombed" events


In [136]:
# Detection method whose result decides how many events every team may drop.
method: typing.Callable = std_deviation

# run method
bombed_events = {t: [] for t in teams} # clear map

alpha = 1
method(alpha)

# Number of drops = average number of bombed events per team, rounded.
drops = round(sum([len(bombed_events[t]) for t in bombed_events])/len(teams))

# dict.copy() is shallow: the per-team lists would still be shared with
# full_scores, so removing from them would corrupt the original scores and make
# this cell drop additional events on every re-run. Copy each list instead.
score_copy = {team: list(scores) for team, scores in full_scores.items()}

for team_scores in score_copy.values():
    # Remove the worst (highest) placings; never more than the team actually
    # has, so max() is not called on an empty list.
    for _ in range(min(drops, len(team_scores))):
        team_scores.remove(max(team_scores))


"Bombed" events by school: {'Adlai E. Stevenson High School A': ['Fermi Questions', 'Mission Possible', 'Wright Stuff'], 'Adlai E. Stevenson High School B': ['Herpetology', 'Wright Stuff', 'Write It Do It'], 'Archbishop Hoban High School A': ['Astronomy', 'Mousetrap Vehicle', 'Water Quality'], 'Archbishop Hoban High School B': ['Disease Detectives', 'Geologic Mapping', 'Herpetology', 'Mission Possible', 'Mousetrap Vehicle', 'Protein Modeling', 'Thermodynamics', 'Wright Stuff'], 'Aurora High School ': ['Codebusters', 'Forensics', 'Mission Possible', 'Mousetrap Vehicle', 'Protein Modeling'], 'Avon Lake High School ': [], 'Bayard Rustin High School A': ['Chemistry Lab', 'Designer Genes', 'Experimental Design', 'Geologic Mapping'], 'Bayard Rustin High School B': ['Astronomy', 'Chemistry Lab', 'Designer Genes', 'Thermodynamics', 'Wright Stuff'], 'Beachwood High School A': ['Anatomy and Physiology', 'Geologic Mapping', 'Mission Possible', 'Wright Stuff'], 'Beachwood High School B': ['Astrono

## Recalculate Ranking

In [137]:
# Total each team's remaining placings after the drops have been applied.
score_with_drops = {team: sum(kept) for team, kept in score_copy.items()}

# Rank ascending by total; sorted() is stable, so tied teams keep their existing order.
ranked_totals = sorted(score_with_drops.items(), key=lambda entry: entry[1])
sorted_scores = dict(ranked_totals)

print(f"After dropping {drops} events: \n")
print(utils.pretty_print(teams, sorted_scores))


After dropping 3 events: 

{'Solon High School ': 112, 'New Trier High School A': 118, 'Adlai E. Stevenson High School B': 133, 'Mason High School A': 138, 'Mason High School B': 138, 'Adlai E. Stevenson High School A': 152, 'Northville High School B': 156, 'Bayard Rustin High School A': 200, 'Beachwood High School A': 258, 'Mentor High School A': 262, 'Mentor High School B': 264, 'Shady Side Academy Senior School A': 270, 'International Academy Central A': 302, 'duPont Manual High School A': 303, 'New Trier High School B': 364, 'Hudson High School A': 368, 'Mayfield High School A': 373, 'St. Ignatius High School A': 381, 'Northville High School A': 387, 'Westlake High School A': 438, 'Solon High School Trial A': 444, 'Beachwood High School B': 482, 'Boyceville High School A': 504, 'Olentangy Liberty High School A': 504, 'duPont Manual High School B': 507, 'St. Edward High School A': 514, 'Solon High School Trial B': 532, 'Brecksville-Broadview Heights High School ': 539, 'Medina High 