In [393]:
from os import listdir
from os.path import isfile, join
import numpy as np
import pandas as pd
from IPython.display import display
import datetime
import re
import csv
from geopy.geocoders import Nominatim

# Folder holding the downloaded score recap files. It is relative to the
# notebook's working directory, the same base that "raw data/", 'inputs/' and
# 'exports/' are resolved against elsewhere in this notebook, so the listing and
# the later reads always point at the same folder on any machine.
mypath = "raw data"

# os.listdir returns names in arbitrary order; sorting keeps the enumeration
# order (and therefore the show numbers derived from it) reproducible.
files = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))

# First column of every non-empty row of the corps master file. The first row of
# the file is kept as-is (no header skipping), exactly as before.
# newline='' is the mode the csv module expects for files handed to csv.reader.
with open('inputs/corps_data.csv', 'r', newline='') as f:
    list_of_corps = [row[0] for row in csv.reader(f) if row]

def export(df: pd.DataFrame, path_no_dot: str, index=True):
    """Save ``df`` twice under ``exports/``: once as CSV and once as an Excel workbook.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to write.
    path_no_dot : str
        File name (without extension) placed under ``exports/``.
    index : bool, default True
        Forwarded to both writers to control whether the index is written.
    """
    csv_path = f'exports/{path_no_dot}.csv'
    xlsx_path = f'exports/{path_no_dot}.xlsx'
    df.to_csv(csv_path, index=index)
    df.to_excel(xlsx_path, index=index)

# for f in files:
#     print(f)

In [394]:
# Parse every recap file into one long scores table (one row per corps per show),
# collect the class each corps competes in, and build the per-corps summary table.
scores_list = list()   # one cleaned DataFrame per recap file
corps_dict = dict()    # corps name -> class heading it was listed under
comps_data = list()    # [location, competition name] per recap file

corps_classes = ['DCI World Class', 'DCI Open Class','DCI All Age World Class','DCI All Age Open Class','DCI All Age Class A']

# Column names given to the 16 columns of a parsed recap table.
recap_columns = ['Corps','Performance Slot','General Effect 1','General Effect 2','General Effect Total','Visual Proficiency','Visual Analysis','Color Guard','Visual Total','Music Brass','Music Analysis','Music Percussion','Music Total','Sub Total','Penalty','Total Score']


def _clean_cell(value):
    """Replace each run of characters outside '!'..'~' with one space and trim; non-strings pass through."""
    return re.sub('[^!-~]+', ' ', value).strip() if isinstance(value, str) else value


for (show_number, file) in enumerate(files):
    # Read from the same folder the file list was built from.
    df_list = pd.read_html(join(mypath, file))
    df = df_list[-1]
    # Column-wise Series.map instead of DataFrame.applymap, which is deprecated
    # in recent pandas releases.
    df = df.apply(lambda column: column.map(_clean_cell))

    # The first cell is "<date> <place>", optionally followed by "--<competition>".
    # Split on the first "--" only, so a competition name that itself contains
    # "--" no longer breaks the tuple unpacking.
    date_and_place_and_comp = df.iloc[0, 0]
    comp = None
    if '--' in date_and_place_and_comp:
        (date_and_place, comp) = date_and_place_and_comp.split('--', 1)
    else:
        date_and_place = date_and_place_and_comp
    chunks = date_and_place.split(sep=' ')
    place = " ".join(chunks[1:])

    # Record which class heading each corps name appears under. Names are
    # accumulated over two consecutive cells (the second is appended only when it
    # is a string).
    # NOTE(review): starting at row 4 is a blind assumption that the first four
    # rows are always header material.
    corps_class = None
    running_name = ""
    for r in df.iloc[4:,0]:
        if r in corps_classes:
            corps_class = r
        if corps_class != r:
            if running_name == "":
                running_name = str(r)
            else:
                if type(r) is str:
                    running_name += " " + r
                if running_name != 'nan':
                    corps_dict.update({running_name: corps_class})
                    running_name = ""

    # Remove the class heading rows, then the four leading rows and the last row.
    df = df[~df[0].isin(corps_classes)]
    df = df.iloc[4:-1].reset_index(drop=True)

    # Rebuild corps names from row pairs: even rows start a name, odd rows extend
    # it unless the cell is numeric/NaN.
    names = df[0].reset_index(drop=True)

    clean_names = list()
    for (i, r) in enumerate(names):
        if (i % 2) == 0:
            running_name = str(r)
        else:
            try:
                float(r)
            except (TypeError, ValueError):
                # Not a number: this cell is the second half of the name.
                running_name += " " + r
            clean_names.append(running_name)

    # Keep the first row of each pair; it is the row that carries the scores.
    df = df.iloc[::2, :].reset_index(drop=True)
    df.columns = recap_columns
    df['Corps'] = clean_names
    df.insert(1,column='Date',value=pd.to_datetime(chunks[0]))
    df.insert(2,column='Location',value=place)
    df.insert(0,column='Show Number',value=show_number+1)

    # Assign by column label so the caption columns really become float64;
    # an iloc slice assignment can leave the original dtype in place.
    caption_columns = df.columns[5:]
    df[caption_columns] = df[caption_columns].astype(float)

    # Drop rows with no total score in one pass instead of dropping rows while
    # iterating over the frame; index labels of the kept rows are unchanged.
    df = df.dropna(subset=['Total Score']).copy()

    df['Performance Slot'] = df['Performance Slot'].astype(int)

    scores_list.append(df)
    comps_data.append([place, comp])

scores_table = pd.concat(scores_list)
# Everything downstream treats the last row of a corps as its latest show and
# differences consecutive rows, so make chronological order explicit. The stable
# sort keeps file order within a date, so input that is already chronological is
# left exactly as it was.
scores_table = scores_table.sort_values('Date', kind='stable').reset_index(drop=True)

scores_table['Days from Season Start'] = (scores_table['Date'] - scores_table['Date'].min()).apply(lambda x: x.days)

corps_table = pd.DataFrame(corps_dict.items(), columns=['Corps', 'Class'])
corps_table.set_index('Corps',inplace=True)

# Recap spelling -> spelling applied to both tables before joining with the
# corps master file.
corps_rename_dict = {
    "Vanguard": "Santa Clara Vanguard",
    "Academy": "The Academy",
    "Cavaliers": "The Cavaliers",
    "Bushwackers": "Bushwackers Drum Corps",
    "Connecticut Hurricanes": "Hurricanes",
    "Battalion": "The Battalion"
}

for (k, v) in corps_rename_dict.items():
    corps_table.rename(index={k: v}, inplace=True)
    scores_table.replace(k, v, inplace=True)

# Outer join keeps corps that appear only in the master file or only in recaps.
shell_corps_table = pd.read_csv('inputs/corps_data.csv').set_index('Corps')
corps_table = shell_corps_table.join(other=corps_table,how='outer')

comps_table = pd.DataFrame(comps_data, columns=['Location', 'DCI Name'])

# Copy every field of each corps' last scores_table row onto its corps_table row.
for corps in corps_table.index:
    scores_table_corps = scores_table.loc[scores_table['Corps'] == corps]
    if scores_table_corps.shape[0] == 0:
        continue
    scores_table_corps_last = scores_table_corps.iloc[-1,:].to_dict()
    for (k, v) in scores_table_corps_last.items():
        if k != "Corps":
            corps_table.loc[corps,k] = v

# Name length with spaces removed.
corps_table['Letters in Name'] = [len(x.replace(' ','')) for x in list(corps_table.index)]

# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(corps_table)
#     display(scores_table)
#     display(comps_table)

In [395]:
# Look up, for every corps present in scores_table, the Total Score and Date of
# its last row, then attach both to corps_table as new columns.
rows_by_corps = {
    name: scores_table.loc[scores_table['Corps'] == name]
    for name in set(scores_table['Corps'])
}
current_scores = {name: list(rows['Total Score'])[-1] for (name, rows) in rows_by_corps.items()}
latest_perf = {name: list(rows['Date'])[-1] for (name, rows) in rows_by_corps.items()}

latest_score_frame = pd.DataFrame(current_scores.items(),columns=['Corps','Latest Score']).set_index('Corps')
latest_date_frame = pd.DataFrame(latest_perf.items(),columns=['Corps','Last Performance Date']).set_index('Corps')

# Outer joins keep corps that have no scores yet (their new columns stay empty).
corps_table = corps_table.join(latest_score_frame,how='outer')
corps_table = corps_table.join(latest_date_frame,how='outer')

# Highest latest score first; the corps name breaks ties.
corps_table = corps_table.sort_values(by=['Latest Score','Corps'],ascending=[False, True])
display(corps_table)

Unnamed: 0_level_0,Home Location,Latitude,Longitude,Class,Show Number,Date,Location,Performance Slot,General Effect 1,General Effect 2,...,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start,Letters in Name,Latest Score,Last Performance Date
Corps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Bluecoats,"Canton, OH",40.798546,-81.374951,DCI World Class,34.0,2024-07-14,"Ankeny, Iowa",2.0,17.7,17.6,...,18.0,17.9,26.5,88.35,0.0,88.35,18.0,9,88.35,2024-07-14
Boston Crusaders,"Boston, MA",42.355433,-71.060511,DCI World Class,32.0,2024-07-13,"Little Rock, Arkansas",4.0,17.3,17.0,...,16.8,17.7,25.85,86.15,0.0,86.15,17.0,15,86.15,2024-07-13
Reading Buccaneers,"Reading, PA",40.335345,-75.927949,DCI All Age World Class,29.0,2024-07-13,"Clifton, New Jersey",3.0,17.5,17.3,...,17.4,16.9,25.5,85.85,0.0,85.85,17.0,17,85.85,2024-07-13
Blue Devils,"Concord, CA",37.976852,-122.033562,DCI World Class,30.0,2024-07-13,"Fort Collins, Colorado",8.0,17.0,17.3,...,17.1,16.9,25.55,85.55,0.0,85.55,17.0,10,85.55,2024-07-13
Carolina Crown,"Fort Mill, SC",35.00737,-80.945076,DCI World Class,32.0,2024-07-13,"Little Rock, Arkansas",5.0,17.2,16.6,...,17.1,17.0,25.8,85.25,0.0,85.25,17.0,13,85.25,2024-07-13
Phantom Regiment,"Rockford, IL",42.271394,-89.093966,DCI World Class,31.0,2024-07-13,"DeKalb, Illinois",6.0,16.8,17.0,...,16.8,17.2,25.65,84.75,0.0,84.75,17.0,15,84.75,2024-07-13
Bushwackers Drum Corps,"Princeton, NJ",40.349695,-74.659738,DCI All Age World Class,29.0,2024-07-13,"Clifton, New Jersey",1.0,17.1,17.0,...,16.8,17.1,24.8,84.0,0.0,84.0,17.0,20,84.0,2024-07-13
Santa Clara Vanguard,"Santa Clara, CA",37.233325,-121.684635,DCI World Class,30.0,2024-07-13,"Fort Collins, Colorado",1.0,16.5,16.3,...,16.0,17.1,24.8,82.0,0.0,82.0,17.0,18,82.0,2024-07-13
Hawthorne Caballeros,"Hawthorne, NJ",40.949265,-74.153755,DCI All Age World Class,29.0,2024-07-13,"Clifton, New Jersey",4.0,16.4,16.6,...,16.9,16.7,24.8,81.95,0.0,81.95,17.0,19,81.95,2024-07-13
Mandarins,"Sacramento, CA",38.581061,-121.493895,DCI World Class,30.0,2024-07-13,"Fort Collins, Colorado",7.0,16.2,16.6,...,16.6,16.1,24.4,81.7,0.0,81.7,17.0,9,81.7,2024-07-13


In [396]:
# Whole days elapsed between the earliest Date in scores_table and today.
today = datetime.date.today()
season_start = scores_table['Date'].min()
day_of_season = (pd.Timestamp(today) - season_start).days

In [397]:
# Per-corps score analysis: show-to-show differences, per-day rates of change,
# a straight-line fit of Total Score against days into the season with 1-7 day
# projections, recent-form averages, and rank columns written onto corps_table.

# Straight line a*x + b; used below to evaluate the fitted trend.
growth_fit_xab = lambda x,a,b: (a * x) + b

# Deep copies of scores_table with every column from position 5 onward zeroed.
# The per-corps loop below overwrites these frames row by row.
scores_table_diff = scores_table.copy(deep=True)
scores_table_diff.iloc[:,5:] = 0.0
scores_table_rate = scores_table.copy(deep=True)
scores_table_rate.iloc[:,5:] = 0.0

for corps in set(scores_table['Corps']):
    # This corps' rows in each of the three frames.
    scores_table_corps = scores_table.loc[scores_table['Corps'] == corps]
    scores_table_diff_corps = scores_table_diff.loc[scores_table_diff['Corps'] == corps]
    scores_table_rate_corps = scores_table_rate.loc[scores_table_rate['Corps'] == corps]
    
    # Difference between consecutive rows for every column except the two text
    # columns; for 'Date' this yields the time elapsed between shows.
    # NOTE(review): these assign into .loc-selected subsets of the frames, which
    # is why the results are copied back row by row further down.
    for c in scores_table_corps.columns:
        if c not in ['Corps','Location']:
            scores_table_diff_corps.loc[:,c] = scores_table_corps.loc[:,c].diff()
    
    # Per-day rate: each difference divided by the whole days between the shows.
    for c in scores_table_corps.columns:
        if c not in ['Corps','Location']:
            scores_table_rate_corps.loc[:,c] = scores_table_diff_corps.loc[:,c] / scores_table_diff_corps.loc[:,"Date"].apply(lambda x: x.days)
    
    # Copy the per-corps results back into the full frames. iterrows yields index
    # labels, which are used here as positions with iloc.
    # NOTE(review): this assumes labels equal row positions — confirm that
    # scores_table carries a default 0..n-1 index.
    for (i, r) in scores_table_diff_corps.iterrows():
        scores_table_diff.iloc[i,:] = r
    
    for (i, r) in scores_table_rate_corps.iterrows():
        scores_table_rate.iloc[i,:] = r

    # The fit (with covariance) is only attempted for corps with at least 3 shows.
    if int(scores_table_corps.shape[0]) > 2:
        # Degree-1 least-squares fit of Total Score against Days from Season Start.
        ((a, b), cov) = np.polyfit(scores_table_corps['Days from Season Start'].astype(int), scores_table_corps['Total Score'].astype(float), 1, cov=True)
        # Square roots of the covariance diagonal: spread estimates for slope a
        # and intercept b.
        (sa, sb) = np.sqrt(np.diag(cov))
        # Stored under "RMSE", but computed as the standard deviation (.std()) of
        # the residuals about the fitted line.
        corps_table.loc[corps, "RMSE"] = (scores_table_corps['Total Score'] - scores_table_corps['Days from Season Start'].apply(lambda x: growth_fit_xab(x, a, b))).std()

        # Projections for 1..7 days past day_of_season (not defined in this cell;
        # it must already exist in the notebook namespace). The upper/lower
        # values shift both slope and intercept by +i / -i of their spread
        # estimates, so the band widens with the horizon i.
        for i in range(1,7+1):
            est = growth_fit_xab(day_of_season + i, a + (sa * 0), b + (sb * 0))
            upper = growth_fit_xab(day_of_season + i, a + (sa * i), b + (sb * i))
            lower = growth_fit_xab(day_of_season + i, a + (sa * -i), b + (sb * -i))
            # Cap each value at 100.0 ...
            est = 100.0 if est > 100.0 else est
            upper = 100.0 if upper > 100.0 else upper
            lower = 100.0 if lower > 100.0 else lower
            # ... and floor it at 0.0 when storing.
            corps_table.loc[corps, f"D+{i}"] = 0.0 if est < 0.0 else est
            corps_table.loc[corps, f"D+{i} U"] = 0.0 if upper < 0.0 else upper
            corps_table.loc[corps, f"D+{i} L"] = 0.0 if lower < 0.0 else lower
    
    # Row count, mean days between consecutive shows, mean per-day change in
    # Total Score.
    corps_table.loc[corps,'Number of Shows'] = int(scores_table_corps.shape[0])
    corps_table.loc[corps, 'Average Rest'] = scores_table_diff_corps.loc[:,'Date'].apply(lambda x: x.days).mean()
    corps_table.loc[corps, 'Average Score Improvement'] = scores_table_rate_corps.loc[:,'Total Score'].mean()
    
    # normal ranges from 0 to 1
    # Means over the last three rows (fewer when the corps has fewer), divided by
    # 40 / 30 / 30 / 20 / 20 — presumably the maximum of each caption; TODO confirm.
    corps_table.loc[corps,'Average of Last 3 Scores'] = scores_table_corps['Total Score'].iloc[-3:].mean()
    corps_table.loc[corps,'Average Normal General Effect'] = scores_table_corps['General Effect Total'].iloc[-3:].mean() / 40
    corps_table.loc[corps,'Average Normal Visual'] = scores_table_corps['Visual Total'].iloc[-3:].mean() / 30
    corps_table.loc[corps,'Average Normal Music'] = scores_table_corps['Music Total'].iloc[-3:].mean() / 30
    corps_table.loc[corps,'Average Normal Brass'] = scores_table_corps['Music Brass'].iloc[-3:].mean() / 20
    corps_table.loc[corps,'Average Normal Percussion'] = scores_table_corps['Music Percussion'].iloc[-3:].mean() / 20

    # 'Multiple' when any two of the three normalized caption averages are equal
    # (the set is smaller than the list); otherwise the caption with the largest
    # value.
    if len(set(corps_table.loc[corps, ['Average Normal General Effect', 'Average Normal Visual', 'Average Normal Music']].values)) < len(list(corps_table.loc[corps, ['Average Normal General Effect', 'Average Normal Visual', 'Average Normal Music']].values)):
        corps_table.loc[corps, 'Best Caption'] = 'Multiple'
    else:
        max_column_index = np.argmax(corps_table.loc[corps, ['Average Normal General Effect', 'Average Normal Visual', 'Average Normal Music']].values)
        corps_table.loc[corps, 'Best Caption'] = ['General Effect', 'Visual', 'Music'][max_column_index]


    # 'Percussion' when Brass < Percussion, 'Brass' when Brass > Percussion,
    # 'Neither' when the two normalized averages are equal.
    corps_table.loc[corps, 'Best Music'] = ['Brass', 'Percussion'][int(corps_table.loc[corps, 'Average Normal Brass'] < corps_table.loc[corps, 'Average Normal Percussion'])] if corps_table.loc[corps, 'Average Normal Brass'] != corps_table.loc[corps, 'Average Normal Percussion'] else 'Neither'

    # Spot check: show the three per-corps frames in full for one corps only.
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        if corps == "Phantom Regiment":
            display(scores_table_corps)
            display(scores_table_diff_corps)
            display(scores_table_rate_corps)

# Descending ranks (largest value ranks first); with method='max' tied rows all
# receive the largest rank number of their group. The "No All-Age" variants rank
# only rows whose Class, as a string, does not contain 'All Age'; the remaining
# rows get no value through index alignment on assignment.
corps_table['Rank of Last 3 Scores'] = corps_table['Average of Last 3 Scores'].rank(ascending=False, method='max')
corps_table['Rank of Last Score'] = corps_table['Latest Score'].rank(ascending=False, method='max')
corps_table['Rank of Last 3 Scores No All-Age'] = corps_table.loc[['All Age' not in str(x) for x in corps_table['Class']], 'Average of Last 3 Scores'].rank(ascending=False, method='max')
corps_table['Rank of Last Score No All-Age'] = corps_table.loc[['All Age' not in str(x) for x in corps_table['Class']], 'Latest Score'].rank(ascending=False, method='max')

def ordinal(n):
    """Format a rank as an English ordinal string ("1st", "2nd", "3rd", "4th", "11th", ...).

    Parameters
    ----------
    n : number or missing value
        Rank to format; it is truncated with ``int()`` before formatting.

    Returns
    -------
    str or None
        The ordinal string, or None when ``n`` is missing (None, NaN, pd.NA,
        NaT, or the string 'nan').
    """
    # Missing ranks have no ordinal. The string comparison is kept for callers
    # that pass the text 'nan'; pd.isna covers None / NaN / pd.NA / NaT, which
    # would otherwise fail inside int().
    if n is None or str(n) == 'nan' or pd.isna(n):
        return None

    number = int(n)
    if number // 10 % 10 == 1:
        # 11, 12, 13 (and 111, 212, ...) always take "th".
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')
    return "%d%s" % (number, suffix)
    
# Add an "Ordinal <rank column>" text column next to each numeric rank column.
for rank_column in (
    'Rank of Last 3 Scores',
    'Rank of Last Score',
    'Rank of Last 3 Scores No All-Age',
    'Rank of Last Score No All-Age',
):
    corps_table[f'Ordinal {rank_column}'] = corps_table[rank_column].apply(ordinal)


# Lookup table of championship rounds; row 0 is the empty "no round" entry.
championship_rounds_table = pd.DataFrame([None,'Prelims','Semis','Finals'],columns=['Championship Rounds'])

def calc_championship_round(rank: float):
    """Return the championship round label for a rank.

    Ranks up to 12 map to row 3 of ``championship_rounds_table``, up to 25 to
    row 2, up to 40 to row 1; anything else (including a rank for which every
    comparison is false, such as NaN) maps to row 0.
    """
    rounds = championship_rounds_table['Championship Rounds']
    # (highest rank admitted, row in the rounds table), checked in order.
    for (cutoff, row) in ((12, 3), (25, 2), (40, 1)):
        if rank <= cutoff:
            return rounds[row]
    return rounds[0]

# Projected round is derived from the rank that excludes All-Age corps.
projected_round = corps_table['Rank of Last 3 Scores No All-Age'].apply(calc_championship_round)
corps_table['Projected Championship Round'] = projected_round

# Lookup table of every label written to 'Best Caption' / 'Best Music'.
caption_titles = ['General Effect','Visual','Music','Multiple','Brass','Percussion','Neither']
captions_table = pd.DataFrame({'Caption Title': caption_titles})

# Show the complete corps table without row/column truncation.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(corps_table)

Unnamed: 0,Show Number,Corps,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,Visual Analysis,Color Guard,Visual Total,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start
1,1,Phantom Regiment,2024-06-26,"Rockford, Michigan",2,14.8,14.2,29.0,14.0,13.6,13.6,20.6,13.8,14.2,13.1,20.55,70.15,0.0,70.15,0
7,2,Phantom Regiment,2024-06-28,"Muncie, Indiana",3,14.5,14.7,29.2,14.5,14.2,13.8,21.25,14.4,14.7,14.3,21.7,72.15,0.0,72.15,2
44,7,Phantom Regiment,2024-06-30,"Lisle, Illinois",4,15.0,14.7,29.7,15.0,14.5,14.6,22.05,14.5,14.6,15.0,22.05,73.8,0.0,73.8,4
62,9,Phantom Regiment,2024-07-02,"Mason, OH",1,15.6,15.0,30.6,15.2,14.7,15.2,22.55,14.8,15.1,15.0,22.45,75.6,0.0,75.6,6
77,13,Phantom Regiment,2024-07-05,"Rockford, Illinois",4,15.7,15.3,31.0,15.6,15.6,15.4,23.3,15.3,15.3,15.8,23.2,77.5,0.0,77.5,9
99,16,Phantom Regiment,2024-07-06,"Whitewater, Wisconsin",2,15.9,15.8,31.7,15.8,15.9,15.8,23.75,15.4,16.0,16.0,23.7,79.15,0.0,79.15,10
106,17,Phantom Regiment,2024-07-07,"LaCrosse, Wisconsin",2,15.8,16.9,32.7,16.0,16.1,16.1,24.1,15.6,16.1,16.0,23.85,80.65,0.0,80.65,11
134,22,Phantom Regiment,2024-07-09,"Mankto, Minnesota",3,16.3,16.4,32.7,16.1,16.6,16.5,24.6,15.8,16.3,16.1,24.1,81.4,0.0,81.4,13
155,25,Phantom Regiment,2024-07-11,"Dubuque, Iowa",3,16.4,16.6,33.0,16.5,16.7,16.8,25.0,16.6,16.4,16.8,24.9,82.9,0.0,82.9,15
195,31,Phantom Regiment,2024-07-13,"DeKalb, Illinois",6,16.8,17.0,33.8,16.9,16.9,16.8,25.3,17.3,16.8,17.2,25.65,84.75,0.0,84.75,17


Unnamed: 0,Show Number,Corps,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,Visual Analysis,Color Guard,Visual Total,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start
1,,Phantom Regiment,NaT,"Rockford, Michigan",,,,,,,,,,,,,,,,
7,1.0,Phantom Regiment,2 days 00:00:00,"Muncie, Indiana",1.0,-0.3,0.5,0.2,0.5,0.6,0.2,0.65,0.6,0.5,1.2,1.15,2.0,0.0,2.0,2.0
44,5.0,Phantom Regiment,2 days 00:00:00,"Lisle, Illinois",1.0,0.5,0.0,0.5,0.5,0.3,0.8,0.8,0.1,-0.1,0.7,0.35,1.65,0.0,1.65,2.0
62,2.0,Phantom Regiment,2 days 00:00:00,"Mason, OH",-3.0,0.6,0.3,0.9,0.2,0.2,0.6,0.5,0.3,0.5,0.0,0.4,1.8,0.0,1.8,2.0
77,4.0,Phantom Regiment,3 days 00:00:00,"Rockford, Illinois",3.0,0.1,0.3,0.4,0.4,0.9,0.2,0.75,0.5,0.2,0.8,0.75,1.9,0.0,1.9,3.0
99,3.0,Phantom Regiment,1 days 00:00:00,"Whitewater, Wisconsin",-2.0,0.2,0.5,0.7,0.2,0.3,0.4,0.45,0.1,0.7,0.2,0.5,1.65,0.0,1.65,1.0
106,1.0,Phantom Regiment,1 days 00:00:00,"LaCrosse, Wisconsin",0.0,-0.1,1.1,1.0,0.2,0.2,0.3,0.35,0.2,0.1,0.0,0.15,1.5,0.0,1.5,1.0
134,5.0,Phantom Regiment,2 days 00:00:00,"Mankto, Minnesota",1.0,0.5,-0.5,0.0,0.1,0.5,0.4,0.5,0.2,0.2,0.1,0.25,0.75,0.0,0.75,2.0
155,3.0,Phantom Regiment,2 days 00:00:00,"Dubuque, Iowa",0.0,0.1,0.2,0.3,0.4,0.1,0.3,0.4,0.8,0.1,0.7,0.8,1.5,0.0,1.5,2.0
195,6.0,Phantom Regiment,2 days 00:00:00,"DeKalb, Illinois",3.0,0.4,0.4,0.8,0.4,0.2,0.0,0.3,0.7,0.4,0.4,0.75,1.85,0.0,1.85,2.0


Unnamed: 0,Show Number,Corps,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,Visual Analysis,Color Guard,Visual Total,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start
1,,Phantom Regiment,NaT,"Rockford, Michigan",,,,,,,,,,,,,,,,
7,0.5,Phantom Regiment,1 days 00:00:00,"Muncie, Indiana",0.5,-0.15,0.25,0.1,0.25,0.3,0.1,0.325,0.3,0.25,0.6,0.575,1.0,0.0,1.0,1.0
44,2.5,Phantom Regiment,1 days 00:00:00,"Lisle, Illinois",0.5,0.25,0.0,0.25,0.25,0.15,0.4,0.4,0.05,-0.05,0.35,0.175,0.825,0.0,0.825,1.0
62,1.0,Phantom Regiment,1 days 00:00:00,"Mason, OH",-1.5,0.3,0.15,0.45,0.1,0.1,0.3,0.25,0.15,0.25,0.0,0.2,0.9,0.0,0.9,1.0
77,1.333333,Phantom Regiment,1 days 00:00:00,"Rockford, Illinois",1.0,0.033333,0.1,0.133333,0.133333,0.3,0.066667,0.25,0.166667,0.066667,0.266667,0.25,0.633333,0.0,0.633333,1.0
99,3.0,Phantom Regiment,1 days 00:00:00,"Whitewater, Wisconsin",-2.0,0.2,0.5,0.7,0.2,0.3,0.4,0.45,0.1,0.7,0.2,0.5,1.65,0.0,1.65,1.0
106,1.0,Phantom Regiment,1 days 00:00:00,"LaCrosse, Wisconsin",0.0,-0.1,1.1,1.0,0.2,0.2,0.3,0.35,0.2,0.1,0.0,0.15,1.5,0.0,1.5,1.0
134,2.5,Phantom Regiment,1 days 00:00:00,"Mankto, Minnesota",0.5,0.25,-0.25,0.0,0.05,0.25,0.2,0.25,0.1,0.1,0.05,0.125,0.375,0.0,0.375,1.0
155,1.5,Phantom Regiment,1 days 00:00:00,"Dubuque, Iowa",0.0,0.05,0.1,0.15,0.2,0.05,0.15,0.2,0.4,0.05,0.35,0.4,0.75,0.0,0.75,1.0
195,3.0,Phantom Regiment,1 days 00:00:00,"DeKalb, Illinois",1.5,0.2,0.2,0.4,0.2,0.1,0.0,0.15,0.35,0.2,0.2,0.375,0.925,0.0,0.925,1.0


Unnamed: 0_level_0,Home Location,Latitude,Longitude,Class,Show Number,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,Visual Analysis,Color Guard,Visual Total,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start,Letters in Name,Latest Score,Last Performance Date,RMSE,D+1,D+1 U,D+1 L,D+2,D+2 U,D+2 L,D+3,D+3 U,D+3 L,D+4,D+4 U,D+4 L,D+5,D+5 U,D+5 L,D+6,D+6 U,D+6 L,D+7,D+7 U,D+7 L,Number of Shows,Average Rest,Average Score Improvement,Average of Last 3 Scores,Average Normal General Effect,Average Normal Visual,Average Normal Music,Average Normal Brass,Average Normal Percussion,Best Caption,Best Music,Rank of Last 3 Scores,Rank of Last Score,Rank of Last 3 Scores No All-Age,Rank of Last Score No All-Age,Ordinal Rank of Last 3 Scores,Ordinal Rank of Last Score,Ordinal Rank of Last 3 Scores No All-Age,Ordinal Rank of Last Score No All-Age,Projected Championship Round
Corps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1
Bluecoats,"Canton, OH",40.798546,-81.374951,DCI World Class,34.0,2024-07-14,"Ankeny, Iowa",2.0,17.7,17.6,35.3,17.6,17.7,17.8,26.55,17.1,18.0,17.9,26.5,88.35,0.0,88.35,18.0,9,88.35,2024-07-14,0.380346,89.737945,91.186972,88.288917,90.507411,93.492781,87.522041,91.276877,95.885904,86.667851,92.046344,98.366343,85.726345,92.81581,100.0,84.697525,93.585277,100.0,83.58139,94.354743,100.0,82.377941,5.0,3.0,1.048958,86.266667,0.864167,0.86,0.863333,0.856667,0.866667,General Effect,Percussion,1.0,1.0,1.0,1.0,1st,1st,1st,1st,Finals
Boston Crusaders,"Boston, MA",42.355433,-71.060511,DCI World Class,32.0,2024-07-13,"Little Rock, Arkansas",4.0,17.3,17.0,34.3,17.4,17.1,17.5,26.0,17.2,16.8,17.7,25.85,86.15,0.0,86.15,17.0,15,86.15,2024-07-13,0.641426,89.759174,91.042287,88.47606,90.584044,93.235036,87.933052,91.408914,95.512548,87.305279,92.233784,97.874826,86.592742,93.058654,100.0,85.79544,93.883524,100.0,84.913374,94.708394,100.0,83.946544,7.0,2.833333,0.711905,85.7,0.858333,0.855556,0.856111,0.845,0.86,General Effect,Percussion,2.0,2.0,2.0,2.0,2nd,2nd,2nd,2nd,Finals
Reading Buccaneers,"Reading, PA",40.335345,-75.927949,DCI All Age World Class,29.0,2024-07-13,"Clifton, New Jersey",3.0,17.5,17.3,34.8,16.4,17.5,17.2,25.55,16.7,17.4,16.9,25.5,85.85,0.0,85.85,17.0,17,85.85,2024-07-13,,,,,,,,,,,,,,,,,,,,,,,2.0,14.0,0.635714,81.4,0.8225,0.796667,0.82,0.81,0.81,General Effect,Neither,6.0,3.0,,,6th,3rd,,,
Blue Devils,"Concord, CA",37.976852,-122.033562,DCI World Class,30.0,2024-07-13,"Fort Collins, Colorado",8.0,17.0,17.3,34.3,17.3,16.8,17.3,25.7,17.1,17.1,16.9,25.55,85.55,0.0,85.55,17.0,10,85.55,2024-07-13,0.182322,87.507371,87.92289,87.091852,88.124693,88.98334,87.266045,88.742015,90.0714,87.412629,89.359337,91.18707,87.531603,89.976658,92.33035,87.622967,90.59398,93.501239,87.686722,91.211302,94.699738,87.722867,8.0,2.142857,0.568095,83.866667,0.840833,0.831667,0.842778,0.845,0.841667,Music,Brass,3.0,4.0,3.0,3.0,3rd,4th,3rd,3rd,Finals
Carolina Crown,"Fort Mill, SC",35.00737,-80.945076,DCI World Class,32.0,2024-07-13,"Little Rock, Arkansas",5.0,17.2,16.6,33.8,17.1,17.2,17.0,25.65,17.5,17.1,17.0,25.8,85.25,0.0,85.25,17.0,13,85.25,2024-07-13,0.693569,88.040969,89.977453,86.104484,88.745288,92.747834,84.742742,89.449607,95.647792,83.251423,90.153927,98.677326,81.630527,90.858246,100.0,79.880055,91.562565,100.0,78.000005,92.266885,100.0,75.990379,5.0,3.75,0.700417,82.216667,0.82,0.817778,0.829444,0.825,0.815,Music,Brass,5.0,5.0,5.0,4.0,5th,5th,5th,4th,Finals
Phantom Regiment,"Rockford, IL",42.271394,-89.093966,DCI World Class,31.0,2024-07-13,"DeKalb, Illinois",6.0,16.8,17.0,33.8,16.9,16.9,16.8,25.3,17.3,16.8,17.2,25.65,84.75,0.0,84.75,17.0,15,84.75,2024-07-13,0.38284,87.456934,88.184559,86.72931,88.311088,89.814519,86.807656,89.165241,91.492662,86.837821,90.019395,93.218987,86.819803,90.873548,94.993494,86.753602,91.727702,96.816184,86.639219,92.581855,98.687056,86.476654,10.0,1.888889,0.950926,83.016667,0.829167,0.832222,0.829444,0.828333,0.835,Visual,Percussion,4.0,6.0,4.0,5.0,4th,6th,4th,5th,Finals
Bushwackers Drum Corps,"Princeton, NJ",40.349695,-74.659738,DCI All Age World Class,29.0,2024-07-13,"Clifton, New Jersey",1.0,17.1,17.0,34.1,16.0,17.1,17.1,25.1,15.7,16.8,17.1,24.8,84.0,0.0,84.0,17.0,20,84.0,2024-07-13,,,,,,,,,,,,,,,,,,,,,,,2.0,14.0,0.682143,79.225,0.795,0.780833,0.8,0.775,0.8175,Music,Percussion,10.0,7.0,,,10th,7th,,,
Santa Clara Vanguard,"Santa Clara, CA",37.233325,-121.684635,DCI World Class,30.0,2024-07-13,"Fort Collins, Colorado",1.0,16.5,16.3,32.8,16.5,16.1,16.2,24.4,16.5,16.0,17.1,24.8,82.0,0.0,82.0,17.0,18,82.0,2024-07-13,0.541429,83.254115,84.488049,82.020182,83.880037,86.429894,81.33018,84.505958,88.45373,80.558187,85.13188,90.559555,79.704204,85.757801,92.747371,78.768231,86.383722,95.017178,77.750267,87.009644,97.368974,76.650313,8.0,2.142857,0.638571,79.716667,0.799167,0.784444,0.807222,0.801667,0.83,Music,Percussion,8.0,8.0,7.0,6.0,8th,8th,7th,6th,Finals
Hawthorne Caballeros,"Hawthorne, NJ",40.949265,-74.153755,DCI All Age World Class,29.0,2024-07-13,"Clifton, New Jersey",4.0,16.4,16.6,33.0,15.7,16.6,16.0,24.15,16.0,16.9,16.7,24.8,81.95,0.0,81.95,17.0,19,81.95,2024-07-13,,,,,,,,,,,,,,,,,,,,,,,2.0,14.0,0.546429,78.125,0.79125,0.7575,0.791667,0.7725,0.785,Music,Percussion,13.0,9.0,,,13th,9th,,,
Mandarins,"Sacramento, CA",38.581061,-121.493895,DCI World Class,30.0,2024-07-13,"Fort Collins, Colorado",7.0,16.2,16.6,32.8,16.4,16.2,16.4,24.5,16.1,16.6,16.1,24.4,81.7,0.0,81.7,17.0,9,81.7,2024-07-13,0.518323,84.394636,85.533774,83.255497,85.121457,87.474527,82.768388,85.848279,89.490071,82.206487,86.575101,91.580408,81.569795,87.301923,93.745536,80.85831,88.028745,95.985456,80.072034,88.755567,98.300168,79.210966,9.0,1.875,0.64125,80.533333,0.81,0.803889,0.800556,0.813333,0.775,General Effect,Brass,7.0,10.0,6.0,7.0,7th,10th,6th,7th,Finals


In [398]:
from pybaseball import standings

# mlb_table = pd.read_csv('mlb/mlb.csv')
# display(mlb_table)

# standings() supplies the frames that are stacked into one table below.
data = standings()

# Stack them, order by winning percentage (best first) and number rows from 1.
mlb_table = pd.concat(data, axis=0)
mlb_table = mlb_table.sort_values('W-L%',ascending=False)
mlb_table = mlb_table.reset_index(drop=True)
mlb_table.index = mlb_table.index + 1

display(mlb_table)

Unnamed: 0,Tm,W,L,W-L%,GB
1,Philadelphia Phillies,62,34,0.646,--
2,Cleveland Guardians,58,37,0.611,--
3,Baltimore Orioles,58,38,0.604,--
4,New York Yankees,58,40,0.592,1.0
5,Los Angeles Dodgers,56,41,0.577,--
6,Milwaukee Brewers,55,42,0.567,--
7,Minnesota Twins,54,42,0.563,4.5
8,Atlanta Braves,53,42,0.558,8.5
9,Boston Red Sox,53,42,0.558,4.5
10,Kansas City Royals,52,45,0.536,7.0


In [399]:
import plotly.graph_objects as go
import plotly.colors as pc

# Season-wide straight-line trend of Total Score against days into the season.
season_days = scores_table['Days from Season Start'].astype(int)
season_totals = scores_table['Total Score'].astype(float)
(a, b) = np.polyfit(season_days, season_totals, 1)


def growth_fit(x):
    """Evaluate the season-wide trend line at day ``x`` using the module-level a and b."""
    return (a * x) + b


# How far each score sits above or below the season-wide trend.
scores_table['Total Score Residual'] = scores_table['Total Score'] - scores_table['Days from Season Start'].apply(growth_fit)


def _hover_text(corps_name, show_date, location, score, difference, rate):
    """Build the HTML hover label for one point of a corps trace."""
    return (
        f'<b>{corps_name}</b><br><br>'
        f"{show_date.strftime('%A, %d %B %Y')}<br>"
        f"{location}<br><br>"
        f"Competition Score: {score:.3f}<br>"
        f"Difference: {difference:+.3f}<br>"
        f"Score Growth: {rate:+.3f} per day"
    )


fig = go.Figure()

# One line+marker trace per corps, in corps_table order; the legend entry shows
# the running position, the latest score and the corps name.
trace_colors = pc.qualitative.Bold
ranking = 1
for (i, corps) in enumerate(corps_table.index):
    corps_rows = scores_table['Corps'] == corps
    scores_table_corps = scores_table.loc[corps_rows]
    scores_table_diff_corps = scores_table_diff.loc[corps_rows]
    scores_table_rate_corps = scores_table_rate.loc[corps_rows]

    legend_name = f'#{ranking} ({corps_table.loc[corps,"Latest Score"]:.3f}) {corps}'
    hover_fields = zip(
        scores_table_corps['Corps'],
        scores_table_corps['Date'],
        scores_table_corps['Location'],
        scores_table_corps['Total Score'],
        scores_table_diff_corps['Total Score'],
        scores_table_rate_corps['Total Score']
    )
    corps_trace = go.Scatter(
        x=scores_table_corps['Date'],
        y=scores_table_corps['Total Score'],
        name=legend_name,
        mode='lines+markers',
        connectgaps=True,
        # Cycle through the palette when there are more corps than colours.
        line=dict(color=trace_colors[i % len(trace_colors)]),
        text=[_hover_text(*fields) for fields in hover_fields]
    )
    fig.add_trace(corps_trace)
    ranking += 1

# Timestamp shown under the chart title.
formatted_now = datetime.datetime.now().strftime("%A, %b %d, %Y %H:%M:%S")
updated_time = f'<i>Updated {formatted_now} CT</i>'

def active_ranking(cat, table=None):
    """Build legend labels whose rank counts only the traces that stay visible.

    Parameters
    ----------
    cat : iterable of bool
        One visibility flag per row of ``table``, in row order.
    table : pd.DataFrame, optional
        Frame supplying the 'Latest Score' column and the index of names.
        Defaults to the notebook-level ``corps_table``.

    Returns
    -------
    list of str
        One ``"#<rank> (<score>) <name>"`` label per flag. The rank is one more
        than the number of visible rows seen before the current one, so a hidden
        row carries the same number as the next visible row.
    """
    if table is None:
        table = corps_table

    latest_scores = table['Latest Score']
    labels = list()
    visible_so_far = 0
    for (position, is_visible) in enumerate(cat):
        # .iloc makes the positional lookup explicit: the Series is labelled by
        # name, and integer keys through [] on such a Series are deprecated.
        labels.append(f"#{visible_so_far + 1} ({latest_scores.iloc[position]:.3f}) {table.index[position]}")
        # Truthiness (rather than `is True`) also counts numpy booleans.
        if is_visible:
            visible_so_far += 1
    return labels

# Dropdown: one entry for all corps plus one per competitive class.
dropdown_labels = ["All Corps"] + corps_classes

# One visibility mask per dropdown entry, with one flag per corps_table row.
dropdown_categories = [[True for _ in corps_table['Class']]] + [
    [x == class_name for x in corps_table['Class']] for class_name in corps_classes
]

# Each button restyles the traces: it toggles visibility and renames the legend
# entries so the rank counts only the visible corps.
dropdown_dicts = [
    dict(
    label=label,
    method='restyle',
    args=[{"visible": category, "name": active_ranking(category)}]
    ) for (label, category) in zip(dropdown_labels, dropdown_categories)
]


fig.update_layout(
    title=f'<b>DCI 2024 Scores by Roman Ramirez</b><br>{updated_time}<br>',
    xaxis_title='<b>Date</b>',
    yaxis_title='<b>Total Score</b>',
    updatemenus=[
        dict(
            active=0,
            buttons=dropdown_dicts
            ,
        )       
    ]
)

# NOTE(review): the hover template below only references %{text}; this
# customdata array does not appear to be used by it.
customdata = np.stack((
    list(scores_table['Location']),
    list(scores_table_diff['Total Score']),
    list(scores_table['Corps'])
    ), axis=0)
hovertemplate = (
    '%{text}<br>' +
    '<extra></extra>'
)

fig.update_traces(
    customdata=customdata,
    hovertemplate=hovertemplate,
    opacity=0.8,
    legendgrouptitle_text='<b>#<i>Rank</i> (<i>Latest Score</i>) <i>Corps</i></b>'
)

fig.show()
fig.write_html("index.html")

# Individual CSV + XLSX exports, one pair per table.
export(scores_table, 'scores', True)
export(corps_table, 'corps', True)
export(comps_table, 'comps', True)
export(captions_table, 'captions', True)
export(championship_rounds_table, 'championship_rounds', True)
export(mlb_table, 'mlb', True)

# Combined workbook: the dict key is the sheet name.
frames = {'scores': scores_table, 'corps': corps_table, 'comps': comps_table, 'captions': captions_table, 'championship rounds': championship_rounds_table, 'mlb': mlb_table}

# The context manager saves and closes the workbook even when writing one of the
# sheets raises, so the file handle is never left open.
with pd.ExcelWriter('exports/dci_data.xlsx', engine='xlsxwriter') as writer:
    for sheet, frame in frames.items():
        frame.to_excel(writer, sheet_name = sheet)

In [400]:
#to dos

# in import, create "static" corps master file
# # contains class, and other corps-related info

# SQL join this on scores table
# add buttons to filter by class