In [75]:
from os import listdir
from os.path import isfile, join
import numpy as np
import pandas as pd
from IPython.display import display
import datetime
import re
import csv
from geopy.geocoders import Nominatim

mypath = r"C:\Users\justr\Documents\dci-2024\raw data"
files = [f for f in listdir(mypath) if isfile(join(mypath, f))]

with open('inputs/corps_data.csv', 'r') as f:
  list_of_corps = csv.reader(f)
  list_of_corps = [x[0] for x in list_of_corps]

def export(df: pd.DataFrame, path_no_dot: str, index=True):
    df.to_csv(f'exports/{path_no_dot}.csv', index=index)
    df.to_excel(f'exports/{path_no_dot}.xlsx', index=index)

# for f in files:
#     print(f)

In [76]:
scores_list = list()
corps_dict = dict()

scores_table = pd.DataFrame(np.zeros([1,18]))
scores_table.columns = ['Corps','Date','Location','Performance Slot','General Effect 1','General Effect 2','General Effect Total','Visual Proficiency','Visual Analysis','Color Guard','Visual Total','Music Brass','Music Analysis','Music Percussion','Music Total','Sub Total','Penalty','Total Score']


comps_data = list()

corps_classes = ['DCI World Class', 'DCI Open Class','DCI All Age World Class','DCI All Age Open Class','DCI All Age Class A']

for (show_number, file) in enumerate(files):
    # print(file)

    df_list = pd.read_html("raw data/"+file)
    df = df_list[-1]
    df = df.applymap(lambda x: re.sub('[^!-~]+',' ',x).strip() if type(x) == str else x)

    date_and_place_and_comp = df.iloc[0, 0]
    comp = None
    if len(date_and_place_and_comp.split(sep='--')) > 1:
        (date_and_place, comp) = date_and_place_and_comp.split(sep='--')
    else:
        date_and_place = date_and_place_and_comp
    chunks = date_and_place.split(sep=' ')
    place = " ".join(chunks[1:])


    # creating corps static file:
    corps_class = None
    running_name = ""
    for r in df.iloc[4:,0]: # this slice could be buggy, blind assumption that we can always skip the first 4 lines
        # print(f"r is {r}")
        if r in corps_classes:
            corps_class = r
        if corps_class != r:
            if running_name == "":
                running_name = str(r)
            else:
                if type(r) is str:
                    running_name += " " + r
                if running_name != 'nan':
                    corps_dict.update({running_name: corps_class})
                    running_name = ""
    
    for (i, r) in enumerate(df[0]):
        if type(r) == str:
            if r in corps_classes:
                df.drop(i, inplace=True)


    df = df.iloc[4:-1].reset_index(drop=True)


    # grab names
    names = df[0].reset_index(drop=True)

    clean_names = list()
    for (i, r) in enumerate(names):
        if (i % 2) == 0:
            running_name = str(r)
        else:
            try:
                float(r)
            except:
                running_name += " " + r
            clean_names.append(running_name)
    # print(clean_names)

    df = df.iloc[::2, :].reset_index(drop=True)
    df.columns = ['Corps','Performance Slot','General Effect 1','General Effect 2','General Effect Total','Visual Proficiency','Visual Analysis','Color Guard','Visual Total','Music Brass','Music Analysis','Music Percussion','Music Total','Sub Total','Penalty','Total Score']
    df['Corps'] = clean_names
    df.insert(1,column='Date',value=pd.to_datetime(chunks[0]))
    df.insert(2,column='Location',value=place)
    df.insert(0,column='Show Number',value=show_number+1)
    df.iloc[:,5:] = df.iloc[:,5:].astype(float)

    
    for (i, r) in df.iterrows():
        if np.isnan(r['Total Score']):
            df.drop(i, inplace=True)            
    
    df['Performance Slot'] = df['Performance Slot'].astype(int)
    # num_performances = df['Performance Slot'].max()


    scores_list.append(df)
    comps_data.append([place, comp])#, num_performances])

scores_table = pd.concat(scores_list)
scores_table.reset_index(inplace=True,drop=True)

scores_table['Days from Season Start'] = (scores_table['Date'] - scores_table['Date'].min()).apply(lambda x: x.days)

corps_table = pd.DataFrame(corps_dict.items(), columns=['Corps', 'Class'])
corps_table.set_index('Corps',inplace=True)

corps_rename_dict = {
    "Vanguard": "Santa Clara Vanguard",
    "Academy": "The Academy",
    "Cavaliers": "The Cavaliers",
    "Bushwackers": "Bushwackers Drum Corps",
    "Connecticut Hurricanes": "Hurricanes"
}

for (k, v) in corps_rename_dict.items():
    corps_table.rename(index={k: v}, inplace=True)
    scores_table.replace(k, v, inplace=True)

shell_corps_table = pd.read_csv('inputs/corps_data.csv').set_index('Corps')
corps_table = shell_corps_table.join(other=corps_table,how='outer')

comps_table = pd.DataFrame(comps_data, columns=['Location', 'DCI Name'])#,'Number of Performances'])

for corps in corps_table.index:
    scores_table_corps = scores_table.loc[scores_table['Corps'] == corps]
    if scores_table_corps.shape[0] == 0:
        continue
    scores_table_corps_last = scores_table_corps.iloc[-1,:].to_dict()
    for (k, v) in scores_table_corps_last.items():
        if k != "Corps":
            corps_table.loc[corps,k] = v
    # corps_table.loc[corps,"Average Performace Slot"] = scores_table_corps['Performance Slot'].mean() / comps_table.loc[]

corps_table['Letters in Name'] = [len(x.replace(' ','')) for x in list(corps_table.index)]

with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(corps_table)
    display(scores_table)
    display(comps_table)

Unnamed: 0_level_0,Home Location,Latitude,Longitude,Class,Show Number,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,Visual Analysis,Color Guard,Visual Total,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start,Letters in Name
Corps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
7th Regiment,"New London, CT",41.355619,-72.09978,DCI Open Class,5.0,2024-06-29,"Shelton, Connecticut",1.0,11.1,11.2,22.3,8.8,12.4,10.9,16.05,8.9,11.1,10.0,15.0,53.35,0.0,53.35,3.0,11
Atlanta CV,"Atlanta, GA",33.748992,-84.390264,DCI All Age World Class,23.0,2024-07-09,"Newnan, Georgia",1.0,15.5,15.0,30.5,15.8,15.3,15.2,23.15,16.2,15.4,14.4,23.0,76.65,0.0,76.65,13.0,9
Blue Devils,"Concord, CA",37.976852,-122.033562,DCI World Class,18.0,2024-07-07,"Sacramento, California",4.0,16.5,16.4,32.9,16.2,16.8,16.0,24.5,16.5,16.4,16.4,24.65,82.05,0.0,82.05,11.0,10
Blue Devils B,"Concord, CA",37.976852,-122.033562,DCI Open Class,18.0,2024-07-07,"Sacramento, California",5.0,14.1,14.2,28.3,12.9,13.6,12.6,19.55,12.8,13.7,14.0,20.25,68.1,0.0,68.1,11.0,11
Blue Devils C,"Concord, CA",37.976852,-122.033562,DCI Open Class,18.0,2024-07-07,"Sacramento, California",2.0,11.0,10.4,21.4,9.3,11.3,10.0,15.3,10.2,10.7,10.2,15.55,52.25,0.0,52.25,11.0,11
Blue Knights,"Denver, CO",39.739236,-104.984862,DCI World Class,21.0,2024-07-09,"Boise, Idaho",1.0,14.9,14.9,29.8,14.6,15.3,14.1,22.0,15.0,14.8,13.6,21.7,73.5,0.0,73.5,13.0,11
Blue Stars,"La Crosse, WI",43.812284,-91.251435,DCI World Class,17.0,2024-07-07,"LaCrosse, Wisconsin",5.0,14.7,15.2,29.9,15.1,15.3,15.3,22.85,15.3,15.0,15.3,22.8,75.55,0.0,75.55,11.0,9
Bluecoats,"Canton, OH",40.798546,-81.374951,DCI World Class,17.0,2024-07-07,"LaCrosse, Wisconsin",4.0,16.6,17.3,33.9,16.4,16.5,16.5,24.7,16.5,16.5,16.6,24.8,83.4,0.0,83.4,11.0,9
Boston Crusaders,"Boston, MA",42.355433,-71.060511,DCI World Class,23.0,2024-07-09,"Newnan, Georgia",3.0,17.0,17.0,34.0,16.9,16.7,17.1,25.35,16.5,17.6,17.0,25.5,84.9,0.0,84.9,13.0,15
Bushwackers Drum Corps,"Princeton, NJ",40.349695,-74.659738,DCI All Age World Class,5.0,2024-06-29,"Shelton, Connecticut",1.0,14.8,14.7,29.5,15.1,13.8,14.6,21.75,15.3,15.5,15.6,23.2,74.45,0.0,74.45,3.0,20


Unnamed: 0,Show Number,Corps,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,Visual Analysis,Color Guard,Visual Total,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start
0,1,Boston Crusaders,2024-06-26,"Rockford, Michigan",5,15.4,14.4,29.8,14.5,14.3,14.5,21.65,14.3,14.7,14.1,21.55,73.0,0.0,73.0,0
1,1,Phantom Regiment,2024-06-26,"Rockford, Michigan",2,14.8,14.2,29.0,14.0,13.6,13.6,20.6,13.8,14.2,13.1,20.55,70.15,0.0,70.15,0
2,1,The Cavaliers,2024-06-26,"Rockford, Michigan",4,13.7,13.9,27.6,13.8,13.3,12.9,20.0,13.4,14.1,13.8,20.65,68.25,0.0,68.25,0
3,1,Blue Stars,2024-06-26,"Rockford, Michigan",1,14.1,13.3,27.4,13.3,12.8,13.4,19.75,12.9,13.6,12.5,19.5,66.65,0.0,66.65,0
4,1,Colts,2024-06-26,"Rockford, Michigan",3,13.8,13.0,26.8,13.4,12.6,12.8,19.4,12.7,13.3,12.4,19.2,65.4,0.0,65.4,0
5,2,Carolina Crown,2024-06-28,"Muncie, Indiana",6,14.9,15.4,30.3,15.2,15.0,14.6,22.4,15.1,15.1,14.2,22.2,74.9,0.0,74.9,2
6,2,Boston Crusaders,2024-06-28,"Muncie, Indiana",5,15.0,15.0,30.0,14.7,15.2,14.9,22.4,14.8,14.8,15.0,22.3,74.7,0.0,74.7,2
7,2,Phantom Regiment,2024-06-28,"Muncie, Indiana",3,14.5,14.7,29.2,14.5,14.2,13.8,21.25,14.4,14.7,14.3,21.7,72.15,0.0,72.15,2
8,2,The Cavaliers,2024-06-28,"Muncie, Indiana",2,13.7,14.1,27.8,14.0,13.4,13.1,20.25,13.7,14.3,14.1,21.05,69.1,0.0,69.1,2
9,2,Blue Stars,2024-06-28,"Muncie, Indiana",1,13.4,13.8,27.2,13.2,13.3,13.5,20.0,13.3,13.8,12.9,20.0,67.2,0.0,67.2,2


Unnamed: 0,Location,DCI Name
0,"Rockford, Michigan",
1,"Muncie, Indiana",
2,"Walnut, California",
3,"Madison, Wisconsin",
4,"Shelton, Connecticut",
5,"Walnut, California",
6,"Lisle, Illinois",
7,"San Bernardino, California",
8,"Mason, OH",
9,"Cedarburg, Wisconsin",


In [77]:
current_scores = dict()
latest_perf = dict()

for corps in set(scores_table['Corps']):
    scores_table_corps = scores_table.loc[scores_table['Corps'] == corps]
    current_scores.update({corps: list(scores_table_corps['Total Score'])[-1]})
    latest_perf.update({corps: list(scores_table_corps['Date'])[-1]})

corps_table = corps_table.join(pd.DataFrame(current_scores.items(),columns=['Corps','Latest Score']).set_index('Corps'),how='outer')
corps_table = corps_table.join(pd.DataFrame(latest_perf.items(),columns=['Corps','Last Performance Date']).set_index('Corps'),how='outer')
corps_table.sort_values(by=['Latest Score','Corps'],inplace=True,ascending=[False, True])
display(corps_table)

Unnamed: 0_level_0,Home Location,Latitude,Longitude,Class,Show Number,Date,Location,Performance Slot,General Effect 1,General Effect 2,...,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start,Letters in Name,Latest Score,Last Performance Date
Corps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Boston Crusaders,"Boston, MA",42.355433,-71.060511,DCI World Class,23.0,2024-07-09,"Newnan, Georgia",3.0,17.0,17.0,...,17.6,17.0,25.5,84.9,0.0,84.9,13.0,15,84.9,2024-07-09
Carolina Crown,"Fort Mill, SC",35.00737,-80.945076,DCI World Class,20.0,2024-07-08,"Sevierville, Tennessee",4.0,16.6,16.7,...,18.0,16.4,25.3,83.45,0.0,83.45,12.0,13,83.45,2024-07-08
Bluecoats,"Canton, OH",40.798546,-81.374951,DCI World Class,17.0,2024-07-07,"LaCrosse, Wisconsin",4.0,16.6,17.3,...,16.5,16.6,24.8,83.4,0.0,83.4,11.0,9,83.4,2024-07-07
Blue Devils,"Concord, CA",37.976852,-122.033562,DCI World Class,18.0,2024-07-07,"Sacramento, California",4.0,16.5,16.4,...,16.4,16.4,24.65,82.05,0.0,82.05,11.0,10,82.05,2024-07-07
Phantom Regiment,"Rockford, IL",42.271394,-89.093966,DCI World Class,22.0,2024-07-09,"Mankto, Minnesota",3.0,16.3,16.4,...,16.3,16.1,24.1,81.4,0.0,81.4,13.0,15,81.4,2024-07-09
Mandarins,"Sacramento, CA",38.581061,-121.493895,DCI World Class,21.0,2024-07-09,"Boise, Idaho",3.0,16.3,16.1,...,15.9,14.6,23.5,80.0,0.0,80.0,13.0,9,80.0,2024-07-09
Santa Clara Vanguard,"Santa Clara, CA",37.233325,-121.684635,DCI World Class,18.0,2024-07-07,"Sacramento, California",1.0,15.4,15.8,...,15.3,16.1,23.55,77.5,0.0,77.5,11.0,18,77.5,2024-07-07
The Cavaliers,"Rosemont, IL",41.994133,-87.875674,DCI World Class,16.0,2024-07-06,"Whitewater, Wisconsin",1.0,15.4,15.2,...,15.4,16.4,23.35,77.15,0.0,77.15,10.0,12,77.15,2024-07-06
Colts,"Dubuque, IA",42.500624,-90.664799,DCI World Class,22.0,2024-07-09,"Mankto, Minnesota",2.0,15.7,15.1,...,15.5,15.5,22.85,76.95,0.0,76.95,13.0,5,76.95,2024-07-09
Reading Buccaneers,"Reading, PA",40.335345,-75.927949,DCI All Age World Class,5.0,2024-06-29,"Shelton, Connecticut",3.0,15.5,15.5,...,16.2,15.5,23.7,76.95,0.0,76.95,3.0,17,76.95,2024-06-29


In [78]:
# determine difference in score by comp

scores_table_diff = scores_table.copy(deep=True)
scores_table_diff.iloc[:,5:] = 0.0
scores_table_rate = scores_table.copy(deep=True)
scores_table_rate.iloc[:,5:] = 0.0

for corps in set(scores_table['Corps']):
    scores_table_corps = scores_table.loc[scores_table['Corps'] == corps]
    scores_table_diff_corps = scores_table_diff.loc[scores_table_diff['Corps'] == corps]
    scores_table_rate_corps = scores_table_rate.loc[scores_table_rate['Corps'] == corps]
    
    for c in scores_table_corps.columns:
        if c not in ['Corps','Location']:
            scores_table_diff_corps.loc[:,c] = scores_table_corps.loc[:,c].diff()
    
    for c in scores_table_corps.columns:
        if c not in ['Corps','Location']:
            scores_table_rate_corps.loc[:,c] = scores_table_diff_corps.loc[:,c] / scores_table_diff_corps.loc[:,"Date"].apply(lambda x: x.days)
    
    for (i, r) in scores_table_diff_corps.iterrows():
        scores_table_diff.iloc[i,:] = r
    
    for (i, r) in scores_table_rate_corps.iterrows():
        scores_table_rate.iloc[i,:] = r
    
    corps_table.loc[corps,'Number of Shows'] = int(scores_table_corps.shape[0])
    corps_table.loc[corps, 'Average Rest'] = scores_table_diff_corps.loc[:,'Date'].apply(lambda x: x.days).mean()
    corps_table.loc[corps, 'Average Score Improvement'] = scores_table_diff_corps.loc[:,'Total Score'].mean()
    
    # normal ranges from 0 to 1
    corps_table.loc[corps,'Average of Last 3 Scores'] = scores_table_corps.loc[-2:,'Total Score'].mean()
    corps_table.loc[corps,'Average Normal General Effect'] = scores_table_corps.loc[-2:,'General Effect Total'].mean() / 40
    corps_table.loc[corps,'Average Normal Visual'] = scores_table_corps.loc[-2:,'Visual Total'].mean() / 30
    corps_table.loc[corps,'Average Normal Music'] = scores_table_corps.loc[-2:,'Music Total'].mean() / 30
    corps_table.loc[corps,'Average Normal Brass'] = scores_table_corps.loc[-2:,'Music Brass'].mean() / 20
    corps_table.loc[corps,'Average Normal Percussion'] = scores_table_corps.loc[-2:,'Music Percussion'].mean() / 20

    if len(set(corps_table.loc[corps, ['Average Normal General Effect', 'Average Normal Visual', 'Average Normal Music']].values)) < len(list(corps_table.loc[corps, ['Average Normal General Effect', 'Average Normal Visual', 'Average Normal Music']].values)):
        corps_table.loc[corps, 'Best Caption'] = 'Multiple'
    else:
        max_column_index = np.argmax(corps_table.loc[corps, ['Average Normal General Effect', 'Average Normal Visual', 'Average Normal Music']].values)
        corps_table.loc[corps, 'Best Caption'] = ['General Effect', 'Visual', 'Music'][max_column_index]


    corps_table.loc[corps, 'Best Music'] = ['Brass', 'Percussion'][int(corps_table.loc[corps, 'Average Normal Brass'] < corps_table.loc[corps, 'Average Normal Percussion'])] if corps_table.loc[corps, 'Average Normal Brass'] != corps_table.loc[corps, 'Average Normal Percussion'] else 'Neither'

    # with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    #     if corps == "Phantom Regiment":
    #         display(scores_table_corps)
    #         display(scores_table_diff_corps)
    #         display(scores_table_rate_corps)

corps_table['Rank of Last 3 Scores'] = corps_table['Average of Last 3 Scores'].rank(ascending=False, method='max')
corps_table['Rank of Last Score'] = corps_table['Latest Score'].rank(ascending=False, method='max')
corps_table['Rank of Last 3 Scores No All-Age'] = corps_table.loc[['All Age' not in str(x) for x in corps_table['Class']], 'Average of Last 3 Scores'].rank(ascending=False, method='max')
corps_table['Rank of Last Score No All-Age'] = corps_table.loc[['All Age' not in str(x) for x in corps_table['Class']], 'Latest Score'].rank(ascending=False, method='max')

def ordinal(n):
    if str(n) == 'nan':
        return None
    else:
        return "%d%s" % (int(n),"tsnrhtdd"[(int(n)//10%10!=1)*(int(n)%10<4)*int(n)%10::4])
    
corps_table['Ordinal Rank of Last 3 Scores'] = corps_table['Rank of Last 3 Scores'].apply(ordinal)
corps_table['Ordinal Rank of Last Score'] = corps_table['Rank of Last Score'].apply(ordinal)
corps_table['Ordinal Rank of Last 3 Scores No All-Age'] = corps_table['Rank of Last 3 Scores No All-Age'].apply(ordinal)
corps_table['Ordinal Rank of Last Score No All-Age'] = corps_table['Rank of Last Score No All-Age'].apply(ordinal)


championship_rounds_table = pd.DataFrame([None,'Prelims','Semis','Finals'],columns=['Championship Rounds'])

def calc_championship_round(rank: float):
    if rank <= 12:
        return championship_rounds_table['Championship Rounds'][3]
    elif rank <= 25:
        return championship_rounds_table['Championship Rounds'][2]
    elif rank <= 40:
        return championship_rounds_table['Championship Rounds'][1]
    else:
        return championship_rounds_table['Championship Rounds'][0]

corps_table['Projected Championship Round'] = corps_table['Rank of Last 3 Scores No All-Age'].apply(calc_championship_round)

captions_table = pd.DataFrame(['General Effect','Visual','Music','Multiple','Brass','Percussion','Neither'],columns=['Caption Title'])

with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(corps_table)

Unnamed: 0_level_0,Home Location,Latitude,Longitude,Class,Show Number,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,Visual Analysis,Color Guard,Visual Total,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start,Letters in Name,Latest Score,Last Performance Date,Number of Shows,Average Rest,Average Score Improvement,Average of Last 3 Scores,Average Normal General Effect,Average Normal Visual,Average Normal Music,Average Normal Brass,Average Normal Percussion,Best Caption,Best Music,Rank of Last 3 Scores,Rank of Last Score,Rank of Last 3 Scores No All-Age,Rank of Last Score No All-Age,Ordinal Rank of Last 3 Scores,Ordinal Rank of Last Score,Ordinal Rank of Last 3 Scores No All-Age,Ordinal Rank of Last Score No All-Age,Projected Championship Round
Corps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
Boston Crusaders,"Boston, MA",42.355433,-71.060511,DCI World Class,23.0,2024-07-09,"Newnan, Georgia",3.0,17.0,17.0,34.0,16.9,16.7,17.1,25.35,16.5,17.6,17.0,25.5,84.9,0.0,84.9,13.0,15,84.9,2024-07-09,5.0,3.25,2.975,77.53,0.7825,0.772,0.768667,0.757,0.775,General Effect,Percussion,4.0,1.0,4.0,1.0,4th,1st,4th,1st,Finals
Carolina Crown,"Fort Mill, SC",35.00737,-80.945076,DCI World Class,20.0,2024-07-08,"Sevierville, Tennessee",4.0,16.6,16.7,33.3,16.7,16.5,16.5,24.85,16.2,18.0,16.4,25.3,83.45,0.0,83.45,12.0,13,83.45,2024-07-08,4.0,3.333333,2.85,78.35,0.79,0.77625,0.782083,0.77625,0.76875,General Effect,Brass,3.0,2.0,3.0,2.0,3rd,2nd,3rd,2nd,Finals
Bluecoats,"Canton, OH",40.798546,-81.374951,DCI World Class,17.0,2024-07-07,"LaCrosse, Wisconsin",4.0,16.6,17.3,33.9,16.4,16.5,16.5,24.7,16.5,16.5,16.6,24.8,83.4,0.0,83.4,11.0,9,83.4,2024-07-07,3.0,2.5,2.325,81.35,0.825833,0.805556,0.805,0.795,0.811667,General Effect,Percussion,1.0,3.0,1.0,3.0,1st,3rd,1st,3rd,Finals
Blue Devils,"Concord, CA",37.976852,-122.033562,DCI World Class,18.0,2024-07-07,"Sacramento, California",4.0,16.5,16.4,32.9,16.2,16.8,16.0,24.5,16.5,16.4,16.4,24.65,82.05,0.0,82.05,11.0,10,82.05,2024-07-07,6.0,1.8,1.09,79.158333,0.794583,0.786944,0.792222,0.791667,0.791667,General Effect,Neither,2.0,4.0,2.0,4.0,2nd,4th,2nd,4th,Finals
Phantom Regiment,"Rockford, IL",42.271394,-89.093966,DCI World Class,22.0,2024-07-09,"Mankto, Minnesota",3.0,16.3,16.4,32.7,16.1,16.6,16.5,24.6,15.8,16.3,16.1,24.1,81.4,0.0,81.4,13.0,15,81.4,2024-07-09,8.0,1.857143,1.607143,76.3,0.770625,0.759167,0.756667,0.7475,0.758125,General Effect,Percussion,7.0,5.0,5.0,5.0,7th,5th,5th,5th,Finals
Mandarins,"Sacramento, CA",38.581061,-121.493895,DCI World Class,21.0,2024-07-09,"Boise, Idaho",3.0,16.3,16.1,32.4,15.9,16.3,16.0,24.1,16.5,15.9,14.6,23.5,80.0,0.0,80.0,13.0,9,80.0,2024-07-09,7.0,1.833333,1.391667,75.35,0.758929,0.754524,0.745238,0.757143,0.733571,General Effect,Brass,8.0,6.0,6.0,6.0,8th,6th,6th,6th,Finals
Santa Clara Vanguard,"Santa Clara, CA",37.233325,-121.684635,DCI World Class,18.0,2024-07-07,"Sacramento, California",1.0,15.4,15.8,31.2,15.6,15.4,14.5,22.75,15.7,15.3,16.1,23.55,77.5,0.0,77.5,11.0,18,77.5,2024-07-07,6.0,1.8,0.97,74.675,0.742083,0.739444,0.760278,0.745,0.785833,Music,Percussion,11.0,7.0,8.0,7.0,11th,7th,8th,7th,Finals
The Cavaliers,"Rosemont, IL",41.994133,-87.875674,DCI World Class,16.0,2024-07-06,"Whitewater, Wisconsin",1.0,15.4,15.2,30.6,15.5,15.5,15.4,23.2,14.9,15.4,16.4,23.35,77.15,0.0,77.15,10.0,12,77.15,2024-07-06,6.0,2.0,1.78,72.883333,0.729167,0.721389,0.735833,0.718333,0.756667,Music,Percussion,15.0,8.0,10.0,8.0,15th,8th,10th,8th,Finals
Colts,"Dubuque, IA",42.500624,-90.664799,DCI World Class,22.0,2024-07-09,"Mankto, Minnesota",2.0,15.7,15.1,30.8,15.0,16.0,15.6,23.3,14.7,15.5,15.5,22.85,76.95,0.0,76.95,13.0,5,76.95,2024-07-09,8.0,1.857143,1.65,71.675,0.721875,0.713958,0.712708,0.700625,0.725625,General Effect,Percussion,19.0,10.0,13.0,9.0,19th,10th,13th,9th,Semis
Reading Buccaneers,"Reading, PA",40.335345,-75.927949,DCI All Age World Class,5.0,2024-06-29,"Shelton, Connecticut",3.0,15.5,15.5,31.0,15.5,14.1,14.9,22.25,15.7,16.2,15.5,23.7,76.95,0.0,76.95,3.0,17,76.95,2024-06-29,1.0,,,76.95,0.775,0.741667,0.79,0.785,0.775,Music,Brass,5.0,10.0,,,5th,10th,,,


In [79]:
import plotly.graph_objects as go
import plotly.colors as pc

(a, b) = np.polyfit(scores_table['Days from Season Start'].astype(int), scores_table['Total Score'].astype(float), 1)
growth_fit = lambda x: (a * x) + b

scores_table['Total Score Residual'] = scores_table['Total Score'] - scores_table['Days from Season Start'].apply(growth_fit)

fig = go.Figure()

# adding player
ranking = 1
trace_colors = pc.qualitative.Bold
for (i, corps) in enumerate(corps_table.index):
    scores_table_corps = scores_table.loc[scores_table['Corps'] == corps]
    scores_table_diff_corps = scores_table_diff.loc[scores_table['Corps'] == corps]
    scores_table_rate_corps = scores_table_rate.loc[scores_table['Corps'] == corps]
    fig.add_trace(go.Scatter(
        x=scores_table_corps['Date'],
        y=scores_table_corps['Total Score'],
        name=f'#{ranking} ({corps_table.loc[corps,"Latest Score"]:.3f}) {corps}',
        mode='lines+markers',
        connectgaps=True,
        line=dict(
            #shape='hv',
            color=trace_colors[i % len(trace_colors)]
        ),
        text=[
            f'<b>{x[0]}</b><br><br>' + 
            f"{x[1].strftime('%A, %d %B %Y')}<br>"
            f"{x[2]}<br><br>" +
            f"Competition Score: {x[3]:.3f}<br>"
            f"Difference: {x[4]:+.3f}<br>"
            f"Score Growth: {x[5]:+.3f} per day"
            for x in zip(
                scores_table_corps['Corps'],
                scores_table_corps['Date'],
                scores_table_corps['Location'],
                scores_table_corps['Total Score'],
                scores_table_diff_corps['Total Score'],
                scores_table_rate_corps['Total Score']
            )
        ]
    ))
    ranking += 1

updated_time = f'<i>Updated {str(datetime.datetime.now().strftime("%A, %b %d, %Y %H:%M:%S"))} CT</i>'

def active_ranking(cat):
    i = 0
    t = 0
    retList = list()
    for c in cat:
        retList.append(f"#{i+1} ({corps_table['Latest Score'][t]:.3f}) {corps_table.index[t]}")
        if c is True:
            i += 1
        t += 1
    return retList 

dropdown_labels = ["All Corps"] + corps_classes

dropdown_categories = [[True for _ in corps_table['Class']]] + [
    [x == corps_classes[i] for x in corps_table['Class']] for i in range(len(corps_classes))
]

dropdown_dicts = [
    dict(
    label=label,
    method='restyle',
    args=[{"visible": category, "name": active_ranking(category)}]
    ) for (label, category) in zip(dropdown_labels, dropdown_categories)
]


fig.update_layout(
    title=f'<b>DCI 2024 Scores by Roman Ramirez</b><br>{updated_time}<br>',
    xaxis_title='<b>Date</b>',
    yaxis_title='<b>Total Score</b>',
    updatemenus=[
        dict(
            active=0,
            buttons=dropdown_dicts
            ,
        )       
    ]
)

customdata = np.stack((
    list(scores_table['Location']),
    list(scores_table_diff['Total Score']),
    list(scores_table['Corps'])
    ), axis=0)
hovertemplate = (
    '%{text}<br>' +
    '<extra></extra>'
)

fig.update_traces(
    customdata=customdata,
    hovertemplate=hovertemplate,
    opacity=0.8,
    legendgrouptitle_text='<b>#<i>Rank</i> (<i>Latest Score</i>) <i>Corps</i></b>'
)

fig.show()
fig.write_html("index.html")

export(scores_table, 'scores', True)
export(corps_table, 'corps', True)
export(comps_table, 'comps', True)
export(captions_table, 'captions', True)
export(championship_rounds_table, 'championship_rounds', True)

#initialze the excel writer
writer = pd.ExcelWriter('exports/dci_data.xlsx', engine='xlsxwriter')

#store your dataframes in a  dict, where the key is the sheet name you want
frames = {'scores': scores_table, 'corps': corps_table, 'comps': comps_table, 'captions': captions_table, 'championship rounds': championship_rounds_table}

#now loop thru and put each on a specific sheet
for sheet, frame in  frames.items(): # .use .items for python 3.X
    frame.to_excel(writer, sheet_name = sheet)

#critical last step
writer.close()

In [80]:
#to dos

# in import, create "static" corps master file
# # contains class, and other corps-related info

# SQL join this on scores table
# add buttons to filter by class