In [4]:
from os import listdir
from os.path import isfile, join
import numpy as np
import pandas as pd
from IPython.display import display
import datetime
import re
import csv
from geopy.geocoders import Nominatim

# Folder holding the saved score-recap HTML pages (one file per show).
# NOTE(review): machine-specific absolute path; consider a path relative to the
# notebook so the run is reproducible on another machine.
mypath = r"C:\Users\justr\Documents\dci-2024\raw data"
# Plain files only; sub-directories are skipped.
# NOTE(review): os.listdir returns entries in arbitrary order and the show
# numbering later follows this order -- confirm the file names sort by date.
files = [f for f in listdir(mypath) if isfile(join(mypath, f))]

# First column of every non-empty row in the corps roster file.
# newline='' lets the csv module do its own newline handling, and blank rows are
# skipped so `row[0]` cannot raise IndexError.
# NOTE(review): if the file has a header row, its first cell is included here too.
with open('inputs/corps_data.csv', 'r', newline='') as f:
    list_of_corps = [row[0] for row in csv.reader(f) if row]

def export(df: pd.DataFrame, path_no_dot: str, index=True):
    """Write ``df`` to ``exports/<path_no_dot>.csv`` and ``exports/<path_no_dot>.xlsx``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to write.
    path_no_dot : str
        File name (optionally with sub-folders) without extension, relative to
        the ``exports`` folder.
    index : bool, default True
        Forwarded to ``to_csv`` / ``to_excel`` to control whether the index is
        written.
    """
    import os  # local import: only `listdir` is imported from os at file level

    csv_path = f'exports/{path_no_dot}.csv'
    xlsx_path = f'exports/{path_no_dot}.xlsx'

    # Create the target folder first so a fresh checkout does not fail on the
    # first export.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)

    df.to_csv(csv_path, index=index)
    df.to_excel(xlsx_path, index=index)

# for f in files:
#     print(f)

In [5]:
# Accumulators filled while the recap files are parsed:
#   scores_list -- one cleaned DataFrame per show
#   corps_dict  -- corps name -> competition class
scores_list = []
corps_dict = {}

# Single all-zero placeholder row carrying the score-sheet column names.
scores_table = pd.DataFrame(
    np.zeros((1, 18)),
    columns=[
        'Corps', 'Date', 'Location', 'Performance Slot',
        'General Effect 1', 'General Effect 2', 'General Effect Total',
        'Visual Proficiency', 'Visual Analysis', 'Color Guard', 'Visual Total',
        'Music Brass', 'Music Analysis', 'Music Percussion', 'Music Total',
        'Sub Total', 'Penalty', 'Total Score',
    ],
)

# Class header strings as they appear in the recap tables.
corps_classes = [
    'DCI World Class',
    'DCI Open Class',
    'DCI All Age World Class',
    'DCI All Age Open Class',
    'DCI All Age Class A',
]

# Parse every recap file into one tidy frame per show (appended to scores_list)
# and record each corps' class in corps_dict along the way.
for (show_number, file) in enumerate(files):
    # Read from the folder `files` was listed from, so the notebook does not
    # depend on the current working directory.
    df_list = pd.read_html(join(mypath, file))
    # The last table on the page is used as the score table.
    df = df_list[-1]

    # Collapse every run of characters outside printable ASCII '!'..'~'
    # (spaces, line breaks, non-ASCII) into a single space and trim the ends.
    clean_cell = lambda x: re.sub('[^!-~]+', ' ', x).strip() if isinstance(x, str) else x
    # DataFrame.map replaces the deprecated applymap in newer pandas; fall back
    # to applymap where map does not exist yet.
    elementwise = df.map if hasattr(df, 'map') else df.applymap
    df = elementwise(clean_cell)

    # Top-left cell: first space-separated token is the date, the rest is the place.
    date_and_place = df.iloc[0, 0]
    chunks = date_and_place.split(sep=' ')
    place = " ".join(chunks[1:])

    # creating corps static file:
    # Walk the first column; a class header switches the current class, and the
    # cells in between are consumed in pairs that together form one corps name.
    corps_class = None
    running_name = ""
    for r in df.iloc[4:,0]: # this slice could be buggy, blind assumption that we can always skip the first 4 lines
        if r in corps_classes:
            corps_class = r
        if corps_class != r:
            if running_name == "":
                # first cell of a pair (str() turns a missing cell into 'nan')
                running_name = str(r)
            else:
                # second cell of a pair: only text is appended
                if isinstance(r, str):
                    running_name += " " + r
                if running_name != 'nan':
                    corps_dict.update({running_name: corps_class})
                    running_name = ""

    # Remove the class header rows, then the first four rows and the last row.
    df = df.loc[~df[0].isin(corps_classes)]
    df = df.iloc[4:-1].reset_index(drop=True)

    # grab names
    names = df[0].reset_index(drop=True)

    # Rows come in pairs: even positions start a name; the odd position is
    # appended to it unless it parses as a number.
    clean_names = list()
    for (i, r) in enumerate(names):
        if (i % 2) == 0:
            running_name = str(r)
        else:
            try:
                float(r)
            except ValueError:
                # not numeric text, so it is the second half of the corps name
                running_name += " " + r
            clean_names.append(running_name)

    # Keep the first row of every pair and label the 16 score-sheet columns.
    df = df.iloc[::2, :].reset_index(drop=True)
    df.columns = ['Corps','Performance Slot','General Effect 1','General Effect 2','General Effect Total','Visual Proficiency','Visual Analysis','Color Guard','Visual Total','Music Brass','Music Analysis','Music Percussion','Music Total','Sub Total','Penalty','Total Score']
    df['Corps'] = clean_names
    df.insert(1,column='Date',value=pd.to_datetime(chunks[0]))
    df.insert(2,column='Location',value=place)
    df.insert(0,column='Show Number',value=show_number+1)

    # Everything from 'General Effect 1' onward is a score. astype() on the
    # frame replaces the columns, so they really become float64 instead of
    # staying object dtype after an in-place iloc write.
    df = df.astype({c: float for c in df.columns[5:]})

    # Rows without a total score are discarded.
    df = df.dropna(subset=['Total Score'])

    df = df.astype({'Performance Slot': int})

    scores_list.append(df)

# Stack the per-show frames into one long table with a fresh 0..n-1 index.
scores_table = pd.concat(scores_list).reset_index(drop=True)

# Whole days between each show and the earliest date in the table.
scores_table['Days from Season Start'] = (
    scores_table['Date'] - scores_table['Date'].min()
).apply(lambda elapsed: elapsed.days)

# corps name -> class, indexed by corps name.
corps_table = pd.DataFrame(corps_dict.items(), columns=['Corps', 'Class']).set_index('Corps')

# Name as it appears in the recap pages -> name to use in the tables.
corps_rename_dict = {
    "Vanguard": "Santa Clara Vanguard",
    "Academy": "The Academy",
    "Cavaliers": "The Cavaliers",
    "Bushwackers": "Bushwackers Drum Corps",
    "Connecticut Hurricanes": "Hurricanes"
}

# Apply every rename in one pass, and only where corps names live: the index of
# corps_table and the 'Corps' column of scores_table. A frame-wide replace would
# also rewrite any other cell (e.g. a Location) that equals one of the keys.
corps_table = corps_table.rename(index=corps_rename_dict)
scores_table['Corps'] = scores_table['Corps'].replace(corps_rename_dict)

# Roster file indexed by corps name; the outer join keeps corps that appear in
# only one of the two tables.
shell_corps_table = pd.read_csv('inputs/corps_data.csv').set_index('Corps')
corps_table = shell_corps_table.join(other=corps_table, how='outer')

# Copy the last scores_table row of each corps (table order) into corps_table,
# one cell at a time; corps without any score rows are left untouched.
for corps in corps_table.index:
    scores_table_corps = scores_table[scores_table['Corps'] == corps]
    if scores_table_corps.empty:
        continue
    scores_table_corps_last = scores_table_corps.iloc[-1].to_dict()
    for (k, v) in scores_table_corps_last.items():
        if k == "Corps":
            # the corps name is already the row label
            continue
        corps_table.loc[corps, k] = v


# Show both tables in full (no row or column truncation).
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(corps_table)
    display(scores_table)

Unnamed: 0_level_0,Home Location,Class,Show Number,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,Visual Analysis,Color Guard,Visual Total,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start
Corps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7th Regiment,"New London, CT",DCI Open Class,5.0,2024-06-29,"Shelton, Connecticut",1.0,11.1,11.2,22.3,8.8,12.4,10.9,16.05,8.9,11.1,10.0,15.0,53.35,0.0,53.35,3.0
Atlanta CV,"Atlanta, GA",,,NaT,,,,,,,,,,,,,,,,,
Blue Devils,"Concord, CA",DCI World Class,18.0,2024-07-07,"Sacramento, California",4.0,16.5,16.4,32.9,16.2,16.8,16.0,24.5,16.5,16.4,16.4,24.65,82.05,0.0,82.05,11.0
Blue Devils B,"Concord, CA",DCI Open Class,18.0,2024-07-07,"Sacramento, California",5.0,14.1,14.2,28.3,12.9,13.6,12.6,19.55,12.8,13.7,14.0,20.25,68.1,0.0,68.1,11.0
Blue Devils C,"Concord, CA",DCI Open Class,18.0,2024-07-07,"Sacramento, California",2.0,11.0,10.4,21.4,9.3,11.3,10.0,15.3,10.2,10.7,10.2,15.55,52.25,0.0,52.25,11.0
Blue Knights,"Denver, CO",DCI World Class,19.0,2024-07-08,"Kennewick, Washington",2.0,14.9,14.8,29.7,15.2,15.0,15.1,22.65,14.6,14.7,14.8,22.05,74.4,0.0,74.4,12.0
Blue Stars,"La Crosse, WI",DCI World Class,17.0,2024-07-07,"LaCrosse, Wisconsin",5.0,14.7,15.2,29.9,15.1,15.3,15.3,22.85,15.3,15.0,15.3,22.8,75.55,0.0,75.55,11.0
Bluecoats,"Canton, OH",DCI World Class,17.0,2024-07-07,"LaCrosse, Wisconsin",4.0,16.6,17.3,33.9,16.4,16.5,16.5,24.7,16.5,16.5,16.6,24.8,83.4,0.0,83.4,11.0
Boston Crusaders,"Boston, MA",DCI World Class,9.0,2024-07-02,"Mason, OH",2.0,16.1,15.7,31.8,15.8,15.2,15.8,23.4,15.4,15.4,15.7,23.25,78.45,0.0,78.45,6.0
Bushwackers Drum Corps,"Princeton, NJ",DCI All Age World Class,5.0,2024-06-29,"Shelton, Connecticut",1.0,14.8,14.7,29.5,15.1,13.8,14.6,21.75,15.3,15.5,15.6,23.2,74.45,0.0,74.45,3.0


Unnamed: 0,Show Number,Corps,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,Visual Analysis,Color Guard,Visual Total,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start
0,1,Boston Crusaders,2024-06-26,"Rockford, Michigan",5,15.4,14.4,29.8,14.5,14.3,14.5,21.65,14.3,14.7,14.1,21.55,73.0,0.0,73.0,0
1,1,Phantom Regiment,2024-06-26,"Rockford, Michigan",2,14.8,14.2,29.0,14.0,13.6,13.6,20.6,13.8,14.2,13.1,20.55,70.15,0.0,70.15,0
2,1,The Cavaliers,2024-06-26,"Rockford, Michigan",4,13.7,13.9,27.6,13.8,13.3,12.9,20.0,13.4,14.1,13.8,20.65,68.25,0.0,68.25,0
3,1,Blue Stars,2024-06-26,"Rockford, Michigan",1,14.1,13.3,27.4,13.3,12.8,13.4,19.75,12.9,13.6,12.5,19.5,66.65,0.0,66.65,0
4,1,Colts,2024-06-26,"Rockford, Michigan",3,13.8,13.0,26.8,13.4,12.6,12.8,19.4,12.7,13.3,12.4,19.2,65.4,0.0,65.4,0
5,2,Carolina Crown,2024-06-28,"Muncie, Indiana",6,14.9,15.4,30.3,15.2,15.0,14.6,22.4,15.1,15.1,14.2,22.2,74.9,0.0,74.9,2
6,2,Boston Crusaders,2024-06-28,"Muncie, Indiana",5,15.0,15.0,30.0,14.7,15.2,14.9,22.4,14.8,14.8,15.0,22.3,74.7,0.0,74.7,2
7,2,Phantom Regiment,2024-06-28,"Muncie, Indiana",3,14.5,14.7,29.2,14.5,14.2,13.8,21.25,14.4,14.7,14.3,21.7,72.15,0.0,72.15,2
8,2,The Cavaliers,2024-06-28,"Muncie, Indiana",2,13.7,14.1,27.8,14.0,13.4,13.1,20.25,13.7,14.3,14.1,21.05,69.1,0.0,69.1,2
9,2,Blue Stars,2024-06-28,"Muncie, Indiana",1,13.4,13.8,27.2,13.2,13.3,13.5,20.0,13.3,13.8,12.9,20.0,67.2,0.0,67.2,2


In [6]:
# Last scores_table row of every corps (table order), keyed by corps name.
latest_rows = scores_table.drop_duplicates(subset='Corps', keep='last').set_index('Corps')

# corps -> most recent total score / date of that performance
current_scores = latest_rows['Total Score'].to_dict()
latest_perf = latest_rows['Date'].to_dict()

latest_summary = pd.DataFrame({
    'Latest Score': latest_rows['Total Score'].astype(float),
    'Last Performance Date': latest_rows['Date'],
})

# Drop the two summary columns first if they are already there, so the cell can
# be re-run without the join failing on overlapping column names.
corps_table = (
    corps_table
    .drop(columns=['Latest Score', 'Last Performance Date'], errors='ignore')
    .join(latest_summary, how='outer')
    # highest score first; ties (and corps without a score) ordered by name
    .sort_values(by=['Latest Score', 'Corps'], ascending=[False, True])
)
display(corps_table)

Unnamed: 0_level_0,Home Location,Class,Show Number,Date,Location,Performance Slot,General Effect 1,General Effect 2,General Effect Total,Visual Proficiency,...,Music Brass,Music Analysis,Music Percussion,Music Total,Sub Total,Penalty,Total Score,Days from Season Start,Latest Score,Last Performance Date
Corps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Carolina Crown,"Fort Mill, SC",DCI World Class,20.0,2024-07-08,"Sevierville, Tennessee",4.0,16.6,16.7,33.3,16.7,...,16.2,18.0,16.4,25.3,83.45,0.0,83.45,12.0,83.45,2024-07-08
Bluecoats,"Canton, OH",DCI World Class,17.0,2024-07-07,"LaCrosse, Wisconsin",4.0,16.6,17.3,33.9,16.4,...,16.5,16.5,16.6,24.8,83.4,0.0,83.4,11.0,83.4,2024-07-07
Blue Devils,"Concord, CA",DCI World Class,18.0,2024-07-07,"Sacramento, California",4.0,16.5,16.4,32.9,16.2,...,16.5,16.4,16.4,24.65,82.05,0.0,82.05,11.0,82.05,2024-07-07
Phantom Regiment,"Rockford, IL",DCI World Class,17.0,2024-07-07,"LaCrosse, Wisconsin",2.0,15.8,16.9,32.7,16.0,...,15.6,16.1,16.0,23.85,80.65,0.0,80.65,11.0,80.65,2024-07-07
Boston Crusaders,"Boston, MA",DCI World Class,9.0,2024-07-02,"Mason, OH",2.0,16.1,15.7,31.8,15.8,...,15.4,15.4,15.7,23.25,78.45,0.0,78.45,6.0,78.45,2024-07-02
Mandarins,"Sacramento, CA",DCI World Class,18.0,2024-07-07,"Sacramento, California",5.0,16.1,16.0,32.1,15.4,...,15.9,15.5,15.0,23.2,78.4,0.0,78.4,11.0,78.4,2024-07-07
Santa Clara Vanguard,"Santa Clara, CA",DCI World Class,18.0,2024-07-07,"Sacramento, California",1.0,15.4,15.8,31.2,15.6,...,15.7,15.3,16.1,23.55,77.5,0.0,77.5,11.0,77.5,2024-07-07
The Cavaliers,"Rosemont, IL",DCI World Class,16.0,2024-07-06,"Whitewater, Wisconsin",1.0,15.4,15.2,30.6,15.5,...,14.9,15.4,16.4,23.35,77.15,0.0,77.15,10.0,77.15,2024-07-06
Reading Buccaneers,"Reading, PA",DCI All Age World Class,5.0,2024-06-29,"Shelton, Connecticut",3.0,15.5,15.5,31.0,15.5,...,15.7,16.2,15.5,23.7,76.95,0.0,76.95,3.0,76.95,2024-06-29
Colts,"Dubuque, IA",DCI World Class,17.0,2024-07-07,"LaCrosse, Wisconsin",3.0,15.0,16.0,31.0,14.8,...,14.7,15.4,15.6,22.85,76.65,0.0,76.65,11.0,76.65,2024-07-07


In [7]:
# determine difference in score by comp

# Every column except the two text columns gets a per-corps delta.
delta_columns = [c for c in scores_table.columns if c not in ['Corps', 'Location']]

# Grouping keeps table order inside each corps, so shift() yields the value from
# that corps' previous row, aligned row-for-row with scores_table.
by_corps = scores_table.groupby('Corps', sort=False)

# Same layout as scores_table; each delta column holds "this row minus the
# previous row of the same corps" (missing on a corps' first row). Whole columns
# are replaced, so nothing depends on in-place dtype upcasting or on the index
# being 0..n-1.
scores_table_diff = scores_table.copy(deep=True)
for c in delta_columns:
    scores_table_diff[c] = scores_table[c] - by_corps[c].shift()

# determine difference in score rates (by day)

# Whole days since the same corps' previous row (missing on its first row).
days_between = scores_table_diff['Date'].dt.days

# Each delta divided by the days elapsed since that corps' previous row.
scores_table_rate = scores_table.copy(deep=True)
for c in delta_columns:
    scores_table_rate[c] = scores_table_diff[c] / days_between

# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(scores_table_diff)

In [8]:
import plotly.graph_objects as go
import plotly.colors as pc

# Least-squares straight line through every (days from season start, total score) point.
a, b = np.polyfit(
    scores_table['Days from Season Start'].astype(int),
    scores_table['Total Score'].astype(float),
    1,
)


def growth_fit(x):
    """Value of the fitted line ``a * x + b`` at ``x``."""
    return (a * x) + b


# How far each total score sits above or below the fitted line for its day.
scores_table['Total Score Residual'] = (
    scores_table['Total Score']
    - scores_table['Days from Season Start'].apply(growth_fit)
)

fig = go.Figure()

# One line+marker trace per corps, in corps_table order; `ranking` counts the
# traces from 1 and is shown in the legend entry.
ranking = 1
trace_colors = pc.qualitative.Bold
for (i, corps) in enumerate(corps_table.index):
    # Same row mask for all three tables.
    is_corps = scores_table['Corps'] == corps
    scores_table_corps = scores_table.loc[is_corps]
    scores_table_diff_corps = scores_table_diff.loc[is_corps]
    scores_table_rate_corps = scores_table_rate.loc[is_corps]

    # Hover text for every point of this trace.
    hover_text = [
        f'<b>{name}</b><br><br>'
        f"{when.strftime('%A, %d %B %Y')}<br>"
        f"{where}<br><br>"
        f"Competition Score: {score:.3f}<br>"
        f"Difference: {delta:+.3f}<br>"
        f"Score Growth: {per_day:+.3f} per day"
        for (name, when, where, score, delta, per_day) in zip(
            scores_table_corps['Corps'],
            scores_table_corps['Date'],
            scores_table_corps['Location'],
            scores_table_corps['Total Score'],
            scores_table_diff_corps['Total Score'],
            scores_table_rate_corps['Total Score'],
        )
    ]

    # The palette is reused cyclically when there are more corps than colors.
    trace = go.Scatter(
        x=scores_table_corps['Date'],
        y=scores_table_corps['Total Score'],
        name=f'#{ranking} ({corps_table.loc[corps,"Latest Score"]:.3f}) {corps}',
        mode='lines+markers',
        connectgaps=True,
        line=dict(color=trace_colors[i % len(trace_colors)]),
        text=hover_text,
    )
    fig.add_trace(trace)
    ranking += 1

# Timestamp of this run (local clock), shown under the chart title.
# NOTE(review): the "CT" suffix is hard-coded; now() is not timezone-aware.
updated_time = f'<i>Updated {datetime.datetime.now().strftime("%A, %b %d, %Y %H:%M:%S")} CT</i>'

def active_ranking(cat):
    """Build one legend label per row of ``corps_table`` for a visibility mask.

    Parameters
    ----------
    cat : iterable of bool-like
        One flag per ``corps_table`` row, in row order; truthy means the
        corps is shown.

    Returns
    -------
    list of str
        Labels of the form ``"#<rank> (<latest score>) <corps>"``. The rank
        starts at 1 and advances only after a shown corps, so shown corps are
        numbered consecutively.
    """
    latest_scores = corps_table['Latest Score']
    corps_names = corps_table.index

    labels = []
    rank = 1
    for position, shown in enumerate(cat):
        # .iloc: rows are addressed by position, not by corps-name label.
        labels.append(f"#{rank} ({latest_scores.iloc[position]:.3f}) {corps_names[position]}")
        # Truthiness instead of `is True`, so numpy booleans count as well.
        if shown:
            rank += 1
    return labels

# Menu entries: everything first, then one entry per class.
dropdown_labels = ["All Corps", *corps_classes]

# One visibility mask per menu entry, each with one plain Python bool per
# corps_table row: all True for "All Corps", then "row's class equals this class".
dropdown_categories = [[True] * len(corps_table['Class'])]
dropdown_categories.extend(
    [x == wanted_class for x in corps_table['Class']]
    for wanted_class in corps_classes
)

# Each button restyles the traces: which are visible and what their legend
# names (with re-numbered ranks) are.
dropdown_dicts = [
    {
        'label': label,
        'method': 'restyle',
        'args': [{"visible": category, "name": active_ranking(category)}],
    }
    for (label, category) in zip(dropdown_labels, dropdown_categories)
]


fig.update_layout(
    title=f'<b>DCI 2024 Scores by Roman Ramirez</b><br>{updated_time}<br>',
    xaxis_title='<b>Date</b>',
    yaxis_title='<b>Total Score</b>',
    updatemenus=[{'active': 0, 'buttons': dropdown_dicts}],
)

# Three whole-table rows (location, score difference, corps) stacked into one array.
# NOTE(review): the same array is attached to every trace and the hover
# template below only reads %{text} -- confirm whether customdata is still needed.
customdata = np.stack((
    list(scores_table['Location']),
    list(scores_table_diff['Total Score']),
    list(scores_table['Corps'])
    ), axis=0)

# Show only the pre-built hover text; the empty <extra> tag hides the side box.
hovertemplate = '%{text}<br><extra></extra>'

# Settings applied to every trace.
shared_trace_settings = dict(
    customdata=customdata,
    hovertemplate=hovertemplate,
    opacity=0.8,
    legendgrouptitle_text='<b>#<i>Rank</i> (<i>Latest Score</i>) <i>Corps</i></b>',
)
fig.update_traces(**shared_trace_settings)

# Render inline and save a standalone page.
fig.show()
fig.write_html("index.html")

# Write both tables (index included) to the exports folder.
export(scores_table, 'scores', index=True)
export(corps_table, 'corps', index=True)

In [9]:
# To-dos

# During import, create a "static" corps master file
# (contains the class and other corps-related info).

# SQL-join it onto the scores table.
# Add buttons to filter by class.