<a href="https://colab.research.google.com/github/stephen-e-cox/whipPy/blob/main/whipPy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
## Imports for the whole notebook live in this one cell
import re
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import gspread
from google.colab import auth
from google.auth import default

## Authenticate the Colab user once, then pick up the resulting default
## credentials (the second value returned by default() is not needed here)
auth.authenticate_user()
creds, _ = default()

## gspread client built from those credentials; later cells use it to open
## the master spreadsheet
gc = gspread.authorize(creds)

In [20]:
## This is the master spreadsheet
wb = gc.open_by_url("https://docs.google.com/spreadsheets/d/1n_xmaQuNmj3JBfOXJEN8c4HZAOUPOjtPAM-fY4h4K54/edit#gid=0")

## This is the new race file to read. Drop it in the temporary files list in Colab. It will not be saved.
racefile_name = "2023_NYRR_Al_Gordon_4M" ## change this line only
## The handle is intentionally left open: the processing cell rewinds it with
## seek(0) and reads it.
racefile = open(racefile_name + ".txt")


def _delete_worksheet_if_present(workbook, title):
  """Delete the worksheet called `title` from `workbook` if there is one.

  Only gspread's WorksheetNotFound is treated as "nothing to delete". Any
  other exception raised by the lookup or the delete propagates to the
  caller, so an unrelated failure is never mistaken for "the sheet already
  exists" and never leads to a worksheet being deleted by accident.
  """
  try:
    stale = workbook.worksheet(title)
  except gspread.exceptions.WorksheetNotFound:
    return
  workbook.del_worksheet(stale)


## This creates a new worksheet with the name of the racefile. If it already
## exists, it will be deleted!
_delete_worksheet_if_present(wb, racefile_name)
ws = wb.add_worksheet(racefile_name, rows=1, cols=1)

## This creates a single archive of the summary spreadsheet, and deletes any
## existing one. This is just a light backup solution in case you screw up an
## import.
## "Summary" is looked up before the old archive is touched, so a missing
## Summary sheet fails here without deleting anything.
sum_worksheet = wb.worksheet("Summary")
_delete_worksheet_if_present(wb, "Summary_archive")
wb.duplicate_sheet(sum_worksheet.id, new_sheet_name="Summary_archive")

In [38]:
## Process the race results

## Split it up by line first. Rewinding makes this cell safe to re-run on the
## same open file handle.
racefile.seek(0)
data = racefile.read().split('\n')

## Drop blank lines at the end of the file. A file that ends with a newline
## would otherwise leave an empty string at the end of `data`, which becomes a
## short final chunk and therefore a bogus, mostly-empty last row.
while data and not data[-1].strip():
  data.pop()

## The format we get from NYRR page copy/paste is eight lines per entry
chunks = [data[x:x+8] for x in range(0, len(data), 8)]

racedata = pd.DataFrame(chunks, columns=["Name", "AgeLocBib", "Text1", "Time", "Text2", "Pace", "Text3", "Place"])

## Split Age/Location/Bib line: the first whitespace-separated token is the
## age group, the rest is location plus bib
racedata[["Age", "LocBib"]] = racedata["AgeLocBib"].str.split(n=1, expand=True)
## Splitting from the right peels off the last two tokens (label text and bib),
## leaving whatever precedes them as the location
racedata[["Location", "Text4", "Bib"]] = racedata["LocBib"].str.rsplit(n=2, expand=True)

## We don't need labels like "Time" from the text file. We are also not using these
## to reshape because there is no "Name" or "Age/Location" label.
## Reassign instead of using inplace=True so the step reads as a plain transform.
racedata = racedata.drop(columns=["AgeLocBib", "LocBib", "Text1", "Text2", "Text3", "Text4"])



                    Name     Time   Pace  Place  Age                 Location  \
0        Frederick Hines  0:21:36  05:24     10  M43                      CAN   
1             Brian Tsau  0:22:49  05:43     30  M25          NEW YORK NY USA   
2          Michael Fosco  0:23:17  05:50     44  M29          BROOKLYN NY USA   
3      Michael Cosentino  0:23:35  05:54     53  M46                      CAN   
4         Alexander Dano  0:24:18  06:05     95  M26          NEW YORK NY USA   
5          Lauren Dorsky  0:24:39  06:10    118  W27  BROOKLYN HEIGHTS NY USA   
6         Jeffrey Clarke  0:25:27  06:22    170  M39      FOREST HILLS NY USA   
7      Katherine Edwards  0:25:28  06:22    171  W35                      CAN   
8             Andrew Lee  0:25:42  06:26    195  M39         SUNNYSIDE NY USA   
9       Stephen Tuozzolo  0:25:44  06:26    201  M32          NEW YORK NY USA   
10         Adiel Schmidt  0:26:34  06:39    289  W28          NEW YORK NY USA   
11          Armani Abreu  0: