In [21]:
from googleapiclient.discovery import build
import pandas as pd
from datetime import datetime
import calendar
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

with open('api_key.txt', 'r') as f:
    API_KEY = f.read().strip()

# === CONFIG ===
SHEET_ID = '1Er54lmX1jBCbjQWkcFjWX6hp0WsoTiSqeb8U9SNIReo'
RANGE = 'A1:Z215'

# === SETUP ===
service = build('sheets', 'v4', developerKey=API_KEY)
sheet = service.spreadsheets()

# === STEP 1: Get full sheet grid with formatting ===
result = sheet.get(
    spreadsheetId=SHEET_ID,
    ranges=[RANGE],
    includeGridData=True
).execute()

grid = result['sheets'][0]['data'][0]['rowData']

In [22]:
# Step 1: Find all month labels (like 'JANUARY', 'FEBRUARY', etc.)
month_positions = {}  # (row_idx, col_idx) => month_num
month_names = {name.upper(): i for i, name in enumerate(calendar.month_name) if name}

for row_idx, row in enumerate(grid):
    for col_idx, cell in enumerate(row.get('values', [])):
        val = cell.get('formattedValue', '')
        val_clean = val.strip().upper()
        if val_clean in month_names:
            month_num = month_names[val_clean]
            month_positions[(row_idx, col_idx)] = month_num

# Step 2: For each day-number cell, find the nearest month label *above and to the right*
date_map = {}  # (row_idx, col_idx) -> datetime

for row_idx, row in enumerate(grid):
    for col_idx, cell in enumerate(row.get('values', [])):
        val = cell.get('formattedValue', '')
        if not val.isdigit():
            continue
        day = int(val)

        # Find the closest month label above and to the right
        best_month = None
        best_distance = float('inf')

        for (m_row, m_col), month_num in month_positions.items():
            if m_row < row_idx and m_col <= col_idx:
                dist = (row_idx - m_row) + (col_idx - m_col)
                if dist < best_distance:
                    best_distance = dist
                    best_month = month_num

        if best_month is not None:
            try:
                date = datetime(2025, best_month, day)
                date_map[(row_idx, col_idx)] = date
            except ValueError:
                pass  # skip invalid dates like Feb 30


In [None]:
events = []
merged_ranges = result['sheets'][0].get('merges', [])
visited = set()

# Helper to find merged range for a given cell
def get_merged_range(row, col):
    for mr in merged_ranges:
        if (
            mr['startRowIndex'] <= row < mr['endRowIndex']
            and mr['startColumnIndex'] <= col < mr['endColumnIndex']
        ):
            return mr
    return None

for row_idx, row in enumerate(grid):
    for col_idx, cell in enumerate(row.get('values', [])):
        if (row_idx, col_idx) in visited:
            continue

        val = cell.get('formattedValue')
        if not val or val.strip().isdigit():
            continue
        link = cell.get('hyperlink', '')

        # === Get event start date and event type color from the date cell above ===
        event_date = None
        event_type = 'Other'

        for r2 in range(row_idx, 0, -1):
            if (r2, col_idx) in date_map:
                event_date = date_map[(r2, col_idx)]

                # Try to read the background color from the date cell
                try:
                    date_cell = grid[r2]['values'][col_idx]  # <-- correct path
                    color = date_cell.get('effectiveFormat', {}).get('backgroundColor', {})
                    r = color.get('red', 0)
                    g = color.get('green', 0)
                    b = color.get('blue', 0)

                    if r > 0.8 and g < 0.5:
                        event_type = 'GBHL100'
                    elif r > 0.8 and g > 0.8:
                        event_type = 'GBHL90'
                    elif b > 0.8:
                        event_type = 'GBHL80'
                    else:
                        event_type = 'Other'
                except Exception as e:
                    print(f"Warning: Failed to read event type color from ({r2},{col_idx}): {e}")

                break

        if not event_date:
            continue

        # Parse event text
        lines = val.strip().split('\n')
        if len(lines) < 3:
            continue

        event_name = lines[0].strip()
        organizer = lines[1].strip()
        region_loc = lines[2].strip().strip('[]')
        region, location = region_loc.split(' - ') if ' - ' in region_loc else ('Unknown', region_loc)

        # Determine merged range (if any)
        merged = get_merged_range(row_idx, col_idx)
        if merged:
            end_col = merged['endColumnIndex'] - 1
            # Mark all merged cells as visited
            for r in range(merged['startRowIndex'], merged['endRowIndex']):
                for c in range(merged['startColumnIndex'], merged['endColumnIndex']):
                    visited.add((r, c))
        else:
            end_col = col_idx
            visited.add((row_idx, col_idx))

        # Get end date from the right-most merged cell's column
        end_date = event_date
        for r2 in range(row_idx, 0, -1):
            if (r2, end_col) in date_map:
                end_date = date_map[(r2, end_col)]
                break

        # === Extract format (Singles / Doubles) from the row below ===
        format_type = 'Unknown'

        # Determine the row below the event block
        format_row_idx = (merged['endRowIndex'] if merged else row_idx + 1)

        # Use the first column of the merged block if available, else same col
        format_col_idx = merged['startColumnIndex'] if merged else col_idx

        # Safely access the format cell
        if format_row_idx < len(grid):
            format_row = grid[format_row_idx]
            if format_col_idx < len(format_row.get('values', [])):
                format_cell = format_row['values'][format_col_idx]
                format_val = format_cell.get('formattedValue', '').strip().lower()
                if 'double' in format_val:
                    format_type = 'Doubles'
                elif 'single' in format_val:
                    format_type = 'Singles'

        event = {
            'start_date': event_date.strftime('%Y-%m-%d'),
            'end_date': end_date.strftime('%Y-%m-%d'),
            'event_name': event_name,
            'organizer': organizer,
            'region': region,
            'location': location,
            'format': format_type,
            'event_type': event_type,
            'link': link
        }

        events.append(event)




In [24]:
# === STEP 4: Show / Save Results ===
df = pd.DataFrame(events)
print(df.head())

   start_date    end_date                event_name      organizer region  \
0  2025-01-03  2025-01-03          A New Adventure!      Matt King     SW   
1  2025-01-04  2025-01-05             Into The West      Matt King     SW   
2  2025-02-01  2025-02-02             City of Steel       Ali King  N.Eng   
3  2025-03-01  2025-03-02  Defence of North Bristol  David Clubley     SW   
4  2025-01-04  2025-01-04           A Merrier World     Tim Elwess      C   

    location   format event_type                       link  
0    Cardiff  Singles      Other  https://fb.me/e/5DVsh3clY  
1    Cardiff  Singles      Other  https://fb.me/e/5zaqaw53a  
2  Sheffield  Singles      Other                             
3    Bristol  Doubles      Other  https://fb.me/e/ahB6QwPbj  
4    Lincoln  Unknown      Other  https://fb.me/e/801JX0b3c  


In [25]:
print(df.tail())

     start_date    end_date                    event_name        organizer  \
142  2025-10-25  2025-10-26            The Rings of Power  Natalie Pearson   
143  2025-11-29  2025-11-29         But It's Not This Day   Scott Hensford   
144  2025-10-25  2025-10-25       Great Deeds of War 2025   Rob Lainchbury   
145  2025-10-26  2025-10-26  Battle for Middle Earth 2025    Chris Jackson   
146  2025-11-29  2025-11-29            Angle Armies Clash     James Palmer   

    region      location   format event_type                       link  
142  N.Eng     Sheffield  Singles      Other  https://fb.me/e/8PaBzAFW9  
143     SE  High Wycombe  Singles      Other                             
144      C    Birmingham  Singles      Other                             
145     SE    Eastbourne  Singles      Other                             
146     SE       Woolpit  Doubles      Other  https://fb.me/e/4sF8NWo7G  


In [7]:
# Create a location string for geocoding
df['location_str'] = df['location'] + ', UK'

# Set up Nominatim geocoder
geolocator = Nominatim(user_agent="gbhl-event-locator")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)  # be polite!

# Apply geocoding
def get_coords(place):
    try:
        loc = geocode(place)
        if loc:
            return pd.Series([loc.latitude, loc.longitude])
    except:
        pass
    return pd.Series([None, None])

df[['lat', 'lon']] = df['location_str'].apply(get_coords)

In [14]:
# Create a column to flag if it is a two day event
df['event_duration'] = (df['start_date'] != df['end_date'])*1 + 1
df['event_duration'].replace(1, "One day", inplace=True)
df['event_duration'].replace(2, "Two days", inplace=True)

In [15]:
df

Unnamed: 0,start_date,end_date,event_name,organizer,region,location,format,event_type,link,location_str,lat,lon,event_duration
0,2025-01-03,2025-01-03,A New Adventure!,Matt King,SW,Cardiff,Singles,GBHL90,https://fb.me/e/5DVsh3clY,"Cardiff, UK",51.481655,-3.179193,One day
1,2025-01-04,2025-01-05,Into The West,Matt King,SW,Cardiff,Singles,GBHL90,https://fb.me/e/5zaqaw53a,"Cardiff, UK",51.481655,-3.179193,Two days
2,2025-02-01,2025-02-02,City of Steel,Ali King,N.Eng,Sheffield,Singles,GBHL90,,"Sheffield, UK",53.380663,-1.470228,Two days
3,2025-03-01,2025-03-02,Defence of North Bristol,David Clubley,SW,Bristol,Doubles,GBHL90,https://fb.me/e/ahB6QwPbj,"Bristol, UK",51.453802,-2.597298,Two days
4,2025-01-04,2025-01-04,A Merrier World,Tim Elwess,C,Lincoln,Unknown,GBHL90,https://fb.me/e/801JX0b3c,"Lincoln, UK",53.229354,-0.540482,One day
...,...,...,...,...,...,...,...,...,...,...,...,...,...
142,2025-10-25,2025-10-26,The Rings of Power,Natalie Pearson,N.Eng,Sheffield,Singles,GBHL90,https://fb.me/e/8PaBzAFW9,"Sheffield, UK",53.380663,-1.470228,Two days
143,2025-11-29,2025-11-29,But It's Not This Day,Scott Hensford,SE,High Wycombe,Singles,GBHL90,,"High Wycombe, UK",51.631745,-0.755960,One day
144,2025-10-25,2025-10-25,Great Deeds of War 2025,Rob Lainchbury,C,Birmingham,Singles,GBHL90,,"Birmingham, UK",52.479699,-1.902691,One day
145,2025-10-26,2025-10-26,Battle for Middle Earth 2025,Chris Jackson,SE,Eastbourne,Singles,GBHL90,,"Eastbourne, UK",50.766437,0.278155,One day


In [16]:
df.to_csv("data/gbhl_events.csv", index=False)

In [17]:
df.to_json('data/events.json', orient='records')