In [162]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

import numpy as np
from datetime import datetime

# fetch page content
url = "https://www.animeboston.com/schedule/index/2024"
soup = BeautifulSoup(requests.get(url).text, 'html.parser')

# find all tables
tables = soup.select("table.schedule_table")

# container for events
events = []



In [163]:
soup

<!DOCTYPE HTML>

<html>
<head>
<script async="" src="https://www.googletagmanager.com/gtag/js?id=G-38TMR0STT6"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());

  gtag('config', 'G-38TMR0STT6');
</script>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<title>Programming Schedule</title>
<link href="/images/favicon/57x57.png" rel="apple-touch-icon" type="image/png"/>
<link href="/images/favicon/76x76.png" rel="apple-touch-icon" sizes="76x76" type="image/png"/>
<link href="/images/favicon/120x120.png" rel="apple-touch-icon" sizes="120x120" type="image/png"/>
<link href="/images/favicon/152x152.png" rel="apple-touch-icon" sizes="152x152" type="image/png"/>
<link href="/images/favicon/180x180.png" rel="apple-touch-icon" sizes="180x180" type="image/png"/>
<link href="https://s3.amazonaws.com/www-ab/img/favico192.png" rel="icon" sizes="192x192" type="image/png"/>
<link href=

In [164]:
# extract legend
legend_items = soup.select("div.schedule-legend label.schedule-category-label")

# build color map
category_colors = {
    label.text.strip(): label.get("style").split("background-color:")[1].strip()
    for label in legend_items if "background-color:" in label.get("style", "")
}

In [165]:
category_colors

{'18+ Cosplay': '#AA99FF',
 '18+ Event': '#CC9966',
 '18+ Fan Creations': '#669966',
 '18+ Fan Panel': '#669900',
 '18+ Featured Panel': '#66CC00',
 '18+ Gameshow': '#9900FF',
 '18+ Guest Panel': '#669999',
 '18+ Video': '#F7921B',
 '21+ Event': '#FF0000',
 'Ball': '#CC9900',
 'Concerts': '#996633',
 'Cosplay Games': '#EEAEEE',
 'Event': '#E8D37E',
 'Fan Creations': '#CCFF66',
 'Fan Panel': '#00FF00',
 'Featured Artist': '#CCFFCC',
 'Featured Panel': '#00FF99',
 'Gameshow': '#72B7ED',
 'Guest Panel': '#00FFFF',
 'ID Check': '#999999',
 'Idol Events': '#FF99CC',
 'Industry Panel': '#3399FF',
 'Jam Zone': '#CC0000',
 'Karaoke': '#9933FF',
 'Libraries & Education': '#CCFF00',
 'Lolita & J-fashion': '#CC6699',
 'Maid Cafe': '#CCCCFF',
 'Premiere Video': '#FF9966',
 'Room Clear': '#CCCCCC',
 'Seating': '#CCCCCC',
 'Social Gatherings': '#FF3366',
 'Tabletop Gaming': '#CC0066',
 'Video': '#66A3D2',
 'Workshop': '#F7931E'}

In [166]:
room_headers = soup.select("table.schedule-table tr")[1].select("th.schedule-room")
rooms = [room.get_text(strip=True) for room in room_headers]
rooms

['Auditorium Events',
 'Ballroom A',
 'Ballroom B',
 'Fan Creations 312',
 'Maid Cafe',
 'Panel 202',
 'Panel 207',
 'Panel 208',
 'Panel 302',
 'Panel 309',
 'Panel 310',
 'Video 210',
 'Video 306',
 'Kings',
 'Grand Ballroom',
 'Panel Constitution',
 'Panel Gardner',
 'Panel The Fens',
 'Republic Ballroom',
 'RPG Riverway',
 'Video Hampton',
 'Workshop Fairfax']

In [167]:
url

'https://www.animeboston.com/schedule/index/2024'

In [173]:
def preprocess_tables(tables, ii, FirstColumnName='TimeSlot'):
    tables[ii].dropna(subset=[tables[ii].columns[0]], inplace=True)
    tables[ii] = tables[ii].iloc[:, :-1].copy()
    tables[ii].columns = [FirstColumnName] + tables[ii].columns[1:].tolist()
    #tables[ii].map(lambda x: x.replace('\n', '').strip() if isinstance(x, str) else x)

    # 24-hr military time
    vconvert = np.vectorize(lambda x: datetime.strptime(x, '%I:%M %p').strftime('%H:%M'))
    tables[ii][FirstColumnName] = vconvert(tables[ii][FirstColumnName])
    return tables[ii]

tables = pd.read_html(url)  # This reads the first two rows as column headers

# display cleaned version
#display(tables)

df = preprocess_tables(tables,0)
df

Unnamed: 0,TimeSlot,"(Hynes, Auditorium Events)","(Hynes, Ballroom A)","(Hynes, Ballroom B)","(Hynes, Fan Creations 312)","(Hynes, Maid Cafe)","(Hynes, Panel 202)","(Hynes, Panel 207)","(Hynes, Panel 208)","(Hynes, Panel 302)",...,"(Hynes, Video 306)","(Kings, Kings)","(Sheraton, Grand Ballroom)","(Sheraton, Panel Constitution)","(Sheraton, Panel Gardner)","(Sheraton, Panel The Fens)","(Sheraton, Republic Ballroom)","(Sheraton, RPG Riverway)","(Sheraton, Video Hampton)","(Sheraton, Workshop Fairfax)"
0,08:00,,,,,,,,,,...,Dual! Parallel Trouble Adventur e,,,,,,,,Soul Eater,
1,08:15,,,,,,,,,,...,Dual! Parallel Trouble Adventur e,,,,,,,,Soul Eater,
2,08:30,,,,,,,,,,...,Dual! Parallel Trouble Adventur e,,,,,,,,Soul Eater,
3,08:45,,,,,,,,,,...,Dual! Parallel Trouble Adventur e,,,,,,,,Soul Eater,
4,09:00,,,,AMV Genkis,,,,,,...,Dual! Parallel Trouble Adventur e,,,,,,,Tabletop Session 1 - A Strange New World,Soul Eater,Let's $ew! Ditto!
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,00:45,,Slumber Party,,Fanservi ce & Ecchi AMVs (18+),,,,,Taskmast er (18+),...,,,,,,,,,,
68,01:00,,Slumber Party,,Horror AMVs (18+),,,,,Taskmast er (18+),...,,,,,,,,,,
69,01:15,,Slumber Party,,Horror AMVs (18+),,,,,Taskmast er (18+),...,,,,,,,,,,
70,01:30,,,,Horror AMVs (18+),,,,,Taskmast er (18+),...,,,,,,,,,,


In [174]:
# add original row order as a helper column
df["RowOrder"] = df.index

# melt while preserving row order
long_df = df.melt(id_vars=["TimeSlot", "RowOrder"], 
                  var_name="Room", 
                  value_name="Event")

# drop rows without an event
events_df = long_df.dropna(subset=["Event"]).sort_values("RowOrder").drop(columns="RowOrder").reset_index(drop=True)
events_df

Unnamed: 0,TimeSlot,Room,Event
0,08:00,"(Hynes, Video 306)",Dual! Parallel Trouble Adventur e
1,08:00,"(Hynes, Video 210)",Lupin the 3rd: The Castle of Cagliost ro
2,08:00,"(Sheraton, Video Hampton)",Soul Eater
3,08:15,"(Hynes, Video 210)",Lupin the 3rd: The Castle of Cagliost ro
4,08:15,"(Sheraton, Video Hampton)",Soul Eater
...,...,...,...
899,01:30,"(Hynes, Fan Creations 312)",Horror AMVs (18+)
900,01:30,"(Hynes, Panel 302)",Taskmast er (18+)
901,01:45,"(Hynes, Panel 309)","Anime Statisti cs Hentai Edition - Numbers, Fo..."
902,01:45,"(Hynes, Fan Creations 312)",Horror AMVs (18+)


In [175]:
# group by Event and aggregate TimeSlot into a set
df_grouped = events_df.groupby(['Event','Room'])['TimeSlot'].agg(list).reset_index()
df_grouped.to_csv('test.csv', index=False, encoding='utf-8-sig')
df_grouped

Unnamed: 0,Event,Room,TimeSlot
0,"""Your Mom vs. the Noobs"" aka Otaku Mad Libs","(Sheraton, Panel Gardner)","[17:30, 17:45, 18:00, 18:15]"
1,50% Off,"(Hynes, Fan Creations 312)","[20:45, 21:00, 21:15, 21:30, 21:45, 22:00, 22:..."
2,A Brief History of Anime at the Movies,"(Hynes, Panel 207)","[16:30, 16:45, 17:00, 17:15]"
3,A Certain Magical Index,"(Hynes, Video 210)","[10:00, 10:15, 10:30, 10:45, 11:00, 11:15, 11:..."
4,A Plus Size Cosplaye r’s Survival Guide to Cos...,"(Sheraton, Workshop Fairfax)","[17:30, 17:45, 18:00, 18:15]"
...,...,...,...
155,Working in the JAV industry to anime voice act...,"(Hynes, Panel 208)","[19:00, 19:15, 19:30, 19:45]"
156,"Worldwea ving: Characte r Design, Clothing His...","(Hynes, Panel 309)","[11:00, 11:15, 11:30, 11:45]"
157,"Yoko Taro-ver se: Sex, Death, and Violence (18+)","(Hynes, Panel 207)","[18:00, 18:15, 18:30, 18:45]"
158,Yu Yu Hakusho Marathon,"(Sheraton, Video Hampton)","[18:00, 18:15, 18:30, 18:45, 19:00, 19:15, 19:..."


In [176]:
df_grouped[df_grouped['Event']=='Maid Café']

Unnamed: 0,Event,Room,TimeSlot
94,Maid Café,"(Hynes, Maid Cafe)","[12:00, 12:15, 12:30, 12:45, 13:00, 13:15, 13:..."


In [177]:
import pandas as pd

def split_event_to_subevents(df, event_col, time_col, room_col, chunk_size=3, target_event=None):
    """
    Splits a specific event into subevents by grouping its time slots into chunks.
    
    Parameters:
    - df: The DataFrame containing the events.
    - event_col: The name of the column containing event names.
    - time_col: The name of the column containing time slots.
    - room_col: The name of the column containing room information.
    - chunk_size: The number of time slots per subevent (default 3).
    - target_event: The specific event to split (e.g., 'Maid Café'). If None, all events are processed.
    
    Returns:
    - A DataFrame with subevents, grouped by event and room.
    """
    # If target_event is provided, filter only that event
    if target_event:
        df = df[df[event_col] == target_event]

    # Explode the TimeSlot column and sort by Room and TimeSlot
    exploded_df = (df.explode(time_col)
                   .sort_values(by=[room_col, time_col])
                   .reset_index(drop=True))

    # Create subevent labels based on chunking logic
    exploded_df['Subevent'] = exploded_df.groupby(room_col).cumcount() // chunk_size
    exploded_df['Subevent'] = exploded_df['Subevent'].apply(lambda x: f"{chr(65 + x)}")

    # Group by subevent and room, aggregate time slots into lists
    return (exploded_df
            .groupby([event_col, 'Subevent', room_col])[time_col]
            .agg(list)
            .reset_index())

# Example usage
# Assuming 'Event', 'TimeSlot', 'Room' are the column names in your DataFrame
subevent_df = split_event_to_subevents(df_grouped, 'Event', 'TimeSlot', 'Room', chunk_size=3, target_event="Maid Café")
subevent_df

Unnamed: 0,Event,Subevent,Room,TimeSlot
0,Maid Café,A,"(Hynes, Maid Cafe)","[12:00, 12:15, 12:30]"
1,Maid Café,B,"(Hynes, Maid Cafe)","[12:45, 13:00, 13:15]"
2,Maid Café,C,"(Hynes, Maid Cafe)","[13:30, 13:45, 14:00]"
3,Maid Café,D,"(Hynes, Maid Cafe)","[14:15, 14:30, 14:45]"
4,Maid Café,E,"(Hynes, Maid Cafe)","[15:00, 15:15, 15:30]"
5,Maid Café,F,"(Hynes, Maid Cafe)","[15:45, 16:00, 16:15]"
6,Maid Café,G,"(Hynes, Maid Cafe)","[16:30, 16:45, 17:00]"
7,Maid Café,H,"(Hynes, Maid Cafe)","[17:15, 17:30, 17:45]"


In [178]:
len(df_grouped)

160

### Notes

- Maid Cafe subevents are optional, but crucial to capture when repeating events
- So what is needed next is to have the event and timeslot taken out and scored with utility, combined with metadata such as the category number or whatnot.

In [179]:
# Drop rows for "Maid Café"
df_cleaned = df_grouped[df_grouped["Event"] != "Maid Café"]

# Add 'Event' column explicitly (already present)
# Combine
df_combined = pd.concat([df_cleaned, subevent_df], 
                        ignore_index=True)
df_combined

Unnamed: 0,Event,Room,TimeSlot,Subevent
0,"""Your Mom vs. the Noobs"" aka Otaku Mad Libs","(Sheraton, Panel Gardner)","[17:30, 17:45, 18:00, 18:15]",
1,50% Off,"(Hynes, Fan Creations 312)","[20:45, 21:00, 21:15, 21:30, 21:45, 22:00, 22:...",
2,A Brief History of Anime at the Movies,"(Hynes, Panel 207)","[16:30, 16:45, 17:00, 17:15]",
3,A Certain Magical Index,"(Hynes, Video 210)","[10:00, 10:15, 10:30, 10:45, 11:00, 11:15, 11:...",
4,A Plus Size Cosplaye r’s Survival Guide to Cos...,"(Sheraton, Workshop Fairfax)","[17:30, 17:45, 18:00, 18:15]",
...,...,...,...,...
162,Maid Café,"(Hynes, Maid Cafe)","[14:15, 14:30, 14:45]",D
163,Maid Café,"(Hynes, Maid Cafe)","[15:00, 15:15, 15:30]",E
164,Maid Café,"(Hynes, Maid Cafe)","[15:45, 16:00, 16:15]",F
165,Maid Café,"(Hynes, Maid Cafe)","[16:30, 16:45, 17:00]",G


In [180]:
subevents_df

Unnamed: 0,Event,Subevent,Room,TimeSlot
0,Maid Café,A,"(Hynes, Maid Cafe)","[12:00 pm, 12:15 pm, 12:30 pm]"
1,Maid Café,B,"(Hynes, Maid Cafe)","[12:45 pm, 1:00 pm, 1:15 pm]"
2,Maid Café,C,"(Hynes, Maid Cafe)","[1:30 pm, 1:45 pm, 2:00 pm]"
3,Maid Café,D,"(Hynes, Maid Cafe)","[2:15 pm, 2:30 pm, 2:45 pm]"
4,Maid Café,E,"(Hynes, Maid Cafe)","[3:00 pm, 3:15 pm, 3:30 pm]"
5,Maid Café,F,"(Hynes, Maid Cafe)","[3:45 pm, 4:00 pm, 4:15 pm]"
6,Maid Café,G,"(Hynes, Maid Cafe)","[4:30 pm, 4:45 pm, 5:00 pm]"
7,Maid Café,H,"(Hynes, Maid Cafe)","[5:15 pm, 5:30 pm, 5:45 pm]"


In [181]:
# Unit Test, observe the duplicates! (They should have subevents)
df_combined[df_combined["Event"].duplicated(keep=False)]

Unnamed: 0,Event,Room,TimeSlot,Subevent
64,ID Check Seating (18+),"(Hynes, Fan Creations 312)",[23:45],
65,ID Check Seating (18+),"(Hynes, Panel 202)","[21:00, 21:15]",
66,ID Check Seating (18+),"(Hynes, Panel 207)","[17:30, 17:45, 21:00, 21:15]",
67,ID Check Seating (18+),"(Hynes, Panel 302)","[23:00, 23:15]",
68,ID Check Seating (18+),"(Hynes, Panel 309)","[18:30, 18:45, 23:00, 23:15, 00:30, 00:45]",
69,ID Check Seating (18+),"(Hynes, Panel 310)","[23:30, 23:45]",
70,ID Check Seating (18+),"(Sheraton, Grand Ballroom)","[15:30, 15:45, 19:30, 19:45, 22:30, 22:45]",
71,ID Check Seating (18+),"(Sheraton, Panel Constitution)","[22:00, 22:15]",
113,Room Clear,"(Hynes, Auditorium Events)","[11:30, 11:45, 15:00, 15:15]",
114,Room Clear,"(Hynes, Ballroom A)","[14:30, 14:45, 16:00, 16:15, 21:00, 21:15]",


In [182]:
exclude_from_scheduling = [
    "Room Clear",
    "Seating",
    "ID Check Seating (18+)"
]
df_final = df_combined[~df_combined["Event"].isin(exclude_from_scheduling)].reset_index(drop=True).copy()
df_final

Unnamed: 0,Event,Room,TimeSlot,Subevent
0,"""Your Mom vs. the Noobs"" aka Otaku Mad Libs","(Sheraton, Panel Gardner)","[17:30, 17:45, 18:00, 18:15]",
1,50% Off,"(Hynes, Fan Creations 312)","[20:45, 21:00, 21:15, 21:30, 21:45, 22:00, 22:...",
2,A Brief History of Anime at the Movies,"(Hynes, Panel 207)","[16:30, 16:45, 17:00, 17:15]",
3,A Certain Magical Index,"(Hynes, Video 210)","[10:00, 10:15, 10:30, 10:45, 11:00, 11:15, 11:...",
4,A Plus Size Cosplaye r’s Survival Guide to Cos...,"(Sheraton, Workshop Fairfax)","[17:30, 17:45, 18:00, 18:15]",
...,...,...,...,...
140,Maid Café,"(Hynes, Maid Cafe)","[14:15, 14:30, 14:45]",D
141,Maid Café,"(Hynes, Maid Cafe)","[15:00, 15:15, 15:30]",E
142,Maid Café,"(Hynes, Maid Cafe)","[15:45, 16:00, 16:15]",F
143,Maid Café,"(Hynes, Maid Cafe)","[16:30, 16:45, 17:00]",G


### What's next

Tidying
- Need to have a way to link back and make sure that the events exploded list is consistent... could just explode this list

Scoring needed!
- Have a way to score this, probably write to csv, then score utility from 1 - 10, 10 being amazing, and 1 being absolutely not, this could be rated from multiple people and averaged among them (or take the minimum among all there)

Would be nice, best practice, can be later
- Afterward, there is a checking process for seeing unique constraints on the time slots, this could be a function or some such



In [183]:
df_final_exploded = df_final.explode('TimeSlot').sort_values(by=['Room', 'Event']).reset_index(drop=True)
df_final_exploded

Unnamed: 0,Event,Room,TimeSlot,Subevent
0,Cosplay Death Match,"(Hynes, Auditorium Events)",13:00,
1,Cosplay Death Match,"(Hynes, Auditorium Events)",13:15,
2,Cosplay Death Match,"(Hynes, Auditorium Events)",13:30,
3,Cosplay Death Match,"(Hynes, Auditorium Events)",13:45,
4,Cosplay Death Match,"(Hynes, Auditorium Events)",14:00,
...,...,...,...,...
813,"Materia Girl: Women in Final Fantasy, On Scree...","(Sheraton, Workshop Fairfax)",22:45,
814,Sketchbo ok Swap,"(Sheraton, Workshop Fairfax)",19:00,
815,Sketchbo ok Swap,"(Sheraton, Workshop Fairfax)",19:15,
816,Sketchbo ok Swap,"(Sheraton, Workshop Fairfax)",19:30,


In [184]:
df_final_timeslotgrouped = df_final_exploded.groupby(['TimeSlot'])[['Event']].agg(set).reset_index()

In [185]:
#df_final_timeslotgrouped['Event'].iloc[0]

# {'A Certain Magical Index',
#  'AMV Genkis',
#  'Berklee Anime Band',
#  'Building a Manga Collecti on for a Library',
#  'Haibane Renmei',
#  'Irasshai mase! A Ramen Journey',
#  'Japanese Commerci als to Make You Laugh, Cry, and Think',
#  "Let's $ew! Ditto!",
#  'Opening Ceremoni es',
#  'SSSS. Gridman',
#  'Tabletop Session 1 - A Strange New World',
#  'The Turn of the Last Century: The Anime',
#  'Why People Love Japanese Snacks'}


In [186]:
# keep only first duplicates
# make Event lists hashable for deduplication
df_final_timeslotgrouped["Event_tuple"] = df_final_timeslotgrouped["Event"].apply(tuple)

# drop duplicates based on the Event list only, keeping the first time slot
df_dedupped_by_event = df_final_timeslotgrouped.drop_duplicates(subset="Event_tuple", keep="first")

# drop the helper column
df_dedupped_by_event = df_dedupped_by_event.drop(columns="Event_tuple")
df_dedupped_by_event


Unnamed: 0,TimeSlot,Event
0,00:00,"{Taskmast er (18+), Slumber Party, Fanservi ce..."
2,00:30,"{Slumber Party, Fanservi ce & Ecchi AMVs (18+)..."
4,01:00,"{Slumber Party, Horror AMVs (18+), Anime Stati..."
6,01:30,"{Horror AMVs (18+), Anime Statisti cs Hentai E..."
8,08:00,"{Soul Eater, Lupin the 3rd: The Castle of Cagl..."
12,09:00,"{AMV Genkis, Lupin the 3rd: The Castle of Cagl..."
14,09:30,"{AMV Genkis, Lupin the 3rd: The Castle of Cagl..."
16,10:00,"{AMV Genkis, The Turn of the Last Century: The..."
17,10:15,"{The Turn of the Last Century: The Anime, Buil..."
18,10:30,"{The Turn of the Last Century: The Anime, Buil..."
