In [918]:
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import re

import warnings
warnings.filterwarnings("ignore")

In [919]:
# HTML file that is read and parsed below; a relative path, so it is resolved
# against the notebook's working directory.
path = 'SWD.html'

In [920]:
def readHTML(path):
    """Return the full text of the file at `path`, decoded as UTF-8.

    Parameters
    ----------
    path : str or path-like
        File to read.

    Returns
    -------
    str
        Entire file content.
    """
    with open(path, mode='r', encoding='utf-8') as htmlFile:
        contents = htmlFile.read()
    return contents
    
def extractData(table):
    """Convert one parsed HTML table into a DataFrame of stripped cell text.

    Parameters
    ----------
    table : object exposing ``find_all(tag)``
        The table element; every ``th`` below it supplies a column name and
        every ``tr`` below it is a candidate data row.

    Returns
    -------
    pandas.DataFrame
        One row per ``tr`` that contains at least one ``td``; all values are
        the whitespace-stripped text of the cells (strings).
    """
    columnNames = []
    for headerCell in table.find_all('th'):
        columnNames.append(headerCell.text.strip())

    records = []
    for tableRow in table.find_all('tr'):
        dataCells = tableRow.find_all('td')
        # Rows without any <td> (for example a header-only row) are skipped.
        if not dataCells:
            continue
        records.append([dataCell.text.strip() for dataCell in dataCells])

    return pd.DataFrame(records, columns=columnNames)

def parseHTML(data):
    """Parse an HTML document and return its first three tables as DataFrames.

    Parameters
    ----------
    data : str
        HTML markup.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(balance, other, dues)`` built, in that order, from the first,
        second and third ``<table>`` found in the document.

    Raises
    ------
    IndexError
        If the document contains fewer than three tables.
    """
    document = BeautifulSoup(data, 'html.parser')
    allTables = document.find_all('table')
    # Positional lookup on purpose: the tables are identified only by their
    # order in the document.
    return tuple(extractData(allTables[position]) for position in range(3))

def plotData(df, title=None):
    """Show `df`, ordered by its index, as a bar chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; rows are sorted by index label before plotting.
    title : str, optional
        Chart title; omitted when ``None``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    ordered = df.sort_index()
    ordered.plot(title=title, figsize=(12, 6), kind='bar')
    # Decoration is applied through the pyplot interface after the chart
    # has been drawn.
    plt.grid(axis='y')
    plt.xticks(rotation=45)
    plt.ylabel('Dues')
    plt.show()

In [921]:
# Read the raw HTML text from `path` ...
data = readHTML(path)
# ... and split it into the three DataFrames built from the document's first
# three tables (in document order).
balance, other, dues = parseHTML(data)

In [922]:
# Remove the RECNO / IDNO / SNAME columns before the per-category processing.
# `dues` is overwritten here, so without errors='ignore' a second run of this
# cell would raise KeyError because the columns are already gone.
dues = dues.drop(columns=['RECNO', 'IDNO', 'SNAME'], errors='ignore')

# Upper-case substrings used to pick columns by name (see `include`).
allFests    = ['APOGEE', 'BOSM', 'OASIS', 'INTERBITS']
allEateries = ['ANC', 'F301', 'FM', 'LOOTERS', 'PARADISE', 'TOTT']
allMonths   = ['DEC', 'JAN', 'FEB', 'MAR', 'APR', 'MAY']

def include(df, inclusions):
    """Keep only the columns whose upper-cased name contains one of `inclusions`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column names.
    inclusions : str or iterable of str
        Substring(s) to look for. Each one is compared as given against the
        upper-cased column name, so it should itself be upper case. A single
        string is treated as one substring.

    Returns
    -------
    pandas.DataFrame
        The matching columns of `df`, in their original order.
    """
    # A bare string must not be iterated character by character ('SU' would
    # otherwise match every column containing an 'S' or a 'U'). Any other
    # iterable is materialised so it can be scanned once per column.
    if isinstance(inclusions, str):
        inclusions = [inclusions]
    else:
        inclusions = list(inclusions)

    keep = [col for col in df.columns if any(inc in col.upper() for inc in inclusions)]
    return df[keep]

In [923]:
def _toNumericOrKeep(column):
    """Return `column` converted to a numeric dtype, or unchanged if a value does not parse."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def process(df, catName):
    """Build a cleaned, transposed view of the columns belonging to a category.

    Steps, in order:
      1. keep the columns selected by ``include(df, catName)``;
      2. replace empty / whitespace-only cells with 0;
      3. convert each column to numeric when all of its values parse;
      4. drop columns in which every value equals 0;
      5. tidy the labels: for a single category remove that category text,
         remove the substrings '20', '23' and '24', turn underscores into
         spaces and strip leading whitespace;
      6. order the columns by their value in the FIRST row, largest first,
         and transpose.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column names.
    catName : str or list of str
        Substring(s) identifying the category. A single string is treated
        as a one-element list.

    Returns
    -------
    pandas.DataFrame
        Transposed frame with one row per kept column. When `df` has no rows
        an empty transposed frame is returned.
    """
    # A bare string would be iterated character by character by `include`,
    # and its length would defeat the single-category label clean-up below.
    if isinstance(catName, str):
        catName = [catName]

    df = include(df, catName)
    df = df.replace(r'^\s*$', 0, regex=True)
    # pd.to_numeric(errors='ignore') is deprecated; the helper gives the same
    # "convert or leave untouched" result per column.
    df = df.apply(_toNumericOrKeep)
    df = df.loc[:, (df != 0).any(axis=0)]
    if len(catName) == 1:
        # Escape so the category is removed as literal text, not as a regex.
        df.columns = [re.sub(re.escape(catName[0]), '', col) for col in df.columns]
    df.columns = [re.sub(r'20|23|24', '', col) for col in df.columns]
    df.columns = [re.sub(r'_', ' ', col) for col in df.columns]
    df.columns = [re.sub(r'^\s+', '', col) for col in df.columns]
    if len(df.index) == 0:
        # No first row to order by.
        return df.T
    return df.iloc[:, df.iloc[0].argsort()[::-1]].T

In [924]:
# Categories are passed as one-element lists, like every other call in this
# notebook: a bare string is iterated character by character when columns
# are matched, which selects far more columns than intended.
# NOTE(review): 'LAUNDR' is a partial token and the single-category label
# clean-up removes exactly that text, so any remaining characters of the
# column name stay in the label; confirm against the real column names.
su        = process(dues, ['SU'])
jh        = process(dues, ['JH'])
laundry   = process(dues, ['LAUNDR'])

In [925]:
# All fest columns in one frame, then one frame per individual fest.
fests = process(dues, allFests)
oasis, apogee, bosm, interbits = (
    process(dues, [festName])
    for festName in ('OASIS', 'APOGEE', 'BOSM', 'INTERBITS')
)

In [926]:
# All eatery columns in one frame, then one frame per individual eatery.
eateries = process(dues, allEateries)
anc, f301, fm, looters, paradise, tott = (
    process(dues, [eateryName])
    for eateryName in ('ANC', 'F301', 'FM', 'LOOTERS', 'PARADISE', 'TOTT')
)

In [927]:
# One frame per month, selected by the month token in the column name.
# NOTE(review): `allMonths` lists 'MAR' / 'APR' while these calls use
# 'MARCH' / 'APRIL'; matching is by substring, so confirm which spelling the
# column names actually contain.
dec, jan, feb, march, april, may = (
    process(dues, [monthName])
    for monthName in ('DEC', 'JAN', 'FEB', 'MARCH', 'APRIL', 'MAY')
)

In [928]:
# Per-month frames restricted to the eatery columns. `eateries` is already
# transposed by process(), so it is flipped back before being processed again.
(
    decEateries,
    janEateries,
    febEateries,
    marchEateries,
    aprilEateries,
    mayEateries,
) = (
    process(eateries.T, [monthName])
    for monthName in ('DEC', 'JAN', 'FEB', 'MARCH', 'APRIL', 'MAY')
)

In [None]:
# Display the frame produced by process() for the 'SU' category.
su

In [None]:
# Display the combined frame for every entry of `allFests`.
fests

In [None]:
# Display the combined frame for every entry of `allEateries`.
eateries

In [None]:
# Bar chart of the 'APRIL' frame; no title is passed, so the chart is untitled.
plotData(april)