### Running the cells below will download all the parking occupancy Excel files from 2016 Fall to 2023 Summer

The downloaded files will be saved in the `data/excels/` directory (relative to the notebook's working directory), with names in the format:

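`parking_[academic year]_[quarter].xlsx`, where the academic year is written as `[start year]_[end year]` (e.g. `parking_2016_2017_[quarter].xlsx`)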

In [1]:
from bs4 import BeautifulSoup
import requests 
import re
import os

In [2]:
# Base URL of the folder that holds the quarterly occupancy tables; the
# relative links found on the contents page are appended to it when downloading.
url = "https://rmp-wapps.ucsd.edu/TS/Survey/Survey%20of%20Parking%20Space%20Occupancy%20Levels/Quarterly%20Tables/"

# Fetch the contents page. Fail loudly on an HTTP error so that an error page
# is never parsed as if it were the real listing, and bound the wait so a
# stalled connection cannot hang the notebook forever.
response = requests.get(url + "Contents.html", timeout=60)
response.raise_for_status()
html_doc = response.text
soup = BeautifulSoup(html_doc, 'html.parser')

In [3]:
# Create the output directory data/excels (and its parent data/) if missing.
# makedirs with exist_ok=True also handles the case where data/ already exists
# but data/excels does not; two mkdir calls guarded by a single
# FileExistsError handler would skip creating data/excels in that case.
os.makedirs('data/excels', exist_ok=True)

# Collect every link on the contents page.
hrefs = soup.find_all('a')
# Years accepted by the filter below. The year found in a link is also used as
# the second year of the academic-year pair in the output file name.
years = list(range(2017, 2025))

for h in hrefs:
    # Keep only the links labelled 'Excel' (this filters out the PDFs).
    if h.get_text() != 'Excel':
        continue

    file_name = h.get('href')
    if not file_name:
        # An anchor without a target cannot be downloaded.
        continue

    # Filter out all data before the 2016/2017 academic year. Links that do
    # not carry an integer between two dashes are skipped instead of raising.
    year_match = re.search(r'-(\S+)-', file_name)
    if year_match is None:
        continue
    try:
        link_year = int(year_match.group(1))
    except ValueError:
        continue
    if link_year not in years:
        continue

    # Work out the output name before downloading so that a link with an
    # unexpected name costs no network round-trip.
    # NOTE(review): `[A-z]` also matches the ASCII punctuation between 'Z' and
    # 'a' ([ \ ] ^ _ `); `[A-Za-z]` is probably what was intended. The pattern
    # is left unchanged here - confirm against the real file names.
    try:
        year = int(re.findall(r'%([\S\-]+)%', file_name)[0].split('-')[1])
        quarter = re.findall(r'20([A-z]+).', file_name)[0]
    except (IndexError, ValueError):
        print(f"Skipping {file_name}: unexpected file name format")
        continue
    out = f"data/excels/parking_{year - 1}_{year}_{quarter}.xlsx"

    # Download the file; never save an HTTP error page as a workbook.
    path = url + file_name
    r = requests.get(path, allow_redirects=True, timeout=60)
    if not r.ok:
        print(f"Skipping {path}: HTTP {r.status_code}")
        continue

    # Save the file; the context manager guarantees the handle is flushed
    # and closed even if the write fails.
    with open(out, "wb") as f:
        f.write(r.content)