# Boxplots

## Script setup

In [25]:
# Third-party and standard modules
from typing import Optional
import numpy as np
import pandas as pd
import seaborn
import matplotlib.pyplot as plt
import pytz
from itertools import repeat
from importlib import reload

# User-defined modules.
# The reload() function is needed to update modules after
# changes are made to their files.
from modules import dataframe_manip as dfm
from modules import misc, schemas, College1AcPtTimeSeries
# The reload() function is needed to update modules after
# changes are made to their files.
dfm = reload(dfm)
misc = reload(misc)
schemas = reload(schemas)
College1AcPtTimeSeries = reload(College1AcPtTimeSeries)

## Script parameters

The following values are this script's parameters.

In [26]:
filepath = './cisco_wifi_data_until_20190513.csv'
timezone = pytz.timezone('US/Pacific')
schema: schemas.AcPtTimeSeries =\
    College1AcPtTimeSeries.College1AcPtTimeSeries
buildings = [
    'SCC', 'HAHN', 'CROOKSHANK', 'THATHCHER', 'CLARK',
    'CARNEGIE', 'MASON', 'PEARSON', 'ALEXANDER', 'ITB'
]

## Data Processing

In [27]:
# A time-series dataframe holding occupancy counts
# for many access points (columns).
occ: pd.DataFrame = dfm.csv_to_timeseries_df(
    filepath=filepath, timezone=timezone
)

In [28]:
acpts = occ.columns.to_list()
acpts.sort()

buildings_in_cols = [False] * len(buildings)
for bi in range(0, len(buildings)):
    for a in acpts:
        if buildings[bi] in a:
            buildings_in_cols[bi] = True
            break

In [34]:
print(buildings)
print(buildings_in_cols)
print(list(zip(buildings, buildings_in_cols)))

['SCC', 'HAHN', 'CROOKSHANK', 'THATHCHER', 'CLARK', 'CARNEGIE', 'MASON', 'PEARSON', 'ALEXANDER', 'ITB']
[True, True, False, False, True, True, True, False, True, True]
[('SCC', True), ('HAHN', True), ('CROOKSHANK', False), ('THATHCHER', False), ('CLARK', True), ('CARNEGIE', True), ('MASON', True), ('PEARSON', False), ('ALEXANDER', True), ('ITB', True)]


In [29]:
# Eliminate unnecessary columns.
occ.drop(columns=list(filter(
    # add to drop-list if NOT in the buildings we want.
    lambda col: schema.col_to_building(col, safe=True) not in buildings,
    occ.columns.to_list()
)))

occ = dfm.fill_intervening_nas(
    occ, inplace=True, fill_val=0
)

In [30]:
occ_bybuild = occ.groupby(
    by=schema.col_to_building, axis=1
)

print(occ_bybuild.groups)

Exception: No matching buildings for 'POM-MUDD-B02'