In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# import adastra

In [None]:
%load_ext autoreload
%autoreload 1
%aimport adastra

In [None]:
def tup_to_dict(a):
    """Convert (course code, weekly hours) pairs into row dicts.

    Each resulting dict has a 'Course/Customer' entry holding the code and a
    'Weekly Length' entry holding the hours as a pandas Timedelta.
    """
    rows = []
    for code, hours in a:
        rows.append({
            'Course/Customer': code,
            'Weekly Length': pd.to_timedelta(hours, unit='h'),
        })
    return rows


# Hand-entered courses (code, weekly hours) that get appended to the loaded
# schedules further down -- presumably the ones absent from the exports.
# NOTE(review): ENGR3590 is listed for both CompRobo and ADE; rows sharing a
# code are summed together when grouped by 'Course/Customer' -- confirm the
# ADE code.
fa17missing = tup_to_dict([
    ('OIE1000', 5/3),  # OIE
    ('ENGR3590', 10/3),  # CompRobo
    ('AHSE1148', 10/3),  # DIRT TO SHIRT
    ('ENGR3590', 3),  # ADE
    ('ENGR3240', 16/3),  # TELL THE STORY
    ('MTH2132', 10/3),  # BAYESIAN
    ('AHSE1100', 10/3),  # HISTECH
    ('ENGR3299', 8),  # RETURN DESIGN
])
sp18missing = tup_to_dict([
    ('ENGR3299', 10/3),  # BIOMIMICRY
    ('ENGR3599A', 10/3),  # HTL
    ('ENGR3299B', 8),  # RETURN DESIGN
])

In [None]:
# Read each semester's schedule export through the project's adastra loader.
fa17courses, sp18courses = (
    adastra.load_schedule_file(path)
    for path in (
        'data/ad astra test fa17.xls',
        'data/ad astra test sp18.xls',
    )
)

In [None]:
def add_missing(a, m):
    """Append the records in ``m`` to frame ``a`` and return the combined frame.

    The previous row labels are not discarded: ``reset_index()`` moves them
    into an 'index' column and gives the result a fresh 0..n-1 index.
    """
    extra_rows = pd.DataFrame(m)
    combined = pd.concat([a, extra_rows], sort=True, ignore_index=False)
    return combined.reset_index()
# NOTE: this cell rebinds the names it reads, so running it a second time
# without reloading the schedules appends the hand-entered rows again.
fa17courses, sp18courses = (
    add_missing(fa17courses, fa17missing),
    add_missing(sp18courses, sp18missing),
)
# Show the end of the Fall 2017 frame, where the appended rows land.
fa17courses.tail(10)

In [None]:
# Load the first 13 columns of each categorization sheet and keep a single
# row per 'Course Code' (drop_duplicates keeps the first occurrence).
fa17cat = (
    pd.read_csv('Class and Room Categorization - Classes Fall 2017.csv', usecols=range(13))
    .drop_duplicates(subset='Course Code')
)
sp18cat = (
    pd.read_csv('Class and Room Categorization - Classes Spring 2018.csv', usecols=range(13))
    .drop_duplicates(subset='Course Code')
)

In [None]:
# Hex color per 'Primary Category' value. The plotting code looks categories
# up with .get(category, 'grey'), so any category missing here is drawn grey.
category_colors = {  # map for category colors
    'PROJECT - BASED - 3d': '#1155cc',
    'PROJECT - BASED - 2d': '#3c78d8',
    'PROJECT - BASED - digital': '#6d9eeb',
    'LECTURE': '#e69138',
    'LAB': '#38761d',
    'NOT IN AC': '#cc0000',
    'DISCUSSION': '#674ea7'
}

# Display label per 'Primary Category' value. The plotting code looks labels
# up with .get(category, category), so a category without an entry (e.g.
# 'NOT IN AC') is labelled with its raw name.
category_names = {  # map for category names
    'PROJECT - BASED - 3d': 'project-based 3d',
    'PROJECT - BASED - 2d': 'project-based 2d',
    'PROJECT - BASED - digital': 'project-based digital',
    'LECTURE': '"lecture"',
    'LAB': 'lab',
    'DISCUSSION': 'discussion'
}

def merge(sched, cat, sched_code='Course/Customer', cat_code='Course Code'):
    """Left-join schedule rows onto categorization rows by course code.

    Parameters
    ----------
    sched : pandas.DataFrame
        Left frame; every one of its rows is kept.
    cat : pandas.DataFrame
        Right frame supplying the category columns.
    sched_code, cat_code : str
        Join key column in ``sched`` and ``cat`` respectively.

    Returns
    -------
    pandas.DataFrame
        The joined frame plus a '_merge' indicator column ('both' for matched
        rows, 'left_only' for schedule rows without a categorization row).
    """
    # NOTE: no `validate=` check is applied to the join cardinality.
    join_options = dict(
        how='left',
        left_on=sched_code,
        right_on=cat_code,
        indicator=True,
    )
    return pd.merge(sched, cat, **join_options)

def agg_by_time(a):
    """Total the 'Weekly Length' of ``a`` per 'Course/Customer' code.

    Returns a frame with one row per course code and the columns
    'Course/Customer' and 'Weekly Length' (the per-code sum).
    """
    # Use the built-in groupby sum: passing np.sum to .agg() is deprecated in
    # recent pandas and emits a FutureWarning.
    return a.groupby('Course/Customer', as_index=False)['Weekly Length'].sum()

def plot_pie_cat_totals(courses, group_col='Primary Category', agg_col='Weekly Length', fig=None):
    """Plot a pie chart of ``agg_col`` totalled per ``group_col`` value.

    Parameters
    ----------
    courses : pandas.DataFrame
        Rows to total; must contain ``group_col`` and ``agg_col``. The totals
        are divided by a one-hour Timedelta, so ``agg_col`` is expected to
        hold timedeltas.
    group_col : str
        Column whose values become the pie wedges.
    agg_col : str
        Column summed within each group.
    fig : optional
        Passed through unchanged to ``adastra.plot_pie``.

    Returns
    -------
    Whatever ``adastra.plot_pie`` returns.
    """
    cat_totals = (
        courses
        .groupby(group_col, as_index=False)[agg_col]
        .sum()
        # Order wedges by label, not by size. Sort on group_col rather than a
        # hard-coded 'Primary Category' so a custom group_col works too.
        .sort_values(group_col)
    )
    groups = list(cat_totals[group_col])

    return adastra.plot_pie(
        cat_totals[agg_col] / pd.Timedelta(1, unit='h'),  # timedelta -> hours
        labels=[category_names.get(category, category) for category in groups],
        colors=[category_colors.get(category, 'grey') for category in groups],
        fig=fig
    )

## Plot pie charts of courses per semester

In [None]:
# Categorization columns carried into the per-course footprint tables.
# NOTE(review): 'Seconday Category' (sic) presumably mirrors a misspelled
# header in the categorization CSVs -- check the files before "fixing" it.
cat_cols = ['Course Code','Course Title','Primary Category','Seconday Category']

# Per semester: total weekly length per course, then attach its categories.
fa17footprint = merge(agg_by_time(fa17courses), fa17cat[cat_cols])
sp18footprint = merge(agg_by_time(sp18courses), sp18cat[cat_cols])

Check for missed courses (schedule entries with no match in the categorization sheet)

In [None]:
# merge() is a left join, so '_merge' can only be 'both' or 'left_only';
# 'left_only' marks scheduled courses that found no categorization row.
fa17footprint[fa17footprint['_merge'] == 'left_only']

In [None]:
# merge() is a left join, so '_merge' can only be 'both' or 'left_only';
# 'left_only' marks scheduled courses that found no categorization row.
sp18footprint[sp18footprint['_merge'] == 'left_only']

### Space Usage

In [None]:
# (semester label, footprint table) pairs; the later plotting cells iterate
# over this same list.
footprints = [('Fall 2017', fa17footprint), ('Spring 2018', sp18footprint)]

for name, data in footprints:
    # Leave out the rows categorized as 'NOT IN AC'.
    in_ac = data[data['Primary Category'] != 'NOT IN AC']

    fig = plt.figure(figsize=(7, 5))
    plt.title(name+' AC Usage by Course Category', y=1.05)
    plot_pie_cat_totals(in_ac, fig=fig)

    # NOTE(review): strip(' ') only trims leading/trailing spaces, so the
    # space inside the label (e.g. 'fall 2017') stays in the file name.
    out_path = 'figs/'+name.lower().strip(' ')+'_footprint'+'.eps'
    plt.savefig(out_path)

### Course Offerings

In [None]:
def agg_by_course(a):
    """Count the non-null 'Course/Customer' entries per 'Primary Category'.

    Returns a frame with the columns 'Primary Category' and 'Course/Customer',
    the latter holding the count for that category.
    """
    per_category = a.groupby('Primary Category', as_index=False)
    return per_category['Course/Customer'].count()
# Fall 2017: number of footprint rows per primary category.
agg_by_course(fa17footprint)

In [None]:
# Spring 2018: number of footprint rows per primary category.
agg_by_course(sp18footprint)

In [None]:
for name, data in footprints:
    # Course counts per category, leaving out rows categorized 'NOT IN AC'.
    cat_totals = agg_by_course(data[data['Primary Category'] != 'NOT IN AC'])
    wedge_labels = [category_names.get(category, category) for category in cat_totals['Primary Category']]
    wedge_colors = [category_colors.get(category, 'grey') for category in cat_totals['Primary Category']]

    fig = plt.figure(figsize=(7, 5))
    plt.title(name+' Course Offerings by Category', y=1.05)
    adastra.plot_pie(
        cat_totals['Course/Customer'],
        labels=wedge_labels,
        colors=wedge_colors,
        fig=fig
    )

    # NOTE(review): strip(' ') only trims leading/trailing spaces, so the
    # space inside the label (e.g. 'fall 2017') stays in the file name.
    out_path = 'figs/'+name.lower().strip(' ')+'_offerings'+'.eps'
    plt.savefig(out_path)

## Plot bar charts of courses per semester

In [None]:
import re

In [None]:
def plot_courses_bar(courses):
    """Plot a horizontal bar chart of weekly hours per course.

    Parameters
    ----------
    courses : pandas.DataFrame
        Must contain 'Course Title', 'Weekly Length' and 'Primary Category'.
        'Weekly Length' is divided by a one-hour Timedelta, so it is expected
        to hold timedeltas. Bars follow the row order of ``courses``.

    Returns
    -------
    Whatever ``adastra.plot_barh`` returns (the caller in this notebook uses
    it as a matplotlib figure -- confirm against adastra).
    """
    # Optional leading course-code prefix: 3-4 word characters, an optional
    # space, up to 5 digits and an optional trailing word character, followed
    # by one of ':', '|' or '/' and a space. Group 2 is the remaining title.
    # Raw string: '\w' and '\d' are invalid escapes in a normal str literal.
    extractor = re.compile(r'^(?:(\w{3,4} ?\d{0,5}\w?)[:|/] )?(.*)$')
    max_len = 45  # longest label shown before it is shortened with '...'

    def format_titles(titles):
        """Strip the course-code prefix from each title and cap its length."""
        just_titles = []
        for title in titles:
            match = extractor.match(title)
            # The pattern can fail to match (e.g. a title with an embedded
            # newline); fall back to the unmodified title instead of raising.
            just_titles.append(match.group(2) if match else title)
        # trim length of strings
        return [
            title[:max_len - 3] + '...' if len(title) > max_len else title
            for title in just_titles
        ]

    return adastra.plot_barh(
        labels=format_titles(courses['Course Title'].astype(str)),
        values=courses['Weekly Length'] / pd.Timedelta(1, unit='h'),  # hours
        color=[category_colors.get(category, 'grey') for category in courses['Primary Category']],
        fig=plt.figure(figsize=(10, 20))
    )

for name, data in footprints:
    # Build one combined mask on the full frame. Chaining two boolean filters
    # applied a mask indexed by `data` to an already-filtered frame, which
    # makes pandas reindex the key and emit a UserWarning.
    keep = (data['Primary Category'] != 'NOT IN AC') & data['Course Title'].notnull()
    fig = plot_courses_bar(data.loc[keep].sort_values('Weekly Length'))
    plt.xlim([0,18])
    plt.title(name+' Courses by AC footprint')
    plt.xlabel('weekly room-hours')
    fig.subplots_adjust(left=0.5)  # make room for the course labels
    # NOTE(review): strip(' ') only trims leading/trailing spaces, so the
    # space inside the label (e.g. 'fall 2017') stays in the file name.
    plt.savefig('figs/'+name.lower().strip(' ')+'_courses_footprint'+'.eps')

In [None]:
# Spot-check: show the Spring 2018 schedule rows for course code ENGR2510.
is_engr2510 = sp18courses['Course/Customer'] == 'ENGR2510'
sp18courses[is_engr2510]