# CPO Data Science

This program is intended for use by the Portland State University Campus Planning Office (CPO).  

In [55]:
#Import required packages
import os
import pandas as pd
import re
import numpy as np
import datetime
import matplotlib.pyplot as plt

In [64]:
def format_date(df_date):
    """
    Splits meeting dates and times into separate columns using regex.

    Reads the string columns 'Meeting_Times' and 'Meeting_Dates' and adds
    the following columns to df_date in place:

      Days        - leading run of non-whitespace characters of 'Meeting_Times'
      Start_Date  - text of 'Meeting_Dates' before the first '-'
      End_Date    - text of 'Meeting_Dates' after the first '-'
      Start_Time  - text between the first space and the last '-' of
                    'Meeting_Times', parsed with format '%H%M'
      End_Time    - text after the first '-' of 'Meeting_Times', parsed
                    with format '%H%M'
      Duration_Hr - (End_Time - Start_Time) expressed in hours

    Rows where a pattern does not match get NaN / NaT in the derived column.

    Returns the same DataFrame object that was passed in.
    """
    times = df_date['Meeting_Times']
    dates = df_date['Meeting_Dates']

    # Raw strings keep the backslash in the whitespace class from being read
    # as an (invalid) string escape; expand=False yields a Series, which is
    # what a single-column assignment expects.
    df_date['Days'] = times.str.extract(r'([^\s]+)', expand=False)
    df_date['Start_Date'] = dates.str.extract(r'^(.*?)-', expand=False)
    df_date['End_Date'] = dates.str.extract(r'((?<=-).*$)', expand=False)

    start_text = times.str.extract(r'(?<= )(.*)(?=-)', expand=False)
    end_text = times.str.extract(r'((?<=-).*$)', expand=False)
    df_date['Start_Time'] = pd.to_datetime(start_text, format='%H%M')
    df_date['End_Time'] = pd.to_datetime(end_text, format='%H%M')

    # .dt.seconds is the seconds component of the timedelta (0..86399),
    # converted here to fractional hours.
    elapsed = df_date['End_Time'] - df_date['Start_Time']
    df_date['Duration_Hr'] = elapsed.dt.seconds / 3600
    return df_date

def format_xlist(df_xl):
    """
    Computes the '%_Capacity' column.

    Rows whose 'Xlst' value is non-empty (presumably cross-listed sections)
    use Actual_Enrl / Max_Enrl; all other rows use Actual_Enrl / Room_Capacity.
    The three numeric columns are cast to int before dividing.

    The column is written onto df_xl in place and df_xl is returned.
    """
    has_xlst = df_xl['Xlst'] != ''

    enrolled = df_xl['Actual_Enrl'].astype(int)
    share_of_max_enrl = enrolled / df_xl['Max_Enrl'].astype(int)
    share_of_room = enrolled / df_xl['Room_Capacity'].astype(int)

    df_xl['%_Capacity'] = np.where(has_xlst, share_of_max_enrl, share_of_room)
    return df_xl

def final_print(df_print, school_print, term_print):
    """
    Prints a report for one school/term and returns a trimmed frame.

    The report shows df_print followed by the sums of its 'Qty_Classrooms'
    and 'Qty_Seats' columns. After printing, df_print is modified in place:
    a 'Key' column holding term_print is added and 'Optimal_Size' is cast
    to int.

    Returns a DataFrame with only the columns
    'Optimal_Size', 'Key' and 'Qty_Classrooms'.
    """
    size_col = 'Optimal_Size'
    rooms_col = 'Qty_Classrooms'
    seats_col = 'Qty_Seats'

    # --- console report ---
    print('===================================================================')
    print('Report for {0} - {1}'.format(school_print, term_print))
    print(df_print)
    print("Total Number of Classrooms Needed (Projected): ", df_print[rooms_col].sum())
    print("Total Number of Seats Needed (Projected): ", df_print[seats_col].sum())
    print('===================================================================','\n')

    # --- tag rows with the term and keep only the columns returned to the caller ---
    df_print['Key'] = term_print
    df_print[size_col] = df_print[size_col].astype(int)
    return df_print[[size_col, 'Key', rooms_col]]

def plot_graphs(df_grph_lst, school_print, class_type_print, y_max=5):
    """
    Takes a list of dfs per term and plots them in a single figure.

    The frames are concatenated, grouped by ('Optimal_Size', 'Key'), summed,
    and drawn as a bar chart with one bar series per 'Key' value.

    Parameters:
      df_grph_lst      - list of DataFrames containing 'Optimal_Size' and
                         'Key' columns plus the values to be summed
      school_print     - label used as the first part of the plot title
      class_type_print - one of 'DO', 'GP' or 'ALL'; selects the title suffix
      y_max            - upper limit of the y axis. The default of 5 suits a
                         departmental view; a larger value such as 75 suits a
                         full-campus view. Pass None to let matplotlib pick
                         the limits automatically.

    Raises KeyError if class_type_print is not 'DO', 'GP' or 'ALL'.
    """
    class_dct = {
        'DO': 'Departmentally-Controlled Classrooms',
        'GP': 'General Pool Classrooms',
        'ALL': 'Dept-Controlled and General Pool Classrooms',
    }

    df_all = pd.concat(df_grph_lst)
    totals = df_all.groupby(['Optimal_Size', 'Key']).sum()
    # unstack('Key') turns each 'Key' value into its own column so that each
    # one is drawn as a separate bar series.
    axes = totals.unstack('Key').plot(kind='bar')

    axes.set_xlabel('Classrooms by Size')
    axes.set_ylabel('Number of Classrooms Needed (Projected)')
    axes.set_title('{0} {1}'.format(school_print, class_dct[class_type_print]))
    if y_max is not None:
        axes.set_ylim([0, y_max])
    plt.show()

def main(csv_path='data/PSU_master_classroom.csv'):
    """
    Main program control flow.

    Loads the master classroom CSV, derives the date/time columns with
    format_date, drops rows whose start and end dates are equal, adds a
    'Days_Per_Week' column, replaces the 'No Data Available' placeholder in
    'Room_Capacity' with 0, and computes '%_Capacity' with format_xlist.

    Parameters:
      csv_path - path of the CSV file to load; defaults to the path this
                 program has always used.
    """
    pd.set_option('display.max_rows', None)
    df = pd.read_csv(csv_path)
    df = df.fillna('')

    df = format_date(df)
    # Avoid classes that only occur on a single day.
    # .copy() makes the filtered frame independent of the original, so the
    # column assignments below do not raise SettingWithCopyWarning.
    df = df.loc[df['Start_Date'] != df['End_Date']].copy()

    # Number of meeting days per week, taken as the length of the 'Days' string.
    # NOTE(review): the original comment also mentioned treating a Sunday
    # condition, but no such handling is present here - confirm whether it
    # is still needed.
    df['Days_Per_Week'] = df['Days'].str.len()
    # Replace the missing-capacity placeholder with 0 so the column can be
    # cast to int inside format_xlist.
    df['Room_Capacity'] = df['Room_Capacity'].apply(lambda x: x if (x != 'No Data Available') else 0)

    df_cl = format_xlist(df)

   


In [65]:
# Run the analysis pipeline defined in main().
main()