# CPO Datascience

This program is intended for use by the Portland State University Campus Planning Office (CPO).  

In [158]:
#Import required packages
import os
import pandas as pd
import re
import numpy as np
import datetime
import matplotlib.pyplot as plt

In [183]:
def format_date(df_date):
    """
    Split the raw meeting date/time strings into separate, parsed columns.

    The columns below are added to ``df_date`` in place, and the same frame
    is returned:

    * ``Days``        - first whitespace-delimited token of ``Meeting_Times``.
    * ``Start_Date``  - first whitespace-delimited token of ``Meeting_Dates``.
    * ``End_Date``    - text of ``Meeting_Dates`` following a '-' up to the
      last space.
    * ``Start_Month`` / ``End_Month`` - ``Start_Date`` / ``End_Date`` parsed
      as an abbreviated month name (``%b``).
    * ``Start_Time`` / ``End_Time``   - the two sides of the '-' in
      ``Meeting_Times`` parsed as ``%H%M``.
    * ``Duration_Hr`` - ``End_Time - Start_Time`` expressed in hours.

    Rows whose strings do not match a pattern get NaN/NaT in the
    corresponding columns.
    """
    # NOTE(review): the patterns below presumably target values shaped like
    # 'MWF 0900-1050' and 'Jun 25-Aug 19' -- confirm against the CSV.
    times = df_date['Meeting_Times']
    dates = df_date['Meeting_Dates']

    # expand=False returns a Series for a single capture group.
    df_date['Days'] = times.str.extract(r'([^\s]+)', expand=False)

    df_date['Start_Date'] = dates.str.extract(r'([^\s]+)', expand=False)
    # Parse the whole column. Slicing the Series with [0:2] selects the first
    # two *rows*, which left every other row's Start_Month as NaT.
    df_date['Start_Month'] = pd.to_datetime(df_date['Start_Date'], format='%b')

    df_date['End_Date'] = dates.str.extract(r'(?<=-)(.*)(?= )', expand=False)
    df_date['End_Month'] = pd.to_datetime(df_date['End_Date'], format='%b')

    start_text = times.str.extract(r'(?<= )(.*)(?=-)', expand=False)
    df_date['Start_Time'] = pd.to_datetime(start_text, format='%H%M')

    end_text = times.str.extract(r'((?<=-).*$)', expand=False)
    df_date['End_Time'] = pd.to_datetime(end_text, format='%H%M')

    elapsed = df_date['End_Time'] - df_date['Start_Time']
    df_date['Duration_Hr'] = elapsed.dt.seconds / 3600
    return df_date

def format_xlist(df_xl):
    """
    Compute the fraction of capacity filled for every class.

    Rows whose ``Xlst`` value is not the empty string use ``Max_Enrl`` as
    the denominator; all other rows use ``Room_Capacity``. The result is
    written to a ``%_Capacity`` column on ``df_xl`` (in place).

    Returns the rows of ``df_xl`` whose ``%_Capacity`` is a finite number.
    Rows with a zero or non-numeric denominator, or a non-numeric
    enrollment, are dropped from the returned frame.
    """
    # Coerce instead of astype(int): blank or text cells become NaN and are
    # filtered out below rather than raising ValueError for the whole column.
    enrolled = pd.to_numeric(df_xl['Actual_Enrl'], errors='coerce')
    max_enrolled = pd.to_numeric(df_xl['Max_Enrl'], errors='coerce')
    room_capacity = pd.to_numeric(df_xl['Room_Capacity'], errors='coerce')

    has_xlist = df_xl['Xlst'] != ''
    df_xl['%_Capacity'] = np.where(has_xlist,
                                   enrolled / max_enrolled,
                                   enrolled / room_capacity)

    # isfinite rejects NaN, +inf and -inf in one pass.
    usable = np.isfinite(df_xl['%_Capacity'])
    return df_xl.loc[usable]


def plot_graph(xs, ys):
    """
    Draw a scatter plot of ``ys`` against ``xs`` and display it.

    Both arguments are handed straight to ``matplotlib.pyplot.scatter``;
    nothing is returned.
    """
    plt.scatter(x=xs, y=ys)
    plt.show()


def main():
    """
    Main program control flow.

    Loads the classroom CSV, derives the date/time columns, computes the
    capacity fraction per class, and shows a scatter plot of capacity
    fraction against start month.
    """
    # Show full frames when printing.
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)

    df = pd.read_csv('data/PSU_master_classroom.csv')
    df = df.fillna('')

    df = format_date(df)
    # Avoid classes that only occur on a single day.
    # .copy() makes the filtered frame independent, so the column
    # assignments below do not write into a slice of the original
    # (SettingWithCopyWarning).
    df = df.loc[df['Start_Date'] != df['End_Date']].copy()

    # Number of meeting days per week = number of characters in 'Days'.
    # NOTE(review): an earlier comment here mentioned a Sunday special case,
    # but no such handling is implemented.
    df['Days_Per_Week'] = df['Days'].str.len()

    # Treat the 'No Data Available' placeholder as a capacity of 0.
    df['Room_Capacity'] = df['Room_Capacity'].apply(
        lambda x: x if (x != 'No Data Available') else 0)

    df_cl = format_xlist(df)
    dep_var = df_cl['%_Capacity']

    plot_graph(df_cl['Start_Month'], dep_var)
   


In [184]:
# Run the analysis only when executed directly (a notebook cell or a script),
# not when this code is imported as a module.
if __name__ == "__main__":
    main()

['1900-06-01T00:00:00.000000000' '1900-12-01T00:00:00.000000000'
 '1900-03-01T00:00:00.000000000' '1900-04-01T00:00:00.000000000'
 '1900-08-01T00:00:00.000000000' '1900-05-01T00:00:00.000000000'
 '1900-02-01T00:00:00.000000000' '1900-01-01T00:00:00.000000000'
 '1900-11-01T00:00:00.000000000' '1900-10-01T00:00:00.000000000'
 '1900-09-01T00:00:00.000000000' '1900-07-01T00:00:00.000000000']


TypeError: invalid type promotion