In [81]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt

# function name: read_as_dataframe
# argument(s): 1 string holding the name/path of the CSV file to load
# description: loads the contents of a CSV file into a Pandas DataFrame
# return: 1 Pandas DataFrame built from the CSV contents
def read_as_dataframe(csv_filename):
    # let pandas parse the CSV straight into a DataFrame and hand it back
    return pd.read_csv(csv_filename)

# function name: process_df
# argument(s): 1 string representing the file name of the data set
# description: reads the data set into a DataFrame through read_as_dataframe
#              (no further cleaning is applied in this function)
# return: 1 Pandas DataFrame
def process_df(filename):
    # delegate the CSV loading and pass the result straight through
    return read_as_dataframe(filename)

# function name: isolate_sem
# argument(s): 1 dataframe, 1 string representing the semester (parameter `col`)
# description: keeps only the rows whose 'Semester' column equals the given value
# return: 1 dataframe with the matching rows, re-indexed from 0
def isolate_sem(df, col):
    # boolean mask of the rows that belong to the requested semester
    in_semester = df['Semester'] == col
    # select those rows and discard the old index
    return df.loc[in_semester].reset_index(drop=True)

# function name: isolate_course
# argument(s): 1 dataframe, 1 string representing the course title (parameter `col`)
# description: keeps only the rows whose 'Course' column equals the given value
# return: 1 dataframe with the matching rows, re-indexed from 0
def isolate_course(df, col):
    # boolean mask of the rows that belong to the requested course
    in_course = df['Course'] == col
    # select those rows and discard the old index
    return df.loc[in_course].reset_index(drop=True)

# function name: facility_counts
# argument(s): 1 dataframe
# description: counts the rows per 'Facility' value and orders the facilities
#              from the highest count to the lowest
# return: summary table with columns 'Facility' and '  N  ', indexed from 1
def facility_counts(df):
    count_col = '  N  '

    # number of rows recorded for each facility
    per_facility = df.groupby('Facility').size()

    # turn the counts into a two-column table, largest count first
    table = per_facility.reset_index(name=count_col)
    table = table.sort_values(by=count_col, ascending=False)

    # renumber the rows so the ranking starts at 1 instead of 0
    table = table.reset_index(drop=True)
    table.index = table.index + 1
    return table

# function name: facility_list
# argument(s): 1 dataframe
# description: collects the distinct values found in the 'Facility' column
# return: the array of unique facility values produced by Series.unique()
def facility_list(df):
    # unique() keeps one entry per distinct facility value
    return df['Facility'].unique()
# function name: q_cts
# argument(s): 1 dataframe containing one column per evaluation question
# description: makes a new dataframe with the 'Agree'/'Disagree' counts and
#              percentages for each question, followed by an 'Average' row
# return: 1 dataframe (columns: Question, Agree, Disagree, Agree %, Disagree %),
#         indexed from 1, with the 'Average' row last
def q_cts(df):
    # question columns, in the order they are numbered in the output
    qs = ['Patient Diversity', 'Learning Outcomes', 'Orientation Provided', 'RNs as Role Models', 'Staff Responsibility', 'Record Access', 'Staff/Student Relationship', 'Facility Space', 'Recommend Facility', 'Issues Resolved with Staff']
    columns = ['Question', 'Agree', 'Disagree', 'Agree %', 'Disagree %']

    # one output row per question: label, counts and formatted percentages
    rows = []
    for number, item in enumerate(qs, start=1):
        agree = int((df[item] == 'Agree').sum())
        disagree = int((df[item] == 'Disagree').sum())
        total = agree + disagree

        if total == 0:
            # no agree/disagree answers at all: report 0% instead of dividing by zero
            a_per_str, d_per_str = '0%', '0%'
        else:
            a_per_str = '{0:.0%}'.format(agree / total)
            d_per_str = '{0:.0%}'.format(disagree / total)

        rows.append(['%d. %s' % (number, item), agree, disagree, a_per_str, d_per_str])

    qs_df = pd.DataFrame(columns=columns, data=rows)

    # build the 'Average' row across all questions
    avgs = ['Average']
    for column in columns[1:]:
        if column.endswith('%'):
            # percentages are stored as text, so strip the sign before averaging
            pct = qs_df[column].str.rstrip('%').astype('float')
            avgs.append('{0:.0%}'.format(pct.mean() / 100))
        else:
            # counts are averaged and truncated to a whole number
            avgs.append(int(qs_df[column].mean()))

    # DataFrame.append was removed in pandas 2.0, so concatenate a one-row frame instead
    avg_row = pd.DataFrame([avgs], columns=columns)
    complete_df = pd.concat([qs_df, avg_row], ignore_index=True)

    # number the rows from 1 so they line up with the question numbers
    complete_df.index += 1
    return complete_df

# function name: render_mpl_table
# argument(s): 1 dataframe to draw, plus optional layout/styling settings:
#              col_width, row_height - multiplied by the column count and by the
#                                      row count + 1 (header) to size a new figure
#              font_size             - font size applied to the table text
#              header_color          - face color of header cells
#              row_colors            - face colors cycled over the body rows
#              edge_color            - edge color of every cell
#              bbox                  - passed to ax.table as its bbox
#              header_columns        - number of leading columns styled like the header
#              ax                    - axes to draw on; a new figure/axes is made when None
#              **kwargs              - forwarded to ax.table
# description: converts a dataframe to a styled matplotlib table
# return: the figure that holds the table, and the axes it was drawn on
def render_mpl_table(data, col_width=3.0, row_height=0.625, font_size=14,
                     header_color='#006400', row_colors=('#f1f1f2', 'w'), edge_color='w',
                     bbox=(0, 0, 1, 1), header_columns=0,
                     ax=None, **kwargs):
    if ax is None:
        # figure size = (columns, rows + 1 header row) scaled by the per-cell dimensions
        size = (np.array(data.shape[::-1]) + np.array([0, 1])) * np.array([col_width, row_height])
        _, ax = plt.subplots(figsize=size)
        ax.axis('off')

    mpl_table = ax.table(cellText=data.values, bbox=bbox, colLabels=data.columns, **kwargs)
    mpl_table.auto_set_font_size(False)
    mpl_table.set_fontsize(font_size)

    # auto_set_column_width expects column indices, so register every column once
    # (rather than passing each cell's (row, column) key on every iteration)
    mpl_table.auto_set_column_width(list(range(len(data.columns))))

    # cells are keyed by (row, column); row 0 holds the column labels
    for (row, col), cell in mpl_table.get_celld().items():
        cell.set_edgecolor(edge_color)
        cell.set_text_props(wrap=True)
        if row == 0 or col < header_columns:
            cell.set_text_props(weight='bold', color='w')
            cell.set_facecolor(header_color)
        else:
            # alternate the body row colors
            cell.set_facecolor(row_colors[row % len(row_colors)])

    return ax.get_figure(), ax
    
# function name: main
# argument(s): none (the semester and the CSV file name are read from user input)
# description: for each course in the chosen semester, saves PNG files with the
#              facility count table, the overall agree/disagree bar chart and table,
#              and one agree/disagree table per facility
# return: nothing
def main():
    semester = input('Enter a semester cohort (ie Fall 2020) and press the Enter key: ')
    filename = input('Enter the source CSV filename (ie Fall_2020_Eval) and press the Enter key: ')
    filename = filename + '.csv'

    df = process_df(filename)
    sem_df = isolate_sem(df, semester)

    courses = ['NURP 1010', 'NURS 1010', 'NURS 1020', 'NURS 1023', 'NURS 2040', 'NURS 2050 Maternity', 'NURS 2050 Pediatrics', 'NURS 2060', 'NURS 2500', 'NURS 2500 Capstone']

    print('')
    print('Analyzing Data for %s' %semester)
    print('--------------------------------')

    for x in courses:
        course_df = isolate_course(sem_df, x)

        # summary table of facilities for this course in the semester
        facilities = facility_counts(course_df)

        # nothing to tabulate for this course: skip it instead of failing
        # while drawing an empty table
        if facilities.empty:
            print ('...%s Skipped (no data)' %x)
            continue

        # total N across all facilities of the course
        course_n = facilities['  N  '].sum()

        # facility count table
        fac_fig, _ = render_mpl_table(facilities)
        fac_fig.suptitle('%s - %s Facility Counts\nn = %s' %(semester, x, course_n), fontsize = 16, y = 1.10)
        fac_fig.savefig('%s_%s_facility.png' %(semester, x), bbox_inches = 'tight')

        # calculate general agree/disagree data
        general_cts = q_cts(course_df)

        # general agree/disagree visualization (the trailing 'Average' row is left out)
        # colors are given as a list, not a set, so the color assigned to each
        # series is the same on every run
        questions_only = general_cts.iloc[:-1]
        gen_bar = questions_only.set_index('Question').plot.bar(color = ['orange', '#006400'], title = '%s - %s Overall' %(semester, x), figsize = (15,6))
        gen_bar.set(ylabel = "Number of Responses")
        gen_bar.get_figure().savefig('%s_%s_overall_graph.png' %(semester, x), bbox_inches = 'tight')

        # overall agree/disagree table
        gen_fig, _ = render_mpl_table(general_cts)
        gen_fig.suptitle('%s - %s Overall\nn = %s' %(semester, x, course_n), fontsize = 16)
        gen_fig.savefig('%s_%s_overall.png' %(semester, x), bbox_inches = 'tight')

        # generate individual facility tables
        # NOTE(review): the facility name is used verbatim in the file name; a name
        # containing a path separator would presumably break the save - confirm
        for site in facility_list(course_df):
            # a missing facility value cannot be matched with ==, so skip it
            if pd.isna(site):
                continue

            facility = course_df[course_df['Facility'] == site]
            fac_cts = q_cts(facility)

            # N for this facility is the number of rows recorded for it
            site_n = len(facility)

            # format table
            fig, _ = render_mpl_table(fac_cts)
            fig.suptitle('%s - %s\n%s\nn = %s' %(semester, x, site, site_n), fontsize = 16)
            fig.savefig('%s_%s_%s.png' %(semester, x, site))

            # release the figures as we go so they do not pile up in memory
            plt.close('all')

        # make sure the course-level figures are closed even if no site was drawn
        plt.close('all')
        print ('...%s Complete' %x)

    print('--------------------------------')
    print('%s Analysis Complete' %(semester))

# run the report generation (main prompts for the semester and the CSV file name)
main()

Enter a semester cohort (ie Fall 2020) and press the Enter key: Fall 2020
Enter the source CSV filename (ie Fall_2020_Eval) and press the Enter key: Fall_2020_Eval

Analyzing Data for Fall 2020
--------------------------------
...NURP 1010 Complete
...NURS 1010 Complete
...NURS 1020 Complete
...NURS 1023 Complete
...NURS 2040 Complete
...NURS 2050 Maternity Complete
...NURS 2050 Pediatrics Complete
...NURS 2060 Complete
...NURS 2500 Complete
...NURS 2500 Capstone Complete
--------------------------------
Fall 2020 Analysis Complete
