### Demo Reports

Generate a demo report for each department detailing which other schools students chose to attend instead of Siena.

Import necessary libraries.

In [1]:
import numpy as np
import pandas as pd
import altair as alt
import sys
import warnings
# Silences every warning for the rest of the session, including pandas/altair
# deprecation notices. NOTE(review): consider narrowing this filter.
warnings.filterwarnings('ignore')

import os,glob,subprocess

# Put the project's visualization source folder first on the import path so the
# local `visualize` module below can be imported from the notebook.
sys.path.insert(0,'../src/visualization/')
import visualize as vis

Load in the data file.

In [2]:
# Read the processed admissions extract and discard its 'Unnamed: 0' column
# (presumably a row index written out by an earlier to_csv call -- confirm).
df = pd.read_csv(
    '../data/processed/CriticalPath_Data_EM_Confidential_lessNoise.csv'
).drop(columns='Unnamed: 0')

Create Demo Reports

The code below generates a bar chart for each individual major code and for the umbrella department it falls under. It then writes a `.tex` report for every department, showing the enrolled/accepted/applied breakdown and the colleges that students chose over Siena College.



In [63]:
# Maps each department (the "umbrella") to the major codes reported under it.
# The department name is used verbatim in output file names and in the report
# title, so spelling matters here ("Psychology" was previously misspelled
# "Pyschology"; files are now written under the corrected name).
# BICM is deliberately listed under both Biology and Chemistry.
ourmajors = {
    "Physics": ["PHYS", "PYEN", "PYED"],
    "Biology": ["BIBS", "BIBA", "BIED", "BICM"],
    "Chemistry": ["CHEM", "CHED", "CHEN", "BICM"],
    "Math": ["MTED", "MTBS", "MTBA"],
    "Sociology": ["SWRK", "SOCI"],
    "Pre-Laws": ["HIST", "PHIL", "POSC", "CLAS", "AMST"],
    "Languages": ["FREN", "FRED", "SPAN", "SPED"],
    "Psychology": ["PSYC"],
    "Undeclared": ["UNSE", "UNSC", "UNBU", "UNBE", "UNAR", "UNAE"],
    "Religion": ["RELG"],
    "Health-Studies": ["HLST", "HSAD", "HSHS", "HSHP"],
    "Finance": ["FNED", "FINC"],
    "Environmental-Studies": ["ENVS", "ENVA"],
    "English": ["ENGL", "ENED"],
    "Computer-Science": ["CSIS"],
    "Data-Science": ["DASC"],
    "Communications": ["COMM"],
    "Actuarial-Science": ["ACSC", "ACBS"],
    "Marketing": ["MRKT"],
    "Management": ["MGMT"],
    "Arts": ["CREA"],
    "Nursing": ["NRBL"],
    "HYED": ["HYED"],
}

footer = r'''\end{document}'''

# Every PNG and .tex file produced by this cell is written to this directory.
report_dir = '../reports/demo_reports'
os.makedirs(report_dir, exist_ok=True)  # saving fails if the folder is missing

# LaTeX preamble; the single %s is the department name.
header_template = r'''\documentclass{article}
\usepackage{fullpage}
\usepackage{graphicx}
\usepackage{float}
\title{%s Department}
\author{Siena College}
\date{\today}
\begin{document}
\maketitle
'''

# One floating figure; placeholders are (image file name, caption text).
figure_template = r'''\begin{figure}[H]
\centering
\includegraphics[width = 0.99\textwidth]{%s}{\hspace{0.2 in}}
\caption{%s}
\end{figure}
'''

# Separator between table rows: a LaTeX row break followed by a newline.
row_break = r" \\" + "\n"


def _top_choices(frame, mask, group_cols, limit=10):
    """Count students per competing college for the rows selected by `mask`.

    Rows with a missing college, student id or major are ignored. The result
    is indexed by `group_cols`, sorted by student count (largest first) and
    truncated to the `limit` largest groups.
    """
    picked = frame.loc[mask, ['College_chosen_by_non-matrics', 'Unique_student_ID', 'Major']].dropna()
    counts = picked.groupby(group_cols).count()
    return counts.sort_values("Unique_student_ID", ascending=False).head(limit)


def _bar_chart(counts, color, title):
    """Build a horizontal bar chart of student counts per competing college."""
    return alt.Chart(counts.reset_index()).mark_bar(color=color).encode(
        y=alt.Y("College_chosen_by_non-matrics:N", axis=alt.Axis(title=''),
                # Order bars by student count. The previous sort summed the
                # college *name* field, which is not a meaningful ordering.
                sort=alt.EncodingSortField(
                    field="Unique_student_ID",
                    op="sum",
                    order="descending")),
        x=alt.X("Unique_student_ID", axis=alt.Axis(title='# of Students'))
    ).properties(title=title)


def _status_counts(frame, mask):
    """Return (enrolled, accepted, applied) row counts for the rows in `mask`.

    Rows with a missing Admission_status are excluded from all three numbers.
    NOTE(review): "accepted" is every status other than 'Applied'; confirm the
    data contains no rejected/withdrawn status that should be excluded.
    """
    status = frame.loc[mask, ['Major', 'Admission_status']].dropna()['Admission_status']
    applied = len(status)
    accepted = int((status != 'Applied').sum())
    enrolled = int((status == 'Enrolled').sum())
    return enrolled, accepted, applied


for key, codes in ourmajors.items():
    print(key)
    in_department = df.Major.isin(codes)

    # One chart per major code. Codes without any recorded competing college
    # get no chart, and are later left out of the report as well.
    codes_with_chart = []
    for code in codes:
        source = _top_choices(df, df.Major == code, ["Major", "College_chosen_by_non-matrics"])
        if source.empty:
            continue
        _bar_chart(source, 'orange', f"{code}").save(f"{report_dir}/{key}_{code}_other_choices.png")
        codes_with_chart.append(code)

    # One chart for the department as a whole.
    source = _top_choices(df, in_department, ["College_chosen_by_non-matrics"])
    has_department_chart = not source.empty
    if has_department_chart:
        _bar_chart(source, 'blue', f"Top Colleges for Competition: {key}").save(
            f"{report_dir}/{key}_ALL_other_choices.png")

    ############################################
    # Now write the report for this Department #
    ############################################

    # Breakdown table: a Total row, plus one row per code when the department
    # has more than one code (with a single code the row would repeat Total).
    rows = ["Total & %s & %s & %s" % _status_counts(df, in_department)]
    if len(codes) > 1:
        for code in codes:
            rows.append("%s & %s & %s & %s" % ((code,) + _status_counts(df, df.Major == code)))

    parts = [
        header_template % key,
        "Here is the Enrolled/Applied/Accepted breakdown for your department: \n",
        r'''
\begin{center}
\begin{tabular}{ c | c c c }
Code & Enrolled & Accepted & Applied \\
''',
        row_break.join(rows),
        r'''
\end{tabular}
\end{center}
''',
    ]

    # Only reference images that were actually written above; pointing
    # \includegraphics at a missing file makes the .tex fail to compile.
    if has_department_chart:
        parts.append("These colleges are the top competition for your department: \n")
        parts.append(figure_template % (
            f'{key}_ALL_other_choices.png',
            'Top competition overall for the department.'))
    else:
        parts.append("No competing colleges were recorded for this department.\n")

    if len(codes) > 1:
        for code in codes_with_chart:
            parts.append(figure_template % (
                f'{key}_{code}_other_choices.png',
                'Top competition specifically for those who declared major is %s.' % code))

    content = "".join(parts) + footer

    with open(f'{report_dir}/{key}.tex', 'w') as f:
        f.write(content)

# The .tex files reference their images by bare file name, so compile them
# from inside the report directory, for example:
#     subprocess.run(['pdflatex', f'{key}.tex'], cwd=report_dir)

Physics
Biology
Chemistry
Math
Sociology
Pre-Laws
Languages
Pyschology
Undeclared
Religion
Health-Studies
Finance
Environmental-Studies
English
Computer-Science
Data-Science
Communications
Actuarial-Science
Marketing
Management
Arts
Nursing
HYED
