# Summit Schools Manager of Data Performance Task
## Prepared by Justin August

In [1]:
import pandas as pd
from datetime import date

In [2]:
# Today's date as MM-DD-YYYY, stamped into the output filenames later on.
TODAY = f'{date.today():%m-%d-%Y}'

### A dictionary to hold site metadata, such as the short name and site name, for each site.

_This would need to be expanded and customized for each data source as schools were added_

In [3]:
# Maps each California site's numeric SITE_ID to the site's short name.
# AL: maybe small note here that this is dict of site id to short name?
# LW: IIUC I think you shouldn't need the site ids here explicitly, you should be able to extract them later. You do need the map of long site name to short site name.
CA_SITE_META = {
    2: 'Tahoma',
    3: 'Prep',
    4: 'Everest',
    5: 'Denali',
    6: 'Shasta',
    7: 'K2',
    8: 'Tamalpais',
}

# AL: added this to replace the defaults you pass in earlier
# Every site ID known to CA_SITE_META, in definition order; used as the
# default `site_ids` argument of the report functions.
DEFAULT_SITE_IDS = list(CA_SITE_META)

## Part 1: School Rosters

In [4]:
# Download location of the ELA status CSV (Google Drive direct-download link).
ELA_STATUSES_URL = 'https://drive.google.com/uc?export=download&id=1dK-050RcSingosBwLcuXBmrZavcfL8-j'

# AL: so i know i said one thing about constants, but looking back, maybe the term i was looking for is "configuration variable", which this isn't. i'd keep `ELA_STATUSES_DATA` lowercase like a regular variable even though it's not supposed to be modified later
# LW: The SITE_SHORT_NAME column could be omitted, since you can use the map later to look these up.
# Read the CSV into a DataFrame and add a SITE_SHORT_NAME column by looking
# up each row's SITE_ID in CA_SITE_META (IDs missing from the map become NaN).
ELA_STATUSES_DATA = pd.read_csv(ELA_STATUSES_URL).assign(
    SITE_SHORT_NAME=lambda statuses: statuses['SITE_ID'].map(CA_SITE_META)
)


### These are the data specified to be contained within the enrollment files

In [5]:
# Columns, in output order, that each site's roster file contains.
ROSTER_DATA_COLUMNS = [
    'LOCAL_STUDENT_ID',
    'STATE_STUDENT_ID',
    'SITE_ID',
    'SITE_NAME',
    'FIRST_NAME',
    'LAST_NAME',
    'GRADE_LEVEL',
    'CURRENT_SCHOOL_ENROLLMENT_START_DATE',
    'CURRENT_SCHOOL_ENROLLMENT_END_DATE',
]

### By default, this function runs a roster report on all sites in CA using the given DataFrame.
Individual sites or subsets of sites can be run by passing their site IDs in a list. A different DataFrame can also be passed in.

In [30]:
# AL: i like this function. i can see a person reusing this in the future to maybe only create a roster on a subset of sites. replaced the list with `DEFAULT_SITE_IDS` here
def create_rosters(site_ids = DEFAULT_SITE_IDS,
                   ela_statuses_data = ELA_STATUSES_DATA
                  ):
    """Write one roster CSV per requested site into the working directory.

    Parameters
    ----------
    site_ids : iterable
        Site IDs to export. Defaults to every ID in DEFAULT_SITE_IDS.
    ela_statuses_data : pandas.DataFrame
        Source rows. Must contain 'SITE_ID', 'SITE_SHORT_NAME' and every
        column listed in ROSTER_DATA_COLUMNS.

    Each file is named '<short name>_<site id>_Roster_<TODAY>.csv' and holds
    only the ROSTER_DATA_COLUMNS for that site. A site ID with no matching
    rows is reported and skipped instead of raising an IndexError.
    """

    # LW: Instead of iterating over site_ids, you could just iterate over (key,value) from a map of long name to short name.
    for site_id in site_ids:

        # Select this site's rows once; reused for the name lookup and the export.
        site_rows = ela_statuses_data.loc[ela_statuses_data['SITE_ID'] == site_id]

        # Nothing to write (and no short name to look up) for an unknown site.
        if site_rows.empty:
            print(f'No rows found for site ID {site_id}; roster skipped.')
            continue

        # AL: it feels more concise to just pull this from `CA_SITE_META`, but maybe there's a reason for not doing that?
        # LW: Iterating over the map of long name to short name inverts this to you looking up the site_id here.
        # Site's short name, used in the filename.
        site_shortname = site_rows['SITE_SHORT_NAME'].values[0]

        # Filename as specified.
        filename = f'{site_shortname}_{site_id}_Roster_{TODAY}.csv'

        # index=False: the DataFrame index is not one of the specified roster
        # columns, so it must not be written as an extra unnamed column.
        site_rows[ROSTER_DATA_COLUMNS].to_csv(filename, index=False)

        print(f'Roster for {site_shortname} written to {filename}')

    print('All sites data written to disc.')

In [31]:
# Run with the defaults: every site in DEFAULT_SITE_IDS, using ELA_STATUSES_DATA.
create_rosters()

Roster for Tahoma written to Tahoma_2_Roster_10-17-2020.csv
Roster for Prep written to Prep_3_Roster_10-17-2020.csv
Roster for Everest written to Everest_4_Roster_10-17-2020.csv
Roster for Denali written to Denali_5_Roster_10-17-2020.csv
Roster for Shasta written to Shasta_6_Roster_10-17-2020.csv
Roster for K2 written to K2_7_Roster_10-17-2020.csv
Roster for Tamalpais written to Tamalpais_8_Roster_10-17-2020.csv
All sites data written to disc.


## Part 2: English Proficiency Testing Lists

In [17]:
# Download location of the mentor workbook (Google Drive direct-download link).
MENTOR_DATA_URL = 'https://drive.google.com/uc?export=download&id=1wpKxw2rWB1a7jQBDunay0JfSO6lUiJ67'

# Pull in and merge both sheets from the remote Excel document.
# This assumes the workbooks are consistent format with two sheets of data.
# Passing a list to `sheet_name` fetches and parses the workbook once and
# returns a dict keyed by sheet position, rather than downloading it twice.

# AL: same thing mentioned above with lowercase naming. sorry for steering ya in the wrong direction
mentor_sheets = pd.read_excel(MENTOR_DATA_URL, sheet_name = [0, 1])

# With no `on` given, pd.merge inner-joins on the column names the two sheets share.
MENTOR_DATA = pd.merge(mentor_sheets[0], mentor_sheets[1])

### Define Default ELA Statuses of Interest

In [9]:
# CURRENT_ELA_STATUS values kept on the testing lists by default.
ELA_STATUSES = ['EL', 'TBD']

### Define columns for final output
_Case and Spacing will be corrected before output_

In [10]:
# Columns carried over from the merged source data, in output order.
ELA_PROF_DATA_COLUMNS = [
    'LOCAL_STUDENT_ID',
    'STATE_STUDENT_ID',
    'SITE_NAME',
    'FIRST_NAME',
    'LAST_NAME',
    'GRADE_LEVEL',
    'MENTOR_FIRST_NAME',
    'MENTOR_LAST_NAME',
    'CURRENT_ELA_STATUS',
    'ELA_PRIMARY_LANGUAGE',
]

# Extra columns appended empty to the testing lists.
ELA_PROF_BLANK_COLUMNS = [
    'Notification Letter Sent Home',
    'Date Notification Letter Sent Home',
    'Date Listening Completed',
    'Date Reading Completed',
    'Date Writing Completed',
    'Date Speaking Completed',
    'Date Assessment Completed',
    'Assessment Deadline',
    'Notes',
]

### By default, this function creates a testing list in XLSX format with one sheet per CA site, using the mentor DataFrame and the ELA status DataFrame and filtering to ELA statuses of "EL" and "TBD".
- Individual or subsets of sites can be run by placing the site IDs in a list.
- Different ELA statuses could be defined as well via a list.

In [28]:
def create_test_lists(mentor_df = MENTOR_DATA,
                      ela_statuses_df = ELA_STATUSES_DATA,
                      # LW: Nit, no trailing underscore here probably?
                      ela_statuses_ = ELA_STATUSES,
                      site_ids = DEFAULT_SITE_IDS
                     ):
    """Write an XLSX workbook of English proficiency testing lists, one sheet per site.

    Parameters
    ----------
    mentor_df : pandas.DataFrame
        Mentor data with exactly six columns, in the order student ID,
        mentor group ID, mentor ID, mentor first name, mentor last name,
        mentor full name. It is copied before renaming, so the caller's
        frame is left untouched.
    ela_statuses_df : pandas.DataFrame
        ELA status data. Must contain 'SITE_ID', 'SITE_SHORT_NAME' and the
        non-mentor columns listed in ELA_PROF_DATA_COLUMNS.
    ela_statuses_ : list
        CURRENT_ELA_STATUS values to keep. Defaults to ELA_STATUSES.
    site_ids : iterable
        Site IDs to export. Defaults to every ID in DEFAULT_SITE_IDS.

    The workbook is written to the working directory as
    'SPS_English_Proficiency_Testing_Lists_All_Schools_<TODAY>.xlsx', with
    each sheet named after the site's short name. A site ID with no rows in
    `ela_statuses_df` is reported and skipped instead of raising an IndexError.
    """

    # Rename on a copy: assigning `.columns` on the argument itself would
    # silently rename the caller's frame (by default the shared MENTOR_DATA).
    mentor_df = mentor_df.copy()
    # Correct column names to save time on the merge below.
    mentor_df.columns = ['LOCAL_STUDENT_ID', 'MENTOR_GROUP_ID', 'MENTOR_ID', 'MENTOR_FIRST_NAME',
                         'MENTOR_LAST_NAME', 'MENTOR_FULL_NAME']

    # Merge ELA status data with mentor data using 'LOCAL_STUDENT_ID'.
    # NOTE(review): pd.merge defaults to an inner join, so students with no
    # mentor row are dropped here -- confirm that is intended.
    test_list_data = pd.merge(ela_statuses_df,
                              mentor_df,
                              on = 'LOCAL_STUDENT_ID'
                             )

    # Keep only the statuses of interest and the columns needed. The explicit
    # copy makes the column assignments below act on an independent frame
    # rather than a slice of the merge result (avoids SettingWithCopyWarning).
    test_list_data = test_list_data.loc[test_list_data['CURRENT_ELA_STATUS'].isin(ela_statuses_),
                                        ELA_PROF_DATA_COLUMNS].copy()

    # Append the empty columns.
    for blank_column in ELA_PROF_BLANK_COLUMNS:
        test_list_data[blank_column] = ''

    # Fix column case from import to match requirements,
    # e.g. 'LOCAL_STUDENT_ID' -> 'Local Student ID', 'CURRENT_ELA_STATUS' -> 'Current ELA Status'.
    test_list_data.columns = [
        column.title().replace("_"," ").replace(" Id"," ID").replace('Ela ','ELA ')
        for column in test_list_data.columns
    ]

    test_list_filename = f'SPS_English_Proficiency_Testing_Lists_All_Schools_{TODAY}.xlsx'

    # AL: nit - i'd just say "files written" w/o the word "disc" because everyone's on the cloud these days, or sharepoint. feels a bit clunky

    # Write the workbook; the context manager saves and closes it on exit.
    with pd.ExcelWriter(path = test_list_filename, mode='w', engine = 'openpyxl') as writer:

        print(f'Writing data to {test_list_filename}')

        for site_id in site_ids:
            # Select this site's rows once; reused for both name lookups.
            site_rows = ela_statuses_df.loc[ela_statuses_df['SITE_ID'] == site_id]

            # No names to look up for an unknown site, so no sheet is written.
            if site_rows.empty:
                print(f'No rows found for site ID {site_id}; sheet skipped.')
                continue

            site_shortname = site_rows['SITE_SHORT_NAME'].values[0]
            site_name = site_rows['SITE_NAME'].values[0]

            # Filter by site name because SITE_ID is not among the output columns.
            # index=False: the merged frame's index is meaningless to readers
            # and is not one of the specified output columns.
            test_list_data.loc[test_list_data['Site Name'] == site_name].to_excel(writer,
                                                                                  sheet_name = site_shortname,
                                                                                  index = False)
            print(f'Data for {site_name} written.')

    print('All sites data written to disc.')

In [29]:
# Run with the defaults: MENTOR_DATA, ELA_STATUSES_DATA, ELA_STATUSES and DEFAULT_SITE_IDS.
create_test_lists()

Writing data to SPS_English_Proficiency_Testing_Lists_All_Schools_10-17-2020.xlsx
Data for Summit Public School: Tahoma written.
Data for Summit Preparatory Charter High School written.
Data for Everest Public High School written.
Data for Summit Public School: Denali written.
Data for Summit Public School: Shasta written.
Data for Summit Public School: K2 written.
Data for Summit Public School: Tamalpais written.
All sites data written to disc.
