# Summit Schools Manager of Data Performance Task
## Prepared by Justin August

In [1]:
import pandas as pd
from datetime import date

In [2]:
# AL: nit - i prefer YYYY-MM-DD since it's alphabetically self-sorting. and oddly enough they reference the date using YYYY-MM-DD, but the actual filename is in MM-DD-YYYY format.

# Date stamp embedded in every output filename, formatted MM-DD-YYYY.
today = f'{date.today():%m-%d-%Y}'

## Part 1: School Rosters

In [3]:
#import data

# AL: nit - reading the grading rubric, seems like a URL should be a constant? i.e. all uppercase
# Google Drive direct-download link for the ELA statuses CSV.
ca_students_ela_statuses_data_url = 'https://drive.google.com/uc?export=download&id=1dK-050RcSingosBwLcuXBmrZavcfL8-j'

# Load the remote CSV into a DataFrame; later cells read its SITE_ID,
# SITE_NAME and per-student columns.
ela_statuses_data = pd.read_csv(filepath_or_buffer=ca_students_ela_statuses_data_url)

### I chose to create a DataFrame to easily look up metadata (site ID, name, shortname) for each site.

_This would need to be expanded and customized for each data source as schools are added._

In [4]:

# AL: is the data in the CSV guaranteed to be in the same order as the short name array you made here? The instructions say "SiteShortName is matched based on" -- are they hinting at simple search here?

# Short names keyed by SITE_ID, so each short name is matched to its site
# explicitly instead of relying on the order in which sites first appear
# in the CSV. Add an entry here whenever a new site shows up in the data.
site_shortnames_by_id = {
    2: 'Tahoma',
    3: 'Prep',
    4: 'Everest',
    5: 'Denali',
    6: 'Shasta',
    7: 'K2',
    8: 'Tamalpais',
}

# One row per distinct (SITE_ID, SITE_NAME) pair, in order of first appearance.
# Taking the pair together keeps each ID aligned with its own name.
site_metadata = (
    ela_statuses_data[['SITE_ID', 'SITE_NAME']]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={'SITE_ID': 'site_id', 'SITE_NAME': 'site_name'})
)
site_metadata['site_shortname'] = site_metadata['site_id'].map(site_shortnames_by_id)

# Fail loudly if the data contains a site with no short name; otherwise the
# output filenames and sheet names would be built from a missing value.
sites_missing_shortname = site_metadata.loc[site_metadata['site_shortname'].isna(), 'site_id'].tolist()
if sites_missing_shortname:
    raise ValueError(f'No short name defined for site IDs: {sites_missing_shortname}')

### These are the data specified to be contained within the enrollment files

In [5]:
# AL: i'd make this a constant
# Columns written to each per-school roster file, in output order.
roster_data_columns = [
    # student identifiers
    'LOCAL_STUDENT_ID', 'STATE_STUDENT_ID',
    # school
    'SITE_ID', 'SITE_NAME',
    # student name and grade
    'FIRST_NAME', 'LAST_NAME', 'GRADE_LEVEL',
    # enrollment window at the current school
    'CURRENT_SCHOOL_ENROLLMENT_START_DATE', 'CURRENT_SCHOOL_ENROLLMENT_END_DATE',
]

In [6]:
# Write one roster CSV per site, named <shortname>_<site id>_Roster_<date>.csv.
# Iterating the ID and short-name columns together avoids a separate lookup
# into site_metadata on every pass.
for site_id, site_shortname in zip(site_metadata['site_id'], site_metadata['site_shortname']):
    #filename as specified
    filename = f'{site_shortname}_{site_id}_Roster_{today}.csv'

    #keep only this site's rows and only the roster columns
    site_roster = ela_statuses_data.loc[ela_statuses_data['SITE_ID'] == site_id, roster_data_columns]

    #index=False: the DataFrame index is just the row position in the source
    #data and would otherwise be written as an extra unnamed first column
    site_roster.to_csv(filename, index=False)
    print(f'Roster for Site {site_id} ({site_shortname}) Output to {filename}')

Roster for Site 2 (Tahoma) Output to Tahoma_2_Roster_10-16-2020.csv
Roster for Site 3 (Prep) Output to Prep_3_Roster_10-16-2020.csv
Roster for Site 4 (Everest) Output to Everest_4_Roster_10-16-2020.csv
Roster for Site 5 (Denali) Output to Denali_5_Roster_10-16-2020.csv
Roster for Site 6 (Shasta) Output to Shasta_6_Roster_10-16-2020.csv
Roster for Site 7 (K2) Output to K2_7_Roster_10-16-2020.csv
Roster for Site 8 (Tamalpais) Output to Tamalpais_8_Roster_10-16-2020.csv


## Part 2: English Proficiency Testing Lists

In [7]:
# AL: same comment as above wrt constants

mentor_data_url = 'https://drive.google.com/uc?export=download&id=1wpKxw2rWB1a7jQBDunay0JfSO6lUiJ67'

#Download the remote Excel document once and read its first two sheets.
#Passing a list to sheet_name returns a dict of DataFrames keyed by sheet position.
mentor_sheets = pd.read_excel(mentor_data_url, sheet_name=[0, 1])

#Merge both sheets; with no `on` given, pandas joins on the columns the
#two sheets have in common (inner join).
mentor_data = pd.merge(mentor_sheets[0], mentor_sheets[1])

##Correct column names to save time later on a subsequent merge
#NOTE: names are assigned by position, so this assumes the merged frame has
#exactly these six columns in this order -- confirm if the workbook changes.
mentor_data.columns = ['LOCAL_STUDENT_ID', 'MENTOR_GROUP_ID', 'MENTOR_ID', 'MENTOR_FIRST_NAME',
       'MENTOR_LAST_NAME', 'MENTOR_FULL_NAME']

### Define ELA Statuses of Interest

In [8]:
# AL: yeah, i'm _constant_ly nagging ;)
# CURRENT_ELA_STATUS values that put a student on the testing lists.
ela_statuses = [
    'EL',
    'TBD',
]

### Define columns for final output
_Case and Spacing will be corrected before output_

In [9]:
# Source columns carried into the testing lists, in output order.
ela_prof_data_columns = [
    # student identifiers
    'LOCAL_STUDENT_ID', 'STATE_STUDENT_ID',
    # school, student name and grade
    'SITE_NAME', 'FIRST_NAME', 'LAST_NAME', 'GRADE_LEVEL',
    # mentor
    'MENTOR_FIRST_NAME', 'MENTOR_LAST_NAME',
    # English language status
    'CURRENT_ELA_STATUS', 'ELA_PRIMARY_LANGUAGE',
]

# Extra columns appended to the testing lists with no data in them.
# These are already in their final display form (title case, spaces).
roster_blank_columns = [
    'Notification Letter Sent Home',
    'Date Notification Letter Sent Home',
    'Date Listening Completed', 'Date Reading Completed',
    'Date Writing Completed', 'Date Speaking Completed',
    'Date Assessment Completed',
    'Assessment Deadline',
    'Notes',
]

### Merge and Filter data

In [10]:
# Attach mentor details to every student record.
# how='left' keeps students that have no matching row in mentor_data (their
# mentor columns are left empty); the default inner join would silently drop
# them from the testing lists.
students_with_mentors = pd.merge(ela_statuses_data,
                                 mentor_data,
                                 how = 'left',
                                 on = 'LOCAL_STUDENT_ID'
                                )

# Keep only students whose status is of interest, and only the output columns.
# .copy() gives later cells an independent frame to rename and extend.
roster_data = students_with_mentors.loc[
    students_with_mentors['CURRENT_ELA_STATUS'].isin(ela_statuses),
    ela_prof_data_columns,
].copy()

In [11]:
# AL: you know more about Pandas than me, but can you use a map method instead of an intermediary array here?

#Convert the imported UPPER_SNAKE_CASE headers to the required display form,
#e.g. LOCAL_STUDENT_ID -> "Local Student ID", CURRENT_ELA_STATUS -> "Current ELA Status"
def _display_header(raw_header):
    """Return raw_header in title case with spaces, keeping ID and ELA upper-case."""
    spaced = raw_header.title().replace("_"," ")
    return spaced.replace(" Id"," ID").replace('Ela ','ELA ')

roster_data.columns = [_display_header(raw_header) for raw_header in roster_data.columns]

In [12]:
#Add each tracking column, filled with empty strings
for blank_column in roster_blank_columns:
    roster_data[blank_column] = ''

### Output Data

In [13]:
# Single workbook holding every school's testing list, stamped with today's date.
roster_filename = 'SPS_English_Proficiency_Testing_Lists_All_Schools_{}.xlsx'.format(today)

In [14]:
# Write every site's testing list to its own worksheet in one workbook.
with pd.ExcelWriter(path = roster_filename, mode='w', engine = 'openpyxl') as writer:

    print(f'Outputing to {roster_filename}')

    # Iterate name and short name together instead of looking the short name
    # up in site_metadata on every pass.
    for site_name, site_shortname in zip(site_metadata['site_name'], site_metadata['site_shortname']):
        # AL: nit - "write data to file"?
        #write this site's rows to a worksheet named after its short name
        site_rows = roster_data.loc[roster_data['Site Name'] == site_name]

        #index=False: the DataFrame index is left over from the merge/filter
        #and would otherwise appear as an extra unnamed first column
        site_rows.to_excel(writer, sheet_name = site_shortname, index = False)
        print(f'Output data for {site_name} to file.')

Outputing to SPS_English_Proficiency_Testing_Lists_All_Schools_10-16-2020.xlsx
Output data for Summit Public School: Tahoma to file.
Output data for Summit Preparatory Charter High School to file.
Output data for Everest Public High School to file.
Output data for Summit Public School: Denali to file.
Output data for Summit Public School: Shasta to file.
Output data for Summit Public School: K2 to file.
Output data for Summit Public School: Tamalpais to file.
