# Teachers
Load and join teacher employment and assignment files for multiple years. Produces separate CSV files for the employment and assignment data.

In [1]:
import pandas as pd
import re
import datetime

# Source folder for the Excel files, downloaded from the MDE website
folder = 'C:/Users/swastvedt/Documents/MDE Data_Documents/Teachers/Originals/'
# Two-digit form of the current calendar year (e.g. 2017 -> 17)
currentYear = datetime.date.today().year % 100
# Earliest (two-digit) school-year start we have data for
a = 9

# Empty frames that the per-year tables are joined onto later
employ = pd.DataFrame()
assign = pd.DataFrame()

employFiles = []
assignFiles = []

### Create two lists of file names, assignment and employment
# One workbook per school year "20XX-20YY", from start year `a` up to
# (but not including) the current year.
while a < currentYear:
    # Single-digit start years need an extra '0' to form a four-digit year
    century = '200' if a < 10 else '20'
    span = century + str(a) + '-20' + str(a + 1)
    assignFiles.append(folder + 'Assignment/' + span + ' Assignment Detail.xlsx')
    employFiles.append(folder + 'Employment/' + span + ' Employment Detail.xlsx')
    a += 1
    
def upcase_first_letter(s):
    """Return *s* with its first character upper-cased.

    The remainder of the string is left untouched. An empty string is
    returned unchanged: slicing with ``s[:1]`` avoids the IndexError that
    indexing ``s[0]`` raises on empty input.
    """
    return s[:1].upper() + s[1:]

def remove_spaces(s):
    """Return *s* with every space character (' ') deleted.

    Only the plain space is removed; tabs and newlines are kept.
    """
    space = re.compile(r' ')
    return space.sub('', s)

def add_zeros(c, n):
    """Left-pad every value in *c* with '0' to a minimum width of *n*.

    Args:
        c: iterable of values (strings or numbers), e.g. a DataFrame column.
        n: minimum number of characters each result must have.

    Returns:
        A list of strings in the same order as *c*. Values that are already
        *n* characters or longer are not truncated.
    """
    # Always convert to str so the result is uniformly typed; previously a
    # value that needed no padding was passed through as-is (e.g. still an
    # int), giving a list that mixed strings and non-strings.
    return [str(s).rjust(n, '0') for s in c]

###Standardize column names
def clean_table(df):
    """Normalize column names and zero-pad the district/school codes.

    Spaces are stripped from every column name, then known raw names are
    mapped to their canonical CamelCase form. The mapping is applied
    case-insensitively so that header variants such as 'districtNumber' or
    'District Number' all end up as 'DistrictNumber'.

    Args:
        df: DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object, for convenience.

    Raises:
        KeyError: if no 'DistrictNumber' or 'DistrictType' column exists
            after renaming.
    """
    canonical_names = {
        'dat_yer': 'DataYear',
        'fdr_num': 'FdrNum',
        'districtnumber': 'DistrictNumber',
        'districttype': 'DistrictType',
        'districtname': 'DistrictName',
        'lst_nam': 'LastName',
        'fst_name': 'FirstName',
        'ini': 'Ini',
        'sex': 'Sex',
        'ethnicity': 'Ethnicity',
        'wks_wrk': 'WksWrk',
        'ft_pt': 'FtPt',
        'schoolnumber': 'SchoolNumber',
        'schoolname': 'SchoolName',
        'asg_cde': 'AsgCde',
        'asg_des': 'AsgDes',
        'asg_cnt': 'AsgCnt',
        'module_length': 'ModuleLength',
        'mod_wks': 'ModWks',
        'crt_cde': 'CrtCde',
        'asg_Fte': 'AsgFte',
        'Mode_Of_Teaching': 'ModeOfTeaching',
        'hqt_sts': 'HqtSts',
        'address': 'Address',
        'city': 'City',
        'state': 'State',
        'zip': 'Zip',
        'zip4': 'Zip4',
        'eco_dev_rgn': 'EconomicDevelopmentRegion',
    }
    # Key the lookup on lower-cased names: an exact-case match would leave
    # variants like 'districtNumber' unrenamed and break the padding below.
    lookup = {raw.lower(): clean for raw, clean in canonical_names.items()}

    stripped = [str(remove_spaces(x)) for x in df.columns]
    # Unknown columns keep their (space-stripped) name.
    df.columns = [lookup.get(name.lower(), name) for name in stripped]

    # Fixed-width codes: 4-digit district number, 2-digit district type,
    # 3-digit school number.
    df['DistrictNumber'] = add_zeros(df['DistrictNumber'], 4)
    df['DistrictType'] = add_zeros(df['DistrictType'], 2)
    # Only some tables carry a school number.
    if 'SchoolNumber' in df.columns:
        df['SchoolNumber'] = add_zeros(df['SchoolNumber'], 3)

    return df

###Load files from source folder. Specify str datatype for text fields
def load_data(f):
    """Read the Excel workbook *f* into a DataFrame and clean it.

    Every column whose header appears in ``text_columns`` is converted with
    ``str`` while reading. The frame is then passed through ``clean_table``,
    which modifies it in place.

    Args:
        f: path of the Excel file to read.

    Returns:
        The cleaned DataFrame.
    """
    # Header spellings to read as text
    text_columns = (
        'dat_yer',
        'fdr_num',
        'districtnumber',
        'districtNumber',
        'DistrictNumber',
        'District Number',
        'District Type',
        'districttype',
        'districtType',
        'DistrictType',
        'schoolnumber',
        'schoolNumber',
        'School Number',
        'SchoolNumber',
        'asg_cde',
        'crt_cde',
        'zip',
        'zip4',
        'eco_dev_rgn',
    )
    df = pd.read_excel(f, converters=dict.fromkeys(text_columns, str))
    clean_table(df)
    return df

In [92]:
###Load data for each file name in the files list
# One cleaned DataFrame per workbook, in the same order as the file lists
employFrames = list(map(load_data, employFiles))
assignFrames = list(map(load_data, assignFiles))

In [93]:
###Create "DistrictID" column to use as an index
# Assignment rows end the ID with their school number; employment rows
# use the fixed suffix '000' in that position.
for frame in assignFrames:
    district_part = frame['DistrictNumber'] + '-' + frame['DistrictType']
    frame['DistrictID'] = district_part + '-' + frame['SchoolNumber']

for frame in employFrames:
    district_part = frame['DistrictNumber'] + '-' + frame['DistrictType']
    frame['DistrictID'] = district_part + '-000'

###Set indices
# Assignment tables are additionally keyed by assignment code
employ_keys = ['DataYear', 'FdrNum', 'DistrictID']
assign_keys = employ_keys + ['AsgCde']
employFrames = [frame.set_index(employ_keys) for frame in employFrames]
assignFrames = [frame.set_index(assign_keys) for frame in assignFrames]

In [94]:
###Join dataframes for all years into one. Drop blank rows.
# pd.concat is used instead of DataFrame.append, which is deprecated and no
# longer available in newer pandas releases. Concatenating [base] + frames is
# what append did with a list argument, so the result is unchanged.
employ = pd.concat([employ] + employFrames)
assign = pd.concat([assign] + assignFrames)

In [95]:
# Remove, in place, the rows whose DataYear index level holds the
# 'end of workbook' marker instead of a year.
for table in (employ, assign):
    table.drop('end of workbook', level='DataYear', inplace=True)

In [96]:
###Save tables to local folder
# Paths are relative to the notebook's working directory
for table, target in ((employ, 'tempdata/employ.csv'),
                      (assign, 'tempdata/assign.csv')):
    table.to_csv(target)

In [46]:
###Update shared files on OneDrive and return new link to file.
import os

import onedrivesdk
from onedrivesdk.helpers import GetAuthCodeServer

redirect_uri = 'http://localhost:8080/'

#Credentials for "Python data upload" OneDrive app
# The secret can be supplied through the ONEDRIVE_CLIENT_SECRET environment
# variable so it does not have to live in the notebook.
# NOTE(review): the literal fallback below is exposed to anyone who can read
# this file; rotate the secret and delete the fallback once the environment
# variable is in use.
client_secret = os.environ.get('ONEDRIVE_CLIENT_SECRET', 'YfXJZzwOK8rkym1Qj7TphXO')
client = onedrivesdk.get_default_client(client_id='fd99211b-d8ec-413a-a1d9-ed4ab1ac8a6e',
                                            scopes=['wl.signin',
                                                    'wl.offline_access',
                                                    'onedrive.readwrite'])
auth_url = client.auth_provider.get_auth_url(redirect_uri)

# Block thread until we have the code
code = GetAuthCodeServer.get_auth_code(auth_url, redirect_uri)
# Finally, authenticate
client.auth_provider.authenticate(code, redirect_uri, client_secret)

127.0.0.1 - - [05/Oct/2017 15:04:26] "GET /?code=M7e95e7bd-693f-98cc-ff1e-79f79dae45dc HTTP/1.1" 200 -


In [47]:
# Upload the local employment CSV to 'employ.csv' in the OneDrive root
employ_remote = client.item(id='root').children['employ.csv']
returned_item1 = employ_remote.upload('tempdata/employ.csv')

ValueError: No JSON object could be decoded

In [None]:
# Upload the local assignment CSV to 'assign.csv' in the OneDrive root
assign_remote = client.item(id='root').children['assign.csv']
returned_item2 = assign_remote.upload('tempdata/assign.csv')

In [11]:
#Input the client information from above and the employ and assign filenames to generate new sharing links
def get_sharing_link(c, fileName):
    """Create a "view" link for a file in the OneDrive root and return its URL.

    Args:
        c: authenticated OneDrive client.
        fileName: name of the file under the root folder.

    Returns:
        The link's web URL with a newline before and after it.
    """
    remote_file = c.item(id='root').children[fileName]
    permission = remote_file.create_link("view").post()
    template = "\n{}\n"
    return template.format(permission.link.web_url)

# Fresh sharing links for the two uploaded files: [employ, assign]
links = [get_sharing_link(client, 'employ.csv'), get_sharing_link(client, 'assign.csv')]

# Parenthesised single-argument form prints identically under Python 2 and
# is also valid Python 3 (the bare print statement is a SyntaxError there).
print(links[0])
print(links[1])

'\nhttps://1drv.ms/u/s!AvY_lznp4oOFbYO05RJAN2MP-90\n'