**This notebook contains a script that sends learnerships contracts to students**

**Add Local Library to Path**

In [1]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    sys.path.append(module_path + '/local_library')

**Import Libraries**

In [2]:
import pandas as pd
import requests
import json
from datetime import date
from local_library import import_worksheet
from local_library import export_worksheet
from local_library import send_bulk_signrequest

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=1017190226189-f1d5s7cpjrj54u2rqk1ufh9pevguqoap.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.


**Create Library**

In [3]:
#split a column into multiple columns
def split_column_num(target_df, column, column_len):
    
    #create list from 0 to column_len
    n_list = list(range(0, column_len))
    
    #create and populate columns
    for i in n_list:
        target_df[f'{column}_{i+1}'] = [elem[i] if i < len(elem) else '' for elem in target_df[column]]
        
    return


#split column into two binary columns
def column_to_binary(target_df, column, specify='no'):
    
    #convert column to string
    target_df[column] = target_df[column].astype(str)
    
    #strip whitespace
    target_df[column] = target_df[column].str.strip()
    
    #create and populate binary columns
    target_df[f'{column}_yes'] = ['X' if elem.lower() == 'yes' else '' for elem in target_df[column]]
    target_df[f'{column}_no'] = ['X' if elem.lower() == 'no' else '' for elem in target_df[column]]
    
    return

#convert phone numbers
def convert_phone(target_df, column):
    target_df[column] = target_df[column].astype(str)
    target_df[column] = target_df[column].astype(float)
    target_df[column] = target_df[column].astype(int)
    target_df[column] = target_df[column].astype(str)
    
    return


#convert column type to string
def convert_columns_type_string(target_df, columns):
    for column in columns:
        target_df[column] = target_df[column].astype(str)

    return


#create age columns
def create_age_column(target_df, column):
    
    #convert dob to datetime formatt
    target_df[column] = pd.to_datetime(target_df[column])

    #set the date
    today = date.today()

    #calculate age
    target_df['Age'] = today.year - target_df[column].dt.year

    #convert age to int
    target_df['Age'] = target_df['Age'].astype(int)

    #mark if above 35 years
    target_df['above_35'] = ['X' if age > 35 else '' for age in target_df['Age']]

    #mark if below 36 years
    target_df['below_35'] = ['X' if age < 36 else '' for age in target_df['Age']]
    
    return


#split columns based on column values
def split_column(target_df, column):
    new_columns = target_df[column].unique()
    for new_column in new_columns:
        target_df[new_column] = ['X' if elem == new_column else '' for elem in target_df[column]]
        
    return


#split column based on status
def split_column_status(target_df, column, value):
    #mark if citizen yes
    target_df[f"{column}_yes"] = ['X' if elem == value else '' for elem in target_df[column]]

    #mark if citizen no
    target_df[f"{column}_no"] = ['X' if elem != value else '' for elem in target_df[column]]

    #specify citizenshp status
    target_df[f"{column}_specify"] = [elem if elem != value else '' for elem in target_df[column]]
    
    return


def split_disability(target_df, column, value):
    target_df[f"{column}_yes"] = ['X' if elem != value else '' for elem in target_df[column]]

    target_df[f"{column}_no"] = ['X' if elem == value else '' for elem in target_df[column]]

    target_df[f"{column}_specify"] = [elem if elem != value else '' for elem in target_df[column]]
    
    return


#add static column
def add_static_columns(target_df, column_dict):
    for key in column_dict.keys():
        target_df[key] = column_dict[key]
        
    return


#subset a dataframe
def subset_dataframe(target_df, column, subset_array):
    return target_df[target_df[column].isin(subset_array)]

**Import WorkSheets**

In [4]:
#import sponsorship data
cohort_df = import_worksheet("Cohort 2021 MICT System Onloading", "Sheet1")

#import tag ids
tag_ids_df = import_worksheet("Tag IDs","Student")

**Update Cohort Column Names**

In [5]:

column_names = {"Home Language":"home_language","Home Street":"home_address_1", "Home Suburb":"home_address_2", "Home City":"home_address_3",
                "Home Postal Code":"home_address_post_code", "Disability Type":"disability",
           "Date Of Birth":"birth_date", "First Name":"learner_name", "Last Name": "learner_surname",
                "Mobile Phone":"learner_phone", "Email Address":"learner_email",
               "Name of High School":"high_school", "Completion Year":"high_school_last_year",
               "Residential Status":"citizen", "ID / Passport Number":"id", "Gender":"gender", "Ethnicity":'ethnicity',
               }

cohort_df = cohort_df.rename(columns=column_names)

**Convert Columns to Type String**

In [6]:
column_names = ['birth_date']

convert_columns_type_string(cohort_df, column_names)

**Convert Phone Number and High School Year**

In [7]:
#cnvert phone to type string
#convert_phone(cohort_df, 'learner_phone')
cohort_df['learner_phone'] = cohort_df['learner_phone'].astype(str)
if "." in cohort_df['learner_phone'].values[0]:
    cohort_df['learner_phone'] = [phone.split(".")[0] for phone in cohort_df['learner_phone']]


cohort_df['high_school_last_year'] = cohort_df['high_school_last_year'].astype(str)
if "." in cohort_df['high_school_last_year'].values[0]:
    cohort_df['high_school_last_year'] = [year.split(".")[0] for year in cohort_df['high_school_last_year']]

**Create ID column**

In [8]:
#use split_column
split_column_num(cohort_df, "id", 13)

**Filter empty columns**

In [11]:
#cohort_df = cohort_df[cohort_df['learner_phone'].notna()]
cohort_df = cohort_df[cohort_df['birth_date'].notna()]
#cohort_df = cohort_df[cohort_df['high_school_last_year'].notna()]


**Create Age Column**

In [12]:
#create age column
create_age_column(cohort_df, "birth_date")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


**Create Gender Columns**

In [13]:
#strip gender column whitespaces
cohort_df['gender'] = cohort_df['gender'].str.strip()

#convert equity column to lowercase
cohort_df['gender'] = cohort_df['gender'].str.lower()

split_column(cohort_df, 'gender')

**Create Ethnicity Columns**

In [14]:
#strip equity column whitespaces
cohort_df['Equity'] = cohort_df['ethnicity'].str.strip()

#convert equity column to lowercase
cohort_df['Equity'] = cohort_df['Equity'].str.lower()

#reformat equity column
cohort_df['Equity'] = cohort_df['Equity'].replace({'black': 'african', 'coloured': 'coloured',
                             'indian': 'indian', 'asian': 'african',
                             'chinese': 'african', 'white': 'white'})

split_column(cohort_df, 'Equity')

**Create Disability Column**

In [15]:
#strip disability column whitespaces
cohort_df['disability'] = cohort_df['disability'].str.strip()

#convert disability column to lowercase
cohort_df['disability'] = cohort_df['disability'].str.lower()

#reformat disability column
#cohort_df['disability'] = cohort_df['disability'].replace({'no': 'None', 'nan': 'None',
#                             'yes': 'Disabled but unspecified', 'yes - specwearer': 'None',
#                             'n': 'None', '-': 'None', 'visual impairment - spec wearer': 'None',
#                             'aspergers / autistic': 'Emotional (behav/psych)', 'patella alta':'Physical (move/stand etc)',
#                             'y- specwearer': 'None', 'add': 'None', 'visual impairment - spec wearer -': 'None',
#                             '\\n': 'None', 'y - specwearer':'None'})

#create disability yes, no, status columns
split_disability(cohort_df, 'disability', 'None')

**Create Citizenship Column**

In [16]:
#strip citizenship column
cohort_df['citizen'] = cohort_df['citizen'].str.strip()

#convert citizenship column to lowercase
cohort_df['citizen'] = cohort_df['citizen'].str.lower()

#create citizeenship yes, no, status columns
split_column_status(cohort_df, 'citizen', 'south african')

**Create Fullname Columns**

In [17]:
#create full name and initials
def create_full_name(target_df):
    target_df['learner_fullname'] = target_df['learner_name'].str.strip().str.lower() + " " + target_df['learner_surname'].str.strip().str.lower()
    target_df['learner_fullname'] = target_df['learner_fullname'].str.title()
    target_df['learner_initials'] = [elem[0][0].upper() + elem[1][0].upper() for elem in target_df['learner_fullname'].str.split(" ")]
    
    return

create_full_name(cohort_df)

**Get Specific Students**

In [18]:
cohort_df = subset_dataframe(cohort_df,'Username', ['sifndlov021', 'nonmthim021'])


**Verify Columns**

In [19]:
for column in list(filter(lambda x: type(x) == str,tag_ids_df['Contract'])):
    if column not in cohort_df.columns:
        print(column)

**Set Learner Email and Template ID**

In [20]:
cohort_df['learner_email'] = 'mufaro@thoughtquest.co.za'

template_ids = {'MICT':'ab47c0e5-5be4-48c8-bcf1-b4eae380f296', 'Contract':'e94f4098-fe6c-476e-b9d9-12b51aea01bf', 
               'MICT_OLD':'9f5bf080-569e-4f14-8de1-c17cdb51e7c4'}
cohort_df['template_id'] = template_ids['Contract']

target_columns = list(filter(lambda x: type(x) == str,tag_ids_df['Contract']))

**Replace All Null Values**

In [21]:
cohort_df = cohort_df.fillna(" ")

**Send Contracts**

In [22]:
send_bulk_signrequest(cohort_df, target_columns, 'template_id')

Signer: mufaro@thoughtquest.co.za , Status: 201


In [16]:
#import sponsorship data
cohort_df = import_worksheet("Tag IDs", "Sheet4")

#import tag ids
tag_ids_df = import_worksheet("Tag IDs","Sheet3")

In [17]:
number_columns = ['learner_id','id_1','id_2','id_3','id_4','id_5','id_6','id_7','id_8','id_9','id_10','id_11','id_12','id_13','post_code','learner_phone','high_school_last_year']

In [18]:
for column in number_columns:
    cohort_df[column] = cohort_df[column].astype(str)
    if "." in cohort_df[column].values[0]:
        cohort_df[column] = [phone.split(".")[0] for phone in cohort_df[column]]