**This notebook contains a scripts that creates the database used to prepopulate data to learnership documents**

**Import Libraries**

In [118]:
import pandas as pd
import requests
import json
from fuzzywuzzy import process
from datetime import date
import numpy as np
import gspread
import gspread_dataframe as gd
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

**Authenticate Gspread**

In [119]:
gc = gspread.oauth()

**Local Functions Library**

In [120]:
#Import Google Worksheet
def import_worksheet(google_spreadsheet, google_worksheet):
    
    #import worksheet
    data_worksheet = gc.open(google_spreadsheet).worksheet(google_worksheet)
    
    #import data to dataframe
    target_df = gd.get_as_dataframe(data_worksheet)
    
    #remove null rows and columns
    target_df = target_df.dropna(how='all')
    new_columns = []
    for column in target_df.columns:
        if 'Unnamed' not in column:
            new_columns.append(column)
    target_df = target_df[new_columns]
    
    return target_df


#split a column into multiple columns
def split_column_num(target_df, column, column_len):
    
    #create list from 0 to column_len
    n_list = list(range(0, column_len))
    
    #create and populate columns
    for i in n_list:
        target_df[f'{column}_{i+1}'] = [elem[i] if i < len(elem) else '' for elem in target_df[column]]
        
    return


#split column into two binary columns
def column_to_binary(target_df, column, specify='no'):
    
    #convert column to string
    target_df[column] = target_df[column].astype(str)
    
    #strip whitespace
    target_df[column] = target_df[column].str.strip()
    
    #create and populate binary columns
    target_df[f'{column}_yes'] = ['X' if elem.lower() == 'yes' else '' for elem in target_df[column]]
    target_df[f'{column}_no'] = ['X' if elem.lower() == 'no' else '' for elem in target_df[column]]
    
    return


#split address into component parts
def split_address(target_df, column):
    
    #set column to type string
    target_df[column] = target_df[column].astype(str)
    
    #split address into street, area, city and strip
    for i in range(1,4):
        if len(list(target_df[column].str.split(","))[0]) > i - 1:
            target_df[f"{column}_{i}"] = target_df[column].str.split(",").str[i-1] + ","
            target_df[f"{column}_{i}"] = target_df[f"{column}_{i}"].str.strip()
    
    #split addrss into area code and strip
    if len(list(target_df[column].str.split(","))[0]) > 3:
        target_df[f"{column}_post_code"] = target_df[column].str.split(",").str[3:].str.join("")
        target_df[f"{column}_post_code"] = target_df[f"{column}_post_code"].str.strip()
    
    return

#convert phone numbers
def convert_phone(target_df, column):
    target_df[column] = target_df[column].astype(str)
    target_df[column] = target_df[column].astype(float)
    target_df[column] = target_df[column].astype(int)
    target_df[column] = target_df[column].astype(str)
    
    return


#convert column type to string
def convert_columns_type_string(target_df, columns):
    for column in columns:
        target_df[column] = target_df[column].astype(str)

    return


#create age columns
def create_age_column(target_df, column):
    
    #convert dob to datetime formatt
    target_df[column] = pd.to_datetime(target_df[column])

    #set the date
    today = date.today()

    #calculate age
    target_df['Age'] = today.year - target_df[column].dt.year

    #convert age to int
    target_df['Age'] = target_df['Age'].astype(int)

    #mark if above 35 years
    target_df['above_35'] = ['X' if age > 35 else '' for age in target_df['Age']]

    #mark if below 36 years
    target_df['below_35'] = ['X' if age < 36 else '' for age in target_df['Age']]
    
    return


#split columns based on column values
def split_column(target_df, column):
    new_columns = target_df[column].unique()
    for new_column in new_columns:
        target_df[new_column] = ['X' if elem == new_column else '' for elem in target_df[column]]
        
    return


#split column based on status
def split_column_status(target_df, column, value):
    #mark if citizen yes
    target_df[f"{column}_yes"] = ['X' if elem == value else '' for elem in target_df[column]]

    #mark if citizen no
    target_df[f"{column}_no"] = ['X' if elem != value else '' for elem in target_df[column]]

    #specify citizenshp status
    target_df[f"{column}_specify"] = [elem if elem != value else '' for elem in target_df[column]]
    
    return


#createe prefill tags
def create_tags(row, columns):
    prefill_tags = []
    for column in columns:
        tags = {}
        tags["external_id"] = column
        tags["text"] = row[column]
        prefill_tags.append(tags)
        
    return prefill_tags


#add static column
def add_static_columns(target_df, column_dict):
    for key in column_dict.keys():
        target_df[key] = column_dict[key]
        
    return


#subset a dataframe
def subset_dataframe(target_df, column, subset_array):
    return target_df[target_df[column].isin(subset_array)]


#combine sponsor and cohort dataframes
def combine_dataframes(target_df, sponsor_df):

    for column in sponsor_df.columns:
        target_df.loc[:,column] = sponsor_df[column].values[0]
    
    return

#replace nan values
def replace_nan(target_df, columns):
    target_df.loc[:,columns] = ["" for elem in columns]
    return

**Import Worksheets**

In [121]:
#import cohort data
cohort_df = import_worksheet("Cohort 2020 Data", "Cohort 2020 Data")

#import sponsorship data
sponsor_df = import_worksheet("Learnership Agreement Form (Responses)", "Form Responses 1")


**Update Sponsor Column Names**

In [122]:
column_names = {'Legal Name of Company:': 'employer_legal_name',
                           'Trading Name of Company (if different from Legal Name):':'employer_trading_name',
                           'Business Address:':'business_address',
                           'Postal Address (if different from Business Address):':'business_postal_address',
                           'Are you liable for skills development levy?':'skills_development',
                           'If yes, what is your SDL number?':'sdl',
                           'Name of SETA with which you are registered:':'seta_name',
                           'Are you acting as the lead Employer? (the answer to this should be yes if you are the sponsor)': 'lead_employer',
                           'Contact Person responsible for Learnership:': 'learnership_contact',
                           'Work Phone:': 'employer_phone',
                           'Work Fax:':'employer_fax',
                           'E-mail Address:': 'employer_email',
                           'Full Name of representative responsible for signing the Learnership Agreement:':'employer_fullname',
                           'Initials of representative responsible for signing the Learnership Agreement:':'employer_initials'}

sponsor_df = sponsor_df.rename(columns=column_names)


**Update Cohort Column names**

In [123]:

column_names = {'Home Address - Urban or Rural':'rural_urban', 'Home Language':'home_language', 'Municipality':'municipal',
          'Home Address':'home_address', 'Physical Address while studying':'residential_address', 'Disability':'disability',
           'Birth Date':'birth_date', "Firstname":"learner_name", "Surname": "learner_surname",
               "Postal Address":"postal_address", "Mobile Number":"learner_phone", "Student Email":"learner_email",
               "Name of High School":"high_school", "Year Completed Grade 12":"high_school_last_year",
               "Residential Status (Citizen / Permanent Resident/Asylum Seeker/Work Permit/Study Permit)":"citizen"}

cohort_df = cohort_df.rename(columns=column_names)

**Convert Columns to Type String**

In [124]:
column_names = ['rural_urban', 'home_language', 'municipal', 'home_address',
                'residential_address', 'home_address', 'birth_date']

convert_columns_type_string(cohort_df, column_names)

**Get Specific Sponsor**

In [125]:
sponsor_df = subset_dataframe(sponsor_df,'employer_legal_name', ['Q LINK Holdings (Pty) Ltd'])

**Get Specific Students**

In [126]:
cohort_df = subset_dataframe(cohort_df,'Active / Not active/Early absorption', ['Active'])

cohort_df = subset_dataframe(cohort_df,'Username', ['damangue','lmatsabu','nknkosi','smciwa','tkwayiba','lmoykwen'])
len(cohort_df)

6

**Convert Phone Numbers**

In [127]:
#cnvert phone to type string
convert_phone(cohort_df, 'learner_phone')

**Split Home Address**

In [128]:
#create business postal address
split_address(cohort_df, "home_address")

**Split Birthdate**

In [129]:
#create new birthdate column
cohort_df["birth_date"] = cohort_df["birth_date"].astype(str).str.split("-").str.join("")
cohort_df["birth_date"] = [item[-2:] + item[-4:-2] + item[:4] for item in cohort_df["birth_date"]]

#use split_column
split_column_num(cohort_df, "birth_date", 8)

**Create ID Columns**

In [130]:
#create new id number column
cohort_df["id"] = [f"{id_num}".split("\\")[0].strip() for id_num in cohort_df["ID Number/ Passport Number"]]

#use split_column
split_column_num(cohort_df, "id", 13)

**Create Age Column**

In [131]:
#create age column
#create_age_column(cohort_df, "birth_date")

**Create Gender Columns**

In [132]:
#strip gender column whitespaces
cohort_df['Gender'] = cohort_df['Gender'].str.strip()

#convert equity column to lowercase
cohort_df['Gender'] = cohort_df['Gender'].str.lower()

split_column(cohort_df, 'Gender')

**Create Equity Columns**

In [133]:
#strip equity column whitespaces
cohort_df['Equity'] = cohort_df['Ethnicity'].str.strip()

#convert equity column to lowercase
cohort_df['Equity'] = cohort_df['Equity'].str.lower()

#reformat equity column
cohort_df['Equity'] = cohort_df['Equity'].replace({'black': 'african', 'coloured': 'coloured',
                             'indian': 'indian', 'asian': 'african',
                             'chinese': 'african', 'white': 'white'})

split_column(cohort_df, 'Equity')


**Create Disability Columns**

In [134]:
#strip disability column whitespaces
cohort_df['Disability'] = cohort_df['disability'].str.strip()

#convert disability column to lowercase
cohort_df['Disability'] = cohort_df['disability'].str.lower()

#reformat disability column
cohort_df['Disability'] = cohort_df['disability'].replace({'no': 'None', 'nan': 'None',
                             'yes': 'Disable but unspecified', 'yes - specwearer': 'None',
                             'n': 'None', '-': 'None', 'visual impairment - spec wearer': 'None',
                             'aspergers / autistic': 'Emotional (behav/psych)', 'patella alta':'Physical (move/stand etc)',
                             'y- specwearer': 'None', 'add': 'None', 'visual impairment - spec wearer -': 'None',
                             '\\n': 'None', 'y - specwearer':'None'})

#create disability yes, no, status columns
split_column_status(cohort_df, 'disability', 'None')


**Create Citizenship Columns**

In [135]:
#strip citizenship column
cohort_df['citizen'] = cohort_df['citizen'].str.strip()

#convert citizenship column to lowercase
cohort_df['citizen'] = cohort_df['citizen'].str.lower()

#create citizeenship yes, no, status columns
split_column_status(cohort_df, 'citizen', 'citizen')

**Create Full Name and Initials**

In [136]:
#create full name and initials
def create_full_name(target_df):
    target_df['learner_fullname'] = target_df['learner_name'].str.strip().str.lower() + " " + target_df['learner_surname'].str.strip().str.lower()
    target_df['learner_fullname'] = target_df['learner_fullname'].str.title()
    target_df['learner_initials'] = [elem[0][0].upper() + elem[1][0].upper() for elem in target_df['learner_fullname'].str.split(" ")]
    
    return

create_full_name(cohort_df)

**Create Additional Columns**

In [137]:
#column_dict = {}

#add_static_columns(cohort_df, column_dict)

**Process SDL**

In [138]:
sponsor_df['sdl'] = sponsor_df['sdl'].astype(str)
sponsor_df['sdl'] = sponsor_df['sdl'].str.strip()
sponsor_df['sdl'] = [elem[-9:] if len(elem) > 9 else elem for elem in sponsor_df['sdl']]

#use split_column
split_column_num(sponsor_df, "sdl", 9)

**Process Skills Development**

In [139]:
#create skills development binary option
column_to_binary(sponsor_df, 'skills_development')

**Process Lead Employer**

In [140]:
#create skills development binary option
column_to_binary(sponsor_df, 'lead_employer')

**Process Business Address**

In [141]:
#create business address
split_address(sponsor_df, "business_address")

**Process Business Postal Address**

In [142]:
#create business postal address
split_address(sponsor_df, "business_postal_address")

**Create additional Sponsor columns**

In [143]:
#combine_dataframes(cohort_df, sponsor_df)


**Setting columns**

In [144]:
cohort_df["grade_12"] = "Grade 12"
template_df = pd.read_excel('../qlink_template.xlsx')
#template_df.drop(columns=['prefill__white', 'prefill__coloured', 'prefill__business_postal_address_post_code', 'prefill__indian', 'prefill__male'], axis=1)
#replace_nan(sponsor_df, ['employer_fax'])
template_arr = []

for column in template_df.columns:
    if column != 'contact_1_email':
        if column.replace('prefill__','') not in ['highschool_country', 'highschool_province', 'highschool_town', 'highschool_post_code', 'highschool_suburb', 'highschool_street_1', 'home_post_code', 'white', 'coloured', 'indian']:
            template_arr.append(column.replace('prefill__',''))
        
cohort_df = cohort_df[template_arr]

**Set Constants**

In [145]:
#set template id
template_id = 'a70a72ea-fb71-46fd-9bf7-5f4dee7f8a87'

In [146]:
#send signrequest
def send_signrequest(prefill_tags, signers, template_id):
    data = {
        "template": f'https://wethinkcode.signrequest.com/api/v1/templates/{template_id}/',
        "signers": signers,
        "from_email": "no-reply@wethinkcode.co.za",
        "message": "Please sign this document. \n\n Kind regards, \n\n WeThinkCode_",
        "subject": "WeThinkCode_ has sent you a SignRequest",
        "who": "o",
        "needs_to_sign": "true",
        "prefill_tags": prefill_tags,
        # Add other parameters here
    }
    
    response = requests.post(
        "https://wethinkocode.signrequest.com/api/v1/signrequest-quick-create/",
        headers={"Authorization": "Token c37da7fb557f0208fd1fbf18dc6896a5bff4e9ef"},
        json=data
    )


    json_response = json.dumps(response.json(), indent=4)

    
    if response.status_code == 201:
        print(f"Signer: {signers[0]['email']} , Status: {response.status_code}")
        return True
    elif response.status_code == 400:
        print("Not found.")
        print("Response: ", json_response)
    
    return False

**Send out sign requests**

In [148]:
df_columns = cohort_df.columns

for index, row in cohort_df.iterrows():
    prefill_tags = create_tags(row, df_columns)
    #set signers
    signers = [{"email": row["learner_email"]}]
    #signers = [{"email": "mufaro@thoughtquest.co.za"}]
    send_signrequest(prefill_tags, signers, template_id)

Signer: damangue@student.wethinkcode.co.za , Status: 201
Signer: lmatsabu@student.wethinkcode.co.za , Status: 201
Signer: nknkosi@student.wethinkcode.co.za , Status: 201
Signer: smciwa@student.wethinkcode.co.za , Status: 201
Signer: tkwayiba@student.wethinkcode.co.za , Status: 201
Signer: lmoykwen@student.wethinkcode.co.za , Status: 201


In [84]:
for column in cohort_df.columns:
    print(f"{column} : {cohort_df[column].values[0]}")

learner_fullname : Tee-Jay Bird
id_number : 0008035361081
learner_surname : Bird
learner_name : Tee-jay
learner_initials : TB
rural_urban : Urban
municipal : Cape Town Metro
home_language : English
citizen_no : 
citizen_yes : X
learner_email : tbird@student.wethinkcode.co.za
learner_phone : 27659153881
home_address : 18 18th Street, Avon, Elsies River, 7490
residential_address : 18 18th Street,Avon,Elsies River,7490
disability_specify : no
disability_no : X
disability_yes : 
coloured : X
male : X
birth_date : 2000-03-08
start_date : 01 March 2021


In [123]:
sponsor_df["employer_trading_name"] = [""]

In [147]:
cohort_df.shape[0]

6