# Application Data

In [None]:
# Enter the loan application id here (100211, 200211 or 300211).
# It is kept as a string: the cells below use it as a quoted SQL value,
# as a DataFrame index label and as a folder name under ./LoanApplications/.

application_id='300211'

In [None]:
from datetime import datetime
from prettytable import PrettyTable, ALL
from tabulate import tabulate
import pandas as pd

from google.cloud.bigquery import Client, QueryJobConfig
# BigQuery client used to load the loan application record.
client = Client()

# Imported here so this cell stays self-contained; it comes from the same
# package as Client and QueryJobConfig above.
from google.cloud.bigquery import ScalarQueryParameter

# The application id is bound as a named query parameter rather than being
# formatted into the SQL text, so BigQuery handles quoting/escaping.
# It is declared STRING because the id was previously passed as a quoted
# SQL string literal.
query = """SELECT * FROM `<<project name>>.<<dataset name>>.loan-application` where application_id=@application_id"""
job_config = QueryJobConfig(
    query_parameters=[
        ScalarQueryParameter("application_id", "STRING", application_id),
    ]
)
job = client.query(query, job_config=job_config)

df_applicant = job.to_dataframe()
# Index the result by application id (keeping the column as well) so that
# individual fields can be read with .loc[application_id, <column>].
df_applicant2 = df_applicant.set_index("application_id", drop = False)

In [None]:
# Two-column summary table: both columns left-aligned, a rule after every row.
t = PrettyTable(['Application Field', 'Value'])
for _column in ("Application Field", "Value"):
    t.align[_column] = "l"
t.hrules = ALL


def _application_field(column):
    """Return the value stored in `column` for the selected application id."""
    return df_applicant2.loc[application_id, column]


# Applicant Data
# Each field is kept in its own module-level variable because later cells
# (document checks, feature preparation) read them by name.
applicant_name = _application_field("applicant_name")
applicant_employer = _application_field("applicant_employer")
applicant_dob_1 = _application_field("applicant_dob")
# Date of birth is displayed and compared as day-month-year text.
applicant_dob = applicant_dob_1.strftime("%d-%m-%Y")
gender = _application_field("gender")
residential_area_type = _application_field("residential_area_type")
loan_amount = _application_field("loan_amount")
loan_term = _application_field("loan_term")
applicant_income = _application_field("applicant_income")
coapplicant_income = _application_field("coapplicant_income")
employment_type = _application_field("employment_type")
education_status = _application_field("education_status")
marital_status = _application_field("marital_status")
dependents = _application_field("dependents")
credit_history = _application_field("credit_history")

# Rows are added in display order.
for _label, _value in (
    ("Applicant Name:", applicant_name),
    ("Applicant Employer:", applicant_employer),
    ("Applicant DoB:", applicant_dob),
    ("Gender:", gender),
    ("Residential Area:", residential_area_type),
    ("Loan Amount:", loan_amount),
    ("Loan Term:", loan_term),
    ("Applicant Income:", applicant_income),
    ("Coapplicant Income:", coapplicant_income),
    ("Employment Type:", employment_type),
    ("Education Status:", education_status),
    ("Marital Status:", marital_status),
    ("Dependents:", dependents),
    ("Credit History:", credit_history),
):
    t.add_row([_label, _value])



In [None]:
# Show the application summary table built in the previous cell.
print(t)

# Document Validation Checks

In [None]:
# Document AI settings for the validation checks.
# The << ... >> markers are template placeholders: replace each one with a
# real value before running this cell.
project_id= << GCP project id >>
location = 'us'
# Empty initial values for the document-verification fields.
payslip_emp_name = ''
payslip_employer_name = ''
dl_full_name = ''
dl_date_of_birth = ''
dl_dob_match = ''
doc_ver_result = ''

# Fuzzy matching dependency: pip install fuzzywuzzy python-Levenshtein
# (alternative package name: pip install thefuzz)

processor_id = << Doc AI processor id >> # PAYSLIP PARSER
# Payslip PDF for the selected application.
file_path = f"./LoanApplications/{application_id}/payslip.pdf"

from google.cloud import documentai_v1beta3 as documentai
from google.cloud import storage
from prettytable import PrettyTable, ALL
from PIL import Image, ImageDraw
#from thefuzz import fuzz
from fuzzywuzzy import fuzz

def process_document(
    project_id=project_id, location=location, processor_id=processor_id,  file_path=file_path
):
    
    # Instantiates a client
    client = documentai.DocumentProcessorServiceClient()
    
    
    #############################################################
    # PAYSLIP
    #############################################################
    
    project_id= << GCP project id >>
    processor_id = << Doc AI processor id >> # PAYSLIP PARSER
    file_path = f"./LoanApplications/{application_id}/payslip.pdf"

    # The full resource name of the processor
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

    with open(file_path, "rb") as image:
        image_content = image.read()

    # Read the file into memory
    document = {"content": image_content, "mime_type": "application/pdf"}

    # Configure the process request
    request = {"name": name, "document": document}

    # Use the Document AI client to process the sample form
    result = client.process_document(request=request)

    document = result.document
    document_text = document.text
    
    #############################################################
    # Entity extraction for PAYSLIP
    #############################################################
    
    entities = document.entities
    
    # Grab each key/value pair and their corresponding confidence scores.
    t = PrettyTable(['Type', 'Value', 'Confidence'])
    t.hrules=ALL
    
    for entity in entities:
        entity_type = entity.type_
        value = entity.mention_text
        confience = round(entity.confidence,4)
        if (entity_type == 'employee_name'): payslip_emp_name = value
        if (entity_type == 'employer_name'): payslip_employer_name = value
        t.add_row([entity_type, value.strip()[:100], confience])
        
    print("\n\nPayslip Entities.")
    print(t)
    
    #############################################################
    # DRIVER'S LICENSE
    #############################################################
    
    project_id= << GCP project id >>
    processor_id = << Doc AI processor id >> # DL PARSER
    file_path = f"./LoanApplications/{application_id}/drivinglicense.pdf"

    # The full resource name of the processor
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

    with open(file_path, "rb") as image:
        image_content = image.read()

    # Read the file into memory
    document = {"content": image_content, "mime_type": "application/pdf"}

    # Configure the process request
    request = {"name": name, "document": document}

    # Use the Document AI client to process the sample form
    result = client.process_document(request=request)

    document = result.document
    document_text = document.text
    
    #############################################################
    # Entity extraction for DL
    #############################################################
    
    entities = document.entities
    
    # Grab each key/value pair and their corresponding confidence scores.
    t = PrettyTable(['Type', 'Value', 'Confidence'])
    t.hrules=ALL
    
    for entity in entities:
        entity_type = entity.type_
        value = entity.mention_text
        confience = round(entity.confidence,4)
        if (entity_type == 'full_name'): dl_full_name = value
        if (entity_type == 'date_of_birth'): dl_date_of_birth = value
        t.add_row([entity_type, value.strip()[:100], confience])
        
    print("\n\nIndia DL Entities.")
    print(t)
    
    

   
    t = PrettyTable(['Entity','Loan Appl', 'Payslip', 'PS Similarity', 'Driver Lic', 'DL Similarity','Result'])
    
    t.align["Entity"] = "l"
    t.align["Loan Appl"] = "l"
    t.align["Payslip"] = "l"
    t.align["PS Similarity"] = "c"
    t.align["Driver Lic"] = "l"
    t.align["DL Similarity"] = "c"
    t.hrules=ALL
    
    if (fuzz.ratio(applicant_name.upper(), payslip_emp_name.upper()) > 70 and fuzz.ratio(applicant_name, dl_full_name) > 70):
        doc_ver_result = 'PASS'
    else:
        doc_ver_result = 'FAIL'
    t.add_row(['Name', applicant_name, payslip_emp_name, 
               fuzz.ratio(applicant_name.upper(), payslip_emp_name.upper()),
               dl_full_name,fuzz.ratio(applicant_name, dl_full_name),doc_ver_result])
    
    if (fuzz.ratio(applicant_employer.upper(), payslip_employer_name.upper()) > 70):
        doc_ver_result = 'PASS'
    else:
        doc_ver_result = 'FAIL'
    t.add_row(['Employer', applicant_employer, payslip_employer_name, 
               fuzz.ratio(applicant_employer.upper(), payslip_employer_name.upper()),
               '--','--',doc_ver_result])
    
    clean_dl_dob = dl_date_of_birth.replace("/","").replace("-","")
    clean_appl_dob = applicant_dob.replace("/","").replace("-","")
    # print("Check this", fuzz.ratio(clean_dl_dob, clean_appl_dob))

    
    if (fuzz.ratio(clean_dl_dob, clean_appl_dob) < 100): 
        dl_dob_match = 'Mismatch Alert'
        doc_ver_result = 'FAIL'
    else: 
        dl_dob_match = 'Matched'
        doc_ver_result = 'PASS'
    t.add_row(['Birth Date', applicant_dob, '--','--',dl_date_of_birth,dl_dob_match,doc_ver_result])


    print("\n\nDocument Verification Checks.")
    print(t)
    
    
    #############################################################
    # Identity Proofing
    #############################################################
    
    project_id= << GCP project id >>
    processor_id = << Doc AI processor id >> # Identity Proofing Parser
    file_path = f"./LoanApplications/{application_id}/drivinglicense.pdf"

    # The full resource name of the processor
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

    with open(file_path, "rb") as image:
        image_content = image.read()

    # Read the file into memory
    document = {"content": image_content, "mime_type": "application/pdf"}

    # Configure the process request
    request = {"name": name, "document": document}

    # Use the Document AI client to process the sample form
    result = client.process_document(request=request)

    document = result.document
    document_text = document.text
    
    #############################################################
    # Entity extraction for DL
    #############################################################
    
    entities = document.entities
    
    # Grab each key/value pair and their corresponding confidence scores.
    t = PrettyTable(['Type', 'Value'])
    t.hrules=ALL
    
    for entity in entities:
        entity_type = entity.type_
        value = entity.mention_text
        confience = round(entity.confidence,4)
        t.add_row([entity_type, value.strip()[:50]])
        
    print("\n\nID Doc Proofing Results.")
    print(t)
    

    
def get_text(doc_element: dict, document: dict):
    """
    Document AI identifies form fields by their offsets
    in document text. This function converts offsets
    to text snippets.

    Despite the `dict` annotations, both arguments are read through
    attributes: `doc_element.text_anchor.text_segments` (each segment having
    `start_index` and `end_index`) and `document.text`.

    Returns the concatenation of every segment's slice of `document.text`,
    or "" when the element has no text segments.
    """
    # If a text segment spans several lines, it will
    # be stored in different text segments, so the pieces are joined here.
    # (The former `segment in text_segments` guard was always true and cost
    # a linear scan per segment.)
    return "".join(
        document.text[int(segment.start_index):int(segment.end_index)]
        for segment in doc_element.text_anchor.text_segments
    )


In [None]:
# Run the document checks with the settings defined in the previous cell.
process_document(project_id,location,processor_id,file_path)

# Alternate Data - SMS data inferences

In [None]:
from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import predict
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def predict_text_classification_single_label_sample(
    project: str,
    endpoint_id: str,
    content: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Classify one SMS text with a deployed endpoint and print the labels.

    Args:
        project: Project that owns the endpoint.
        endpoint_id: Id of the deployed text-classification endpoint.
        content: The text to classify.
        location: Region of the endpoint.
        api_endpoint: Regional API host to send the request to.

    Returns:
        None. A table of labels and two-decimal confidences is printed,
        sorted by confidence in descending order. If the response carries
        no labels the table is printed empty.
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    instance = predict.instance.TextClassificationPredictionInstance(
        content=content,
    ).to_value()
    parameters = json_format.ParseDict({}, Value())
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=[instance], parameters=parameters
    )

    t = PrettyTable(['Classification','Confidence'])
    t.align["Classification"] = "l"

    for prediction in response.predictions:
        fields = dict(prediction.items())
        # Missing keys fall back to empty lists so an unexpected response
        # gives an empty table rather than an unbound-variable error.
        labels = fields.get("displayNames", [])
        confidences = fields.get("confidences", [])
        for label, confidence in zip(labels, confidences):
            t.add_row([label, "{:.2f}".format(confidence)])

    print("\n\nClassification from this SMS.")
    t.sortby="Confidence"
    t.reversesort = True
    print(t)

In [None]:
from google.cloud.bigquery import Client, QueryJobConfig
# BigQuery client for the SMS lookup.
client = Client()

# All SMS messages of this application that are not credit transactions.
query = f"""SELECT sms_text FROM `<< project id >>.<< dataset id >>.loan-appl-sms-data` where application_id={application_id} and 
classification != 'credit_txn'"""
job = client.query(query)

df_sms = job.to_dataframe()

# Classify each message in turn; the text is echoed before its result table.
for sms_txt in df_sms["sms_text"]:
    print("\nSMS text: ", sms_txt)
    predict_text_classification_single_label_sample(
        project="54002134587",
        endpoint_id="4676966222791704576",
        location="us-central1",
        content=sms_txt,
    )


In [None]:
from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import predict
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def predict_text_entity_extraction_sample(
    project: str,
    endpoint_id: str,
    content: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Extract entities from one SMS text with a deployed endpoint and print them.

    Args:
        project: Project that owns the endpoint.
        endpoint_id: Id of the deployed text-extraction endpoint.
        content: The text to extract entities from.
        location: Region of the endpoint.
        api_endpoint: Regional API host to send the request to.

    Returns:
        None. A table of entity names and the matching slices of `content`
        is printed. If the response carries no entities the table is
        printed empty.
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    # The format of each instance should conform to the deployed model's prediction input schema
    instance = predict.instance.TextExtractionPredictionInstance(
        content=content,
    ).to_value()
    parameters = json_format.ParseDict({}, Value())
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=[instance], parameters=parameters
    )
    # See gs://google-cloud-aiplatform/schema/predict/prediction/text_extraction_1.0.0.yaml for the format of the predictions.

    t = PrettyTable(['Entity Name','Entity Value'])
    t.align["Entity Name"] = "l"

    for prediction in response.predictions:
        fields = dict(prediction.items())
        # Missing keys fall back to empty lists so an unexpected response
        # gives an empty table rather than an unbound-variable error.
        names = fields.get("displayNames", [])
        start_offsets = fields.get("textSegmentStartOffsets", [])
        end_offsets = fields.get("textSegmentEndOffsets", [])
        # Offsets index into `content`; each entity value is cut out of it.
        for name, start, end in zip(names, start_offsets, end_offsets):
            t.add_row([name, content[int(start):int(end)]])

    print("\n\nEntities extracted from this SMS.")
    print(t)
        

In [None]:
from google.cloud.bigquery import Client, QueryJobConfig
# BigQuery client for the SMS lookup.
client = Client()

# Only the SMS messages of this application classified as credit transactions.
query = f"""SELECT sms_text FROM `<< project id >>.<< dataset id >>.loan-appl-sms-data` where application_id={application_id} and 
classification = 'credit_txn'"""
job = client.query(query)

df_sms = job.to_dataframe()

# Run entity extraction on each message; the text is echoed before its table.
for sms_txt in df_sms["sms_text"]:
    print("\nSMS text: ", sms_txt)
    predict_text_entity_extraction_sample(
        project="54002134587",
        endpoint_id="7088696599806738432",
        location="us-central1",
        content=sms_txt,
    )


In [None]:
from google.cloud.bigquery import Client, QueryJobConfig
# BigQuery client for the balance queries.
client = Client()

# Average available balance per month, taken from credit-transaction SMS rows.
# The table reference uses the same << ... >> placeholders as the other
# queries in this notebook (it used to point at one fixed project/dataset),
# so a single fill-in of the placeholders covers every query.
query = f"""SELECT FORMAT_DATE('%b %Y', txn_date) txn_month, avg(avl_balance) monthly_average_balance \
FROM `<< project id >>.<< dataset id >>.loan-appl-sms-data` where \
application_id = {application_id} and classification = 'credit_txn' group by txn_month"""

job = client.query(query)
df_avl_bal = job.to_dataframe()
print(tabulate(df_avl_bal, headers='keys', tablefmt='psql'))

# Overall average across all credit-transaction rows (a single-row result).
query = f"""SELECT avg(avl_balance) monthly_average_balance \
FROM `<< project id >>.<< dataset id >>.loan-appl-sms-data` where \
application_id = {application_id} and classification = 'credit_txn'"""

job = client.query(query)
df_avl_bal = job.to_dataframe()

# Kept in a module-level variable: the credit-scoring cell uses it as a feature.
avg_monthly_bal = df_avl_bal.loc[0,"monthly_average_balance"]
print("\n\nMonthly Average Balance: ","{:.2f}".format(avg_monthly_bal))

# Credit Scoring

In [None]:
# Credit-scoring model settings. The << ... >> markers are template
# placeholders: replace them with real values before running this cell.
MODEL_ID = << Model ID >>
# Endpoint that the final gcloud prediction command is sent to.
ENDPOINT_ID = << Endpoint ID >>

In [None]:
# Features preparation
#
# One-hot encode the categorical application fields into 0/1 variables.
# Every variable is (re)assigned on each run, so a value from a previously
# processed application can never leak into the request.

# str() lets a boolean True and the text 'True' both count as a positive
# credit history. NOTE(review): confirm the column type in the source table.
credit_history_1 = int(str(credit_history) == 'True')
credit_history_0 = 1 - credit_history_1

# Anything other than 'Married' is treated as not married.
Married_Yes = int(marital_status == 'Married')
Married_No = 1 - Married_Yes

# Gender has no sensible fallback: fail loudly on an unexpected code rather
# than leaving the two variables undefined.
if gender not in ('M', 'F'):
    raise ValueError(f"Unsupported gender {gender!r}; expected 'M' or 'F'")
gender_Male = int(gender == 'M')
gender_Female = 1 - gender_Male

# Anything other than 'Graduate' is treated as not graduate.
Education_Graduate = int(education_status == 'Graduate')
Education_Not_Graduate = 1 - Education_Graduate

# 'Salaried' maps to not self-employed, 'Business' to self-employed; other
# values are rejected for the same reason as gender.
if employment_type not in ('Salaried', 'Business'):
    raise ValueError(
        f"Unsupported employment_type {employment_type!r}; "
        "expected 'Salaried' or 'Business'"
    )
Self_Employed_Yes = int(employment_type == 'Business')
Self_Employed_No = 1 - Self_Employed_Yes

# Anything that is neither 'Rural' nor 'Semiurban' is treated as urban.
Property_Area_Rural = int(residential_area_type == 'Rural')
Property_Area_Semiurban = int(residential_area_type == 'Semiurban')
Property_Area_Urban = int(not (Property_Area_Rural or Property_Area_Semiurban))

In [None]:
# Feature vector for the prediction request, in the order used by the
# original request string. NOTE(review): presumably this is the order the
# deployed model expects - confirm against the model's input schema.
feature_values = [
    dependents,
    applicant_income,
    coapplicant_income,
    loan_amount,
    loan_term,
    avg_monthly_bal,
    gender_Female,
    gender_Male,
    Married_No,
    Married_Yes,
    Education_Graduate,
    Education_Not_Graduate,
    Self_Employed_No,
    Self_Employed_Yes,
    credit_history_0,
    credit_history_1,
    Property_Area_Rural,
    Property_Area_Semiurban,
    Property_Area_Urban,
]

# Request body: {"instances": [[v1, v2, ...]]}. Values are rendered with
# their default formatting, exactly as the f-string interpolation did.
featurestr = (
    "{\n  \"instances\": [\n    ["
    + ",".join(f"{value}" for value in feature_values)
    + "]\n  ]\n}"
)

# The context manager closes the file even if the write fails.
with open("predictions.json", "w", encoding="utf-8") as text_file:
    n = text_file.write(featurestr)

In [None]:
# Send predictions.json to the deployed endpoint with the gcloud CLI.
# The leading "!" runs the line as a shell command from the notebook;
# ENDPOINT_ID is the variable set in the credit-scoring settings cell.
!gcloud beta ai endpoints predict $ENDPOINT_ID \
--json-request=predictions.json \
--region=us-central1