In [None]:
import pandas as pd
import xgboost as xgb
import numpy as np
import collections
import witwidget

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.utils import shuffle


# Read the dataset with Pandas

In [None]:
from google.cloud.bigquery import Client, QueryJobConfig

# The <<project-name>> and <<dataset-name>> parts of the table path are
# placeholders; replace them with real identifiers before running this cell.
query = "SELECT * FROM `<<project-name>>.<<dataset-name>>.loans-data`"

# Submit the query through a client built with no explicit arguments, then
# materialise the finished job's result as a pandas DataFrame.
client = Client()
job = client.query(query)
df = job.to_dataframe()

# Preview the first rows of the query result.
df.head()

In [None]:
# Maps every column name to the dtype handed to pandas when the loans data is
# loaded. Columns typed 'category' are the ones that get one-hot encoded
# further down; the rest are plain integer/float columns.
COLUMN_NAMES = collections.OrderedDict([
    ('Gender', 'category'),
    ('Married', 'category'),
    ('Dependents', np.int64),
    ('Education', 'category'),
    ('Self_Employed', 'category'),
    ('ApplicantIncome', np.float64),
    ('CoapplicantIncome', np.float64),
    ('LoanAmount', np.float64),
    ('Loan_Amount_Term', np.int64),
    ('Credit_History', 'category'),
    ('Property_Area', 'category'),
    ('Loan_Status', np.int64),
    ('Avg_Monthly_Bal', np.float64),
])

In [None]:
# Load the loans CSV with the dtypes declared in COLUMN_NAMES and shuffle the
# rows with a fixed seed so the row order is the same on every run.
# NOTE(review): the `df` frame fetched from BigQuery earlier is not used by any
# visible cell; this cell reads a local 'loans-data.csv' instead - confirm
# which of the two sources is intended.
data = shuffle(
    pd.read_csv('loans-data.csv', index_col=False, dtype=COLUMN_NAMES),
    random_state=2,
)

# Preview the shuffled rows.
data.head()

In [None]:
# Print how many rows fall into each Loan_Status value before the column is
# removed from the feature frame.
print(data['Loan_Status'].value_counts())

# Keep the target as a NumPy array and rebind `data` to the feature columns only.
# Gotcha: after this cell has run once, `data` no longer has a 'Loan_Status'
# column, so re-running the cell without reloading `data` raises KeyError.
labels = data['Loan_Status'].values
data = data.drop('Loan_Status', axis=1)

# Create dummy columns for categorical values

In [None]:
# Collect the names of all columns whose dtype is 'category' ...
dummy_columns = [
    column
    for column, column_dtype in data.dtypes.items()
    if column_dtype == 'category'
]

# ... and replace each of them with one indicator column per category value.
data = pd.get_dummies(data, columns=dummy_columns)

# Preview the encoded feature frame.
data.head()

# Split data into train and test sets

In [None]:
# Build the feature matrix and target vector for training.
# The explicit float64 conversion keeps `x` a numeric array even when the
# one-hot columns are boolean (with mixed bool/float columns, `data.values`
# would fall back to an object-dtype array).
x, y = data.to_numpy(dtype=np.float64), labels

# Hold out a test set. The split is seeded (same seed as the shuffle above) so
# the train/test partition - and therefore the reported accuracy - is
# reproducible across "Restart & Run All".
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2)

# Build, train, and evaluate an XGBoost model

In [None]:
# Binary classifier for Loan_Status.
# 'binary:logistic' is XGBoost's objective for binary classification: it uses
# the same logistic loss and probability output as 'reg:logistic', but is the
# objective documented for classifiers and selects a classification
# evaluation metric by default.
model = xgb.XGBClassifier(
    objective='binary:logistic'
)

In [None]:
# Train the classifier on the training split; the fitted estimator is the
# cell's displayed value.
model.fit(X=x_train, y=y_train)

In [None]:
# Score the held-out split. Predictions are rounded before comparison so that
# any fractional outputs are mapped to the nearest integer label.
y_pred = model.predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred.round())

# Print the accuracy followed by a blank line.
print(acc, '\n')

In [None]:
# Write the trained model to ./model.bst; this is the file copied to the GCS
# bucket in the deployment section below.
model.save_model(fname='model.bst')

# Deploy model to Vertex AI

In [None]:
# Replace every << ... >> placeholder below with your own value before running.
# The placeholders are not valid Python, so this cell fails until they are filled in.
#   GCP_PROJECT  - your Google Cloud project number
#   MODEL_BUCKET - the GCS bucket URI that model.bst is copied to with `gsutil cp`
#   MODEL_NAME   - the display name given to the model when it is uploaded
# NOTE(review): the values are presumably meant to be string literals - confirm.
GCP_PROJECT = << Enter GCP project number >>
MODEL_BUCKET = << Enter GCS bucket URI >>
MODEL_NAME = << Enter the name of the model >>

In [None]:
# Copy the saved model file from the working directory to the location in MODEL_BUCKET.
!gsutil cp ./model.bst $MODEL_BUCKET

In [None]:
# Upload the artifacts stored under MODEL_BUCKET as a model named MODEL_NAME in
# us-central1, served with the xgboost-cpu.1-2 prediction container image.
!gcloud beta ai models upload \
--display-name=$MODEL_NAME \
--artifact-uri=$MODEL_BUCKET \
--container-image-uri=us-docker.pkg.dev/cloud-aiplatform/prediction/xgboost-cpu.1-2:latest \
--region=us-central1

In [None]:
# Replace the placeholder with the id of the uploaded model; the line is not
# valid Python until it is filled in.
# NOTE(review): presumably the id is reported by the upload command in the previous cell - confirm.
MODEL_ID = << Enter the model id >>

In [None]:
# Create an endpoint with display name credit-scoring-xgb in us-central1.
!gcloud beta ai endpoints create \
--display-name=credit-scoring-xgb \
--region=us-central1

In [None]:
# Replace the placeholder with the id of the endpoint; the line is not valid
# Python until it is filled in.
# NOTE(review): presumably the id is reported by the create command in the previous cell - confirm.
ENDPOINT_ID = << Enter the endpoint id >>

In [None]:
# Deploy model MODEL_ID to endpoint ENDPOINT_ID in us-central1 on
# n1-standard-2 machines, under the display name loanmodel_v1.
# NOTE(review): --traffic-split=0=100 presumably routes 100% of the endpoint's
# traffic to the model being deployed (key 0) - confirm against the gcloud reference.
!gcloud beta ai endpoints deploy-model $ENDPOINT_ID \
--region=us-central1 \
--model=$MODEL_ID \
--display-name=loanmodel_v1 \
--machine-type=n1-standard-2 \
--traffic-split=0=100

# Test the deployed model

In [None]:
%%writefile predictions.json
{
  "instances": [
    [0,200,100,30000,720,1000,1,0,0,1,0,1,1,0,1,0,0,1,0]
  ]
}

In [None]:
# Send the request body in predictions.json to endpoint ENDPOINT_ID in us-central1.
!gcloud beta ai endpoints predict $ENDPOINT_ID \
--json-request=predictions.json \
--region=us-central1

In [None]:
%%writefile predictions.json
{
  "instances": [
    [0,2000,1000,100,6,50000,1,0,0,1,0,1,1,0,0,1,0,0,1]
  ]
}

In [None]:
# Send predictions.json (overwritten with a second instance by the preceding
# %%writefile cell) to endpoint ENDPOINT_ID in us-central1.
!gcloud beta ai endpoints predict $ENDPOINT_ID \
--json-request=predictions.json \
--region=us-central1