# <center>Classification Predictions on New, Unseen Data</center>
---

<center>Travel Insurance Dataset - Mulya Fajar Ningsih Alwi</center>

In [1]:
# Run locally & on the cloud
import pandas as pd
import numpy as np
import pickle
from xgboost import XGBClassifier

In [2]:
# Two hand-written policies used to try the model on brand-new inputs.
# Column labels are kept exactly as spelled (including "Commision") because
# this frame is later passed to the model unchanged.
feature_names = (
    'Agency', 'Agency Type', 'Distribution Channel', 'Product Name', 'Gender',
    'Duration', 'Destination', 'Net Sales', 'Commision (in value)', 'Age',
)
sample_rows = [
    ('C2B', 'Airlines', 'Online', 'Basic Plan', 'F', 180, 'Thailand', 165.00, 33.00, 80),
    ('EPX', 'Travel Agency', 'Offline', 'Cancellation Plan', 'M', 7, 'Indonesia', 14.00, 3.00, 30),
]

# Transpose the row tuples into one list per column, then build the frame.
new_test_data = pd.DataFrame({
    name: list(values)
    for name, values in zip(feature_names, zip(*sample_rows))
})

new_test_data

Unnamed: 0,Agency,Agency Type,Distribution Channel,Product Name,Gender,Duration,Destination,Net Sales,Commision (in value),Age
0,C2B,Airlines,Online,Basic Plan,F,180,Thailand,165.0,33.0,80
1,EPX,Travel Agency,Offline,Cancellation Plan,M,7,Indonesia,14.0,3.0,30


# Variables Needed for Google Cloud (or you could just upload all of them to GitHub)

In [3]:
#import google cloud library
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform

In [4]:
# --- Authentication ---------------------------------------------------------
# Point the Google client libraries at a service-account key file through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable. Swap in another key
# file path here if a different service account is needed.
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "sa-development.json"

# --- Project, BigQuery and Cloud Storage identifiers --------------------------
project_id = "dti-ds"
dataset_id = "ninis_dataset_016"
table_id = "test_data"
region = "us-central1"
bucket_name = "ninis_gcs_016"
blob_name = "data/test_data.csv"

# --- Model artifact -----------------------------------------------------------
# File name of the pickled model.
model_name = "Travel_Insurance_XGBoost_Model.sav"

## Google Cloud Storage

### Retrieve from Google Cloud Storage

In [5]:
# Retrieve the model from Google Cloud Storage.
# The pickled model is read from the `model/` prefix of the bucket and written
# to the working directory under the file name the prediction cells open.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob_model = bucket.blob(f'model/{model_name}')
    blob_model.download_to_filename('Travel_Insurance_XGBoost_Model.sav')
except Exception as exc:
    # Catch only real errors (not KeyboardInterrupt/SystemExit) and keep the
    # underlying cause chained so auth, permission and missing-object failures
    # remain diagnosable from the traceback.
    raise RuntimeError(
        f"Could not download 'model/{model_name}' from bucket '{bucket_name}'"
    ) from exc
else:
    print("Read model succeeded")

Read model succeeded


## BigQuery

### Load from BigQuery

In [6]:
# Pull the stored test table out of BigQuery into a DataFrame.
from google.cloud import bigquery

# Only the project is passed; no explicit credentials object is supplied.
client = bigquery.Client(project=project_id)

# Fetch every column of the configured table.
sql = f"""select * from {dataset_id}.{table_id}"""
query_job = client.query(sql)

# Relabel the result positionally with the column names of `new_test_data`.
# NOTE(review): this assumes the table has the same number of columns, in the
# same order, as `new_test_data` -- confirm against the table schema.
auto_cloud = (
    query_job.result()
    .to_dataframe()
    .set_axis(new_test_data.columns, axis=1)
)
auto_cloud



Unnamed: 0,Agency,Agency Type,Distribution Channel,Product Name,Gender,Duration,Destination,Net Sales,Commision (in value),Age
0,EPX,Travel Agency,Online,Cancellation Plan,Prefer Not to Say,11,NEW ZEALAND,0.00,0.00,36
1,EPX,Travel Agency,Online,Cancellation Plan,Prefer Not to Say,115,"TAIWAN, PROVINCE OF CHINA",0.00,0.00,36
2,EPX,Travel Agency,Online,2 way Comprehensive Plan,Prefer Not to Say,9,SINGAPORE,0.00,0.00,36
3,EPX,Travel Agency,Online,1 way Comprehensive Plan,Prefer Not to Say,3,THAILAND,0.00,0.00,27
4,EPX,Travel Agency,Online,2 way Comprehensive Plan,Prefer Not to Say,8,UNITED KINGDOM,0.00,0.00,36
...,...,...,...,...,...,...,...,...,...,...
7829,SSI,Airlines,Online,Ticket Protector,Prefer Not to Say,1,SINGAPORE,5.23,1.47,48
7830,SSI,Airlines,Online,Ticket Protector,Prefer Not to Say,35,SINGAPORE,4.24,1.19,48
7831,ART,Airlines,Online,24 Protect,M,3,MALAYSIA,4.99,1.75,48
7832,SSI,Airlines,Online,Ticket Protector,Prefer Not to Say,64,SINGAPORE,10.98,3.08,48


# Model Prediction

## Cloud Data Prediction

In [7]:
# Deserialize the model file from the working directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.
with open('Travel_Insurance_XGBoost_Model.sav', mode='rb') as model_file:
    loaded_model = pickle.load(model_file)

# Predict on the hand-made sample rows and show the predicted labels.
y_pred_cloud_new_data = loaded_model.predict(new_test_data)
y_pred_cloud_new_data

array([1, 0])

In [8]:
# Build a new frame holding the sample rows plus their predicted label;
# `new_test_data` itself is left unchanged.
new_test_cloud = new_test_data.assign(**{'Claim Prediction': y_pred_cloud_new_data})
new_test_cloud

Unnamed: 0,Agency,Agency Type,Distribution Channel,Product Name,Gender,Duration,Destination,Net Sales,Commision (in value),Age,Claim Prediction
0,C2B,Airlines,Online,Basic Plan,F,180,Thailand,165.0,33.0,80,1
1,EPX,Travel Agency,Offline,Cancellation Plan,M,7,Indonesia,14.0,3.0,30,0


In [9]:
# Show the unpickled model object as the cell output to inspect what was loaded.
loaded_model

In [10]:
# Predict on the rows loaded from BigQuery.
# Only the feature columns (the columns of `new_test_data`, which the model
# also predicts on directly) are handed to the model, so this cell can be
# re-run even if `auto_cloud` has since gained extra, non-feature columns.
feature_columns = list(new_test_data.columns)
y_pred_file_cloud = loaded_model.predict(auto_cloud.loc[:, feature_columns])

# Preview the first 13 predicted labels.
y_pred_file_cloud[:13]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [11]:
# Attach the predictions to a copy instead of mutating `auto_cloud` in place:
# `auto_cloud` is the frame passed to `loaded_model.predict`, so it must keep
# only its feature columns for the prediction cell to stay re-runnable.
auto_cloud_pred = auto_cloud.copy()
auto_cloud_pred['Claim Prediction'] = y_pred_file_cloud
auto_cloud_pred

Unnamed: 0,Agency,Agency Type,Distribution Channel,Product Name,Gender,Duration,Destination,Net Sales,Commision (in value),Age,Claim Prediction
0,EPX,Travel Agency,Online,Cancellation Plan,Prefer Not to Say,11,NEW ZEALAND,0.00,0.00,36,1
1,EPX,Travel Agency,Online,Cancellation Plan,Prefer Not to Say,115,"TAIWAN, PROVINCE OF CHINA",0.00,0.00,36,1
2,EPX,Travel Agency,Online,2 way Comprehensive Plan,Prefer Not to Say,9,SINGAPORE,0.00,0.00,36,1
3,EPX,Travel Agency,Online,1 way Comprehensive Plan,Prefer Not to Say,3,THAILAND,0.00,0.00,27,1
4,EPX,Travel Agency,Online,2 way Comprehensive Plan,Prefer Not to Say,8,UNITED KINGDOM,0.00,0.00,36,1
...,...,...,...,...,...,...,...,...,...,...,...
7829,SSI,Airlines,Online,Ticket Protector,Prefer Not to Say,1,SINGAPORE,5.23,1.47,48,1
7830,SSI,Airlines,Online,Ticket Protector,Prefer Not to Say,35,SINGAPORE,4.24,1.19,48,1
7831,ART,Airlines,Online,24 Protect,M,3,MALAYSIA,4.99,1.75,48,1
7832,SSI,Airlines,Online,Ticket Protector,Prefer Not to Say,64,SINGAPORE,10.98,3.08,48,1


---
<center>Thank You</center>

---