In [1]:
## import libraries
import os
import pandas as pd
import numpy as np

#import google cloud library
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform

## sklearn module
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import pickle


In [2]:
# Authenticate with a service account: expose the key file path through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
# NOTE(review): the path is relative, so it resolves against the notebook's
# working directory.
credentials_path = "../qemhal-012/sa-development.json"
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
# Alternate key file, kept disabled:
# os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "../qemhal-012/dti-ds-31329ac0651d.json"

In [3]:
# --- Notebook configuration --------------------------------------------------
# BigQuery project / dataset / table that the prediction input is read from.
project_id = "dti-ds"
dataset_id = "qemhal_dataset_012"
table_id = "clv_test"

# Region setting (not referenced by the cells visible in this notebook).
region = "us-central1"

# Cloud Storage bucket, plus the object names kept in it.
bucket_name = "qemhal_gcs_012"
blob_name = "data/---.csv"  # placeholder path; not referenced by the visible cells
model_name = "gb_model_final.pkl"  # downloaded from the bucket's data/ prefix

# Load and Data Preparation

In [4]:
# Read the whole input table from BigQuery into a pandas DataFrame.
client = bigquery.Client(project=project_id)

# The table reference is assembled from the configuration values.
sql = f"""select * from {dataset_id}.{table_id}"""

# Submit the query, wait for it to finish, then materialise the rows.
query_job = client.query(sql)
query_rows = query_job.result()
clv_test = query_rows.to_dataframe()

# Preview the first rows.
clv_test.head()




Unnamed: 0,vehicle_class,coverage,renew_offer_type,employmentstatus,marital_status,education,number_of_policies,monthly_premium_auto,total_claim_amount,income
0,Four-Door Car,Extended,Offer1,Retired,Divorced,College,8.0,76.0,364.8,26952.0
1,Two-Door Car,Extended,Offer1,Retired,Divorced,College,8.0,84.0,403.2,13688.0
2,Four-Door Car,Extended,Offer1,Retired,Divorced,High School or Below,4.0,84.0,604.8,12201.0
3,Four-Door Car,Extended,Offer1,Retired,Married,College,5.0,96.0,460.8,23091.0
4,Four-Door Car,Extended,Offer1,Retired,Single,Bachelor,2.0,96.0,460.8,16794.0


In [5]:
# Download the pickled model from Cloud Storage into the working directory.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob_model = bucket.blob(f"data/{model_name}")
    # The local file name is the one the prediction cells open.
    blob_model.download_to_filename("gb_model_final.pkl")
except Exception as exc:
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not converted into an error, and chain
    # the original exception so the real cause (missing object, permissions,
    # bad credentials, ...) stays visible in the traceback.
    raise RuntimeError(
        f"Could not download gs://{bucket_name}/data/{model_name}"
    ) from exc
else:
    # Only reached when every step above succeeded.
    print("Read model successful")

Read model successful


# Predicting with Machine Learning Model

In [6]:
# Two hand-built customers that share every categorical attribute and differ
# only in the numeric features.
categorical_profile = {
    'vehicle_class': 'Four-Door Car',
    'coverage': 'Premium',
    'renew_offer_type': 'Offer1',
    'employmentstatus': 'Employed',
    'marital_status': 'Single',
    'education': 'Bachelor',
}

# First row: the larger policy count, premium and claim amount.
first_row = {
    **categorical_profile,
    'number_of_policies': 2.0,
    'monthly_premium_auto': 500.0,
    'total_claim_amount': 5800.0,
    'income': 34000.0,
}

# Second row: the smaller policy count, premium and claim amount.
second_row = {
    **categorical_profile,
    'number_of_policies': 1.0,
    'monthly_premium_auto': 100.0,
    'total_claim_amount': 2400.0,
    'income': 34000.0,
}

# Column order follows the key order of the row records.
single_test_data = pd.DataFrame([first_row, second_row])
single_test_data

Unnamed: 0,vehicle_class,coverage,renew_offer_type,employmentstatus,marital_status,education,number_of_policies,monthly_premium_auto,total_claim_amount,income
0,Four-Door Car,Premium,Offer1,Employed,Single,Bachelor,2.0,500.0,5800.0,34000.0
1,Four-Door Car,Premium,Offer1,Employed,Single,Bachelor,1.0,100.0,2400.0,34000.0


In [7]:
# Score the hand-built rows (the maximum-extreme and minimum-extreme cases).
# NOTE(review): unpickling can execute code embedded in the file, so only load
# model files that come from a trusted source.
with open("gb_model_final.pkl", 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# One prediction per row of `single_test_data`; shown as the cell output.
single_pred = loaded_model.predict(single_test_data)
single_pred

array([18239.34838624,  3825.70643468])

In [9]:
# Predict bulk data.
# `loaded_model` is the model unpickled in the single-row prediction cell; it
# is reused here instead of deserialising the same file a second time.

# Drop a `clv_pred` column left behind by an earlier run of the notebook so the
# model always receives only the original table columns. `errors="ignore"`
# makes this a no-op when the column is not there (e.g. on a fresh kernel).
bulk_features = clv_test.drop(columns=["clv_pred"], errors="ignore")

y_pred = loaded_model.predict(bulk_features)

# Sanity check: one prediction per input row.
y_pred.shape

(960,)

In [10]:
# Attach the bulk predictions to the table as the `clv_pred` column
# (an existing `clv_pred` column is replaced).
clv_test = clv_test.assign(clv_pred=y_pred)

# Preview the first rows together with their predictions.
clv_test.head()

Unnamed: 0,vehicle_class,coverage,renew_offer_type,employmentstatus,marital_status,education,number_of_policies,monthly_premium_auto,total_claim_amount,income,clv_pred
0,Four-Door Car,Extended,Offer1,Retired,Divorced,College,8.0,76.0,364.8,26952.0,5739.755242
1,Two-Door Car,Extended,Offer1,Retired,Divorced,College,8.0,84.0,403.2,13688.0,6056.543474
2,Four-Door Car,Extended,Offer1,Retired,Divorced,High School or Below,4.0,84.0,604.8,12201.0,6078.054638
3,Four-Door Car,Extended,Offer1,Retired,Married,College,5.0,96.0,460.8,23091.0,7189.065238
4,Four-Door Car,Extended,Offer1,Retired,Single,Bachelor,2.0,96.0,460.8,16794.0,11821.093566


# Importing back to BigQuery table

In [11]:

# Destination table for the scored rows.
# NOTE(review): this rebinds `table_id`, which the load cell uses as the name
# of the *source* table. Re-running that cell after this one, without
# re-running the configuration cell first, would query the output table.
table_id = "clv_car_predicted"

# Construct a BigQuery client bound to the configured project, the same way the
# load cell does, instead of relying on the project inferred from the
# environment.
client = bigquery.Client(project=project_id)

table_full_id = f"{client.project}.{dataset_id}.{table_id}"

# Replace spaces in column names with underscores. This is done on a renamed
# copy so the labels of `clv_test` itself are not overwritten in place.
predictions_to_load = clv_test.rename(columns=lambda name: name.replace(' ', '_'))

# Load jobs append to an existing table by default, so every re-run of the
# notebook would add another copy of the rows. Overwrite the table instead to
# keep the cell idempotent.
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Load the dataframe into the BigQuery table and wait for the job to finish.
job = client.load_table_from_dataframe(
    predictions_to_load, table_full_id, job_config=job_config
)
job.result()
print(f"Loaded {job.output_rows} rows into {table_full_id}")

Loaded 960 rows into dti-ds.qemhal_dataset_012.clv_car_predicted
