## Note: please use Python 3.10 or 3.11 to run all the code below

# Installations

# Import Library

In [57]:
## import libraries
import os
import pandas as pd
import numpy as np

#import google cloud library
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform
from support_functions import missing_value, fill_missing, list_dtypes

## sklearn module
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import pickle


### Authenticate to GCP using a Service Account

- Depending on your Jupyter environment, you may have to authenticate manually.

- Use "sa-development.json" as the service-account key for authenticating to GCP.

- Adjust the path to match where the code is stored on your local machine.

In [58]:
# Point Google's Application Default Credentials lookup at the local
# service-account key so the BigQuery / Storage clients created below can
# authenticate. The path is relative to the notebook's working directory.
os.environ.update({'GOOGLE_APPLICATION_CREDENTIALS': "./sa-development.json"})

## Replace the code below with your configuration 
- The configuration naming convention is shared at the following link: [configuration name](https://docs.google.com/spreadsheets/d/1U7bbXp9Y6uLGZXThfsqDneo1U-E3J8nwCcxIdpjgZmI/edit#gid=1834500505)

- Example configuration

    ```
    project_id = 'dti-ds'
    dataset_id = 'jaya_dataset_000'
    table_id = 'german_dataset'
    region = 'us-central1'
    bucket_name = 'jaya_gcs_000'
    blob_name = 'data/german_dataset.csv'
    
    ```


In [61]:
# Project-specific settings -- change these to match your own GCP resources.
project_id = "dti-ds"                        # GCP project handed to the Storage / BigQuery clients
dataset_id = "farikhin_dataset_006"          # BigQuery dataset that is read from and written to
table_id = "ecommerce_customer_churn_test"   # source table holding the test rows
region = "us-central1"                       # not referenced by the cells shown in this notebook
bucket_name = "farikhin_gcs_006"             # GCS bucket the trained model is downloaded from

In [65]:
# Download the trained model artifact from GCS into a local "models/" folder.
# (`os` is already imported in the import cell at the top of the notebook.)
model_name = "best_model.pkl"
local_model_dir = "models"
local_model_path = os.path.join(local_model_dir, model_name)

# Ensure the local directory exists
os.makedirs(local_model_dir, exist_ok=True)

try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    # The artifact is stored under the "model/" prefix of the bucket.
    blob_model = bucket.blob(f"model/{model_name}")
    blob_model.download_to_filename(local_model_path)
except Exception as e:
    # A storage/network failure is not a type error: raise RuntimeError and
    # chain the original exception so the real cause stays in the traceback.
    raise RuntimeError(f"An error occurred: {e}") from e
else:
    print("Retrieve model succeeded")

Retrieve model succeeded


## Model Predictions

In [66]:
# NOTE(review): the imports below are not referenced by name in this cell;
# presumably they make sure the libraries the pickled model depends on are
# installed and loaded before unpickling -- confirm before removing.
# (`pickle` itself is already imported in the import cell at the top.)
import sklearn
import xgboost

from sklearn.compose import ColumnTransformer
from category_encoders import BinaryEncoder


model_name = "best_model.pkl"
model_path = f"models/{model_name}"

# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load artifacts that come from a bucket you control.
# The context manager closes the file handle as soon as loading finishes
# (the previous bare open() call left it open).
with open(model_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Sample data for prediction: two hand-written rows, values in `columns` order
test_data = [
    [5.0, 12.0, 4, "Mobile Phone", 4, "Married", 3, 0, 3.0, 152.89],
    [1.0, 16.0, 4, "Mobile Phone", 4, "Married", 8, 0, 3.0, 145.24]
]

columns = ['Tenure', 'WarehouseToHome', 'NumberOfDeviceRegistered', 'PreferedOrderCat',
           'SatisfactionScore', 'MaritalStatus', 'NumberOfAddress', 'Complain',
           'DaySinceLastOrder', 'CashbackAmount']

df = pd.DataFrame(test_data, columns=columns)

# Smoke test: the loaded model should accept a DataFrame with these columns.
predictions = loaded_model.predict(df)

predictions

array([0, 1])

In [67]:
# Read the whole test table from BigQuery into a pandas DataFrame.
client = bigquery.Client(project=project_id)

# Build the SQL first, then submit it and wait for the rows.
query_sql = f"""select * from {dataset_id}.{table_id}"""
query_job = client.query(query_sql)
query_rows = query_job.result()
test_df = query_rows.to_dataframe()

test_df



Unnamed: 0,Tenure,WarehouseToHome,NumberOfDeviceRegistered,PreferedOrderCat,SatisfactionScore,MaritalStatus,NumberOfAddress,Complain,DaySinceLastOrder,CashbackAmount,Churn
0,0.0,13.0,4,Mobile,5,Single,2,1,0.0,126.00,1
1,0.0,13.0,3,Mobile,3,Single,2,1,7.0,123.94,1
2,0.0,12.0,4,Mobile,2,Single,2,1,0.0,129.26,1
3,0.0,12.0,4,Mobile,1,Single,2,1,0.0,129.26,1
4,0.0,15.0,3,Mobile,5,Single,3,1,0.0,131.72,1
...,...,...,...,...,...,...,...,...,...,...,...
648,1.0,25.0,4,Laptop & Accessory,3,Married,2,1,3.0,171.14,1
649,1.0,27.0,4,Laptop & Accessory,3,Single,2,0,3.0,147.72,0
650,1.0,20.0,4,Laptop & Accessory,4,Single,7,0,1.0,182.65,1
651,1.0,6.0,4,Laptop & Accessory,1,Married,2,1,7.0,168.34,0


In [68]:
# Relabel the ground-truth column as 'y_actual'. Rebinding the name (rather
# than mutating in place) keeps the cell safe to re-run.
test_df = test_df.rename(columns={'Churn': 'y_actual'})

In [69]:
# Separate the ground-truth labels from the columns passed to the model.
y_actual = test_df['y_actual']
bulk_predict_df = test_df.drop(columns=['y_actual'])
bulk_predict_df

Unnamed: 0,Tenure,WarehouseToHome,NumberOfDeviceRegistered,PreferedOrderCat,SatisfactionScore,MaritalStatus,NumberOfAddress,Complain,DaySinceLastOrder,CashbackAmount
0,0.0,13.0,4,Mobile,5,Single,2,1,0.0,126.00
1,0.0,13.0,3,Mobile,3,Single,2,1,7.0,123.94
2,0.0,12.0,4,Mobile,2,Single,2,1,0.0,129.26
3,0.0,12.0,4,Mobile,1,Single,2,1,0.0,129.26
4,0.0,15.0,3,Mobile,5,Single,3,1,0.0,131.72
...,...,...,...,...,...,...,...,...,...,...
648,1.0,25.0,4,Laptop & Accessory,3,Married,2,1,3.0,171.14
649,1.0,27.0,4,Laptop & Accessory,3,Single,2,0,3.0,147.72
650,1.0,20.0,4,Laptop & Accessory,4,Single,7,0,1.0,182.65
651,1.0,6.0,4,Laptop & Accessory,1,Married,2,1,7.0,168.34


In [70]:
# Score every test row and wrap the predictions in a one-column frame.
y_pred = loaded_model.predict(bulk_predict_df)
y_pred_df = pd.DataFrame(y_pred, columns=['y_pred'])

# Give both pieces a fresh 0..n-1 index so the column-wise concat lines the
# rows up by position.
actual_part = test_df.reset_index(drop=True)
predicted_part = y_pred_df.reset_index(drop=True)
result_df = pd.concat([actual_part, predicted_part], axis=1)
result_df

Unnamed: 0,Tenure,WarehouseToHome,NumberOfDeviceRegistered,PreferedOrderCat,SatisfactionScore,MaritalStatus,NumberOfAddress,Complain,DaySinceLastOrder,CashbackAmount,y_actual,y_pred
0,0.0,13.0,4,Mobile,5,Single,2,1,0.0,126.00,1,1
1,0.0,13.0,3,Mobile,3,Single,2,1,7.0,123.94,1,1
2,0.0,12.0,4,Mobile,2,Single,2,1,0.0,129.26,1,1
3,0.0,12.0,4,Mobile,1,Single,2,1,0.0,129.26,1,1
4,0.0,15.0,3,Mobile,5,Single,3,1,0.0,131.72,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
648,1.0,25.0,4,Laptop & Accessory,3,Married,2,1,3.0,171.14,1,1
649,1.0,27.0,4,Laptop & Accessory,3,Single,2,0,3.0,147.72,0,0
650,1.0,20.0,4,Laptop & Accessory,4,Single,7,0,1.0,182.65,1,1
651,1.0,6.0,4,Laptop & Accessory,1,Married,2,1,7.0,168.34,0,0


In [71]:
from sklearn.metrics import confusion_matrix, fbeta_score

# Pin the label order to [0, 1] so the matrix is always 2x2. Without it, a
# batch in which only one class appears yields a 1x1 matrix and the
# four-way unpacking below raises ValueError.
# With labels=[0, 1], ravel() yields: true negatives, false positives,
# false negatives, true positives.
tn, fp, fn, tp = confusion_matrix(y_actual, y_pred, labels=[0, 1]).ravel()
print(tn, fp, fn, tp)

500 41 16 96


In [75]:
from google.cloud.exceptions import NotFound

# Destination table for the scored rows, in the same project and dataset as the source.
dest_table_id = 'ecommerce_customer_churn_prediction'
table_full_id = f'{project_id}.{dataset_id}.{dest_table_id}'

# Explicit column types for the destination table: the input features plus
# the actual and predicted labels.
# NOTE(review): the frames displayed above show an 'Unnamed: 0' header; if that
# is a real column of result_df it is not covered by this schema -- confirm.
schema = [
    bigquery.SchemaField('Tenure', 'FLOAT'),
    bigquery.SchemaField('WarehouseToHome', 'FLOAT'),
    bigquery.SchemaField('NumberOfDeviceRegistered', 'INTEGER'),
    bigquery.SchemaField('PreferedOrderCat', 'STRING'),
    bigquery.SchemaField('SatisfactionScore', 'INTEGER'),
    bigquery.SchemaField('MaritalStatus', 'STRING'),
    bigquery.SchemaField('NumberOfAddress', 'INTEGER'),
    bigquery.SchemaField('Complain', 'INTEGER'),
    bigquery.SchemaField('DaySinceLastOrder', 'FLOAT'),
    bigquery.SchemaField('CashbackAmount', 'FLOAT'),
    bigquery.SchemaField('y_actual', 'INTEGER'),
    bigquery.SchemaField('y_pred', 'INTEGER')

]

# Client.dataset() is deprecated in google-cloud-bigquery; build the table
# reference explicitly from the project and dataset instead. `client` was
# created with project=project_id, so this points at the same table.
table_ref = bigquery.DatasetReference(project_id, dataset_id).table(dest_table_id)

# Check if the table exists
try:
    client.get_table(table_ref)
    print(f'Table {table_full_id} already exists.')
except NotFound:
    # Create the table if it does not exist
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)
    print(f'Table {table_full_id} created.')

# WRITE_TRUNCATE replaces any existing rows, so re-running this cell
# overwrites the table rather than appending duplicates.
job_config = bigquery.LoadJobConfig(
    schema=schema,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
)

load_job = client.load_table_from_dataframe(result_df, table_ref, job_config=job_config)
# Block until the load job finishes; result() raises if the job failed.
load_job.result()

print(f'Loaded {load_job.output_rows} rows into {table_full_id}')

Table dti-ds.farikhin_dataset_006.ecommerce_customer_churn_prediction created.
Loaded 653 rows into dti-ds.farikhin_dataset_006.ecommerce_customer_churn_prediction
