In [51]:
## import libarary 
import os
import pandas as pd
import numpy as np

#import google cloud library
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform
import db_dtypes

## sklearn module
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, fbeta_score
from sklearn.preprocessing import LabelEncoder
import pickle


In [2]:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "trial_bigq.json"

In [3]:
project_id = 'dtidsus'
dataset_id = 'capstone'
table_id = 'data_telco_customer_churn'
region = 'us-central1'
bucket_name = 'modul4'
blob_name = 'sekar/data_telco_customer_churn.csv'

In [4]:
client = bigquery.Client(project='dtidsus')

In [17]:
try : 
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name) # Add bucket name
    data_capstone = bucket.blob('sekar/data_telco_customer_churn.csv')
    data_capstone.upload_from_filename('data/data_telco_customer_churn.csv')

    print ("Downloading model succeeded")
except:
    raise TypeError("An exception occurred")

Downloading model succeeded


In [5]:
client = bigquery.Client('dtidsus')

In [18]:
query_job = client.query(f"""select * from {dataset_id}.{table_id}""")

In [54]:
df = query_job.result().to_dataframe()



In [30]:
df.head()

Unnamed: 0,Dependents,tenure,OnlineSecurity,OnlineBackup,InternetService,DeviceProtection,TechSupport,Contract,PaperlessBilling,MonthlyCharges,Churn
0,False,49,No internet service,No internet service,No,No internet service,No internet service,One year,False,19.0,False
1,True,13,No internet service,No internet service,No,No internet service,No internet service,One year,False,20.0,False
2,False,11,No internet service,No internet service,No,No internet service,No internet service,One year,False,20.0,False
3,True,11,No internet service,No internet service,No,No internet service,No internet service,One year,True,20.0,False
4,False,36,No internet service,No internet service,No,No internet service,No internet service,One year,False,20.0,False


In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4930 entries, 0 to 4929
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Dependents        4930 non-null   boolean
 1   tenure            4930 non-null   Int64  
 2   OnlineSecurity    4930 non-null   object 
 3   OnlineBackup      4930 non-null   object 
 4   InternetService   4930 non-null   object 
 5   DeviceProtection  4930 non-null   object 
 6   TechSupport       4930 non-null   object 
 7   Contract          4930 non-null   object 
 8   PaperlessBilling  4930 non-null   boolean
 9   MonthlyCharges    4930 non-null   float64
 10  Churn             4930 non-null   boolean
dtypes: Int64(1), boolean(3), float64(1), object(6)
memory usage: 342.0+ KB


In [47]:
df['Churn'].value_counts()

Churn
False    3614
True     1316
Name: count, dtype: Int64

Currently, the dataset from bigquery shows different values for these features:

`Dependents`, `PaperlessBilling`, `Churn`

It shows boolean values `True` and `False` so we will modify it to 'Yes' and 'No' to make it consistent with our dataset

In [55]:
convert_cols = ['Dependents', 'PaperlessBilling', 'Churn']
df[convert_cols] = df[convert_cols].astype(str)
df[convert_cols] = df[convert_cols].replace({'True': 'Yes', 'False': 'No'})

In [49]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4930 entries, 0 to 4929
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Dependents        4930 non-null   object 
 1   tenure            4930 non-null   Int64  
 2   OnlineSecurity    4930 non-null   object 
 3   OnlineBackup      4930 non-null   object 
 4   InternetService   4930 non-null   object 
 5   DeviceProtection  4930 non-null   object 
 6   TechSupport       4930 non-null   object 
 7   Contract          4930 non-null   object 
 8   PaperlessBilling  4930 non-null   object 
 9   MonthlyCharges    4930 non-null   float64
 10  Churn             4930 non-null   object 
dtypes: Int64(1), float64(1), object(9)
memory usage: 428.6+ KB


In [24]:
# Use this df if error in bigquery
# df = pd.read_csv('data/data_telco_customer_churn.csv')


In [56]:
# Change values in Churn column
df['Churn'] = np.where(df['Churn']=='Yes',1,0)

# Split the data
X = df.drop(columns='Churn')
y = df['Churn']

# Data splitting
X_train, X_test, y_train, y_test = train_test_split(
    X, 
    y ,
    stratify=y, 
    test_size=0.2, 
    random_state=10)

In [57]:
model = pickle.load(open('model.pkl','rb'))
model.fit(X_train, y_train)

# Evaluation
predictions = model.predict(X_test)
print(classification_report(y_test, predictions))
print(f'F2 Score: {fbeta_score(y_test, predictions, beta=2)}')

              precision    recall  f1-score   support

           0       0.95      0.54      0.69       723
           1       0.42      0.92      0.58       263

    accuracy                           0.64       986
   macro avg       0.69      0.73      0.63       986
weighted avg       0.81      0.64      0.66       986

F2 Score: 0.7463144963144963


## Uploading Model

In [12]:
try : 
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name) # Add bucket name
    blob_model = bucket.blob('sekar/model/model.pkl')
    blob_model.upload_from_filename('model.pkl')

    print ("Uploading model succeeded")
except:
    raise TypeError("An exception occurred")

Uploading model succeeded


In [13]:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "dev_trial.json"

In [14]:
aiplatform.init(project='dtidsus', location='us-central1')

model = aiplatform.Model.upload(
    display_name='sekar_model_000',
    artifact_uri="gs://modul4/sekar/model/",
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-2:latest",
)

model.wait()

Creating Model
Create Model backing LRO: projects/41965541199/locations/us-central1/models/6296970162482446336/operations/1990588183439474688
Model created. Resource name: projects/41965541199/locations/us-central1/models/6296970162482446336@1
To use this Model in another session:
model = aiplatform.Model('projects/41965541199/locations/us-central1/models/6296970162482446336@1')


In [15]:
endpoint = aiplatform.Endpoint.create(
    display_name="sekar-endpoint-000",
    project='dtidsus',
    location='us-central1',
)

Creating Endpoint
Create Endpoint backing LRO: projects/41965541199/locations/us-central1/endpoints/8605192514858647552/operations/1211465447904378880
Endpoint created. Resource name: projects/41965541199/locations/us-central1/endpoints/8605192514858647552
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/41965541199/locations/us-central1/endpoints/8605192514858647552')


In [16]:
min_replica_count: int = 1
max_replica_count: int = 1

In [18]:
endpoint.deploy( 
    model=model,
    deployed_model_display_name='sekar_model_000',
    machine_type='e2-standard-2',
    min_replica_count=min_replica_count,
    max_replica_count=max_replica_count,
    sync=True, 
)

Deploying Model projects/41965541199/locations/us-central1/models/6296970162482446336 to Endpoint : projects/41965541199/locations/us-central1/endpoints/8605192514858647552
Deploy Endpoint model backing LRO: projects/41965541199/locations/us-central1/endpoints/8605192514858647552/operations/5453856296887386112


FailedPrecondition: 400 Model server exited unexpectedly. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=41965541199&resource=aiplatform.googleapis.com%2FEndpoint&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FEndpoint%22%0Aresource.labels.endpoint_id%3D%228605192514858647552%22%0Aresource.labels.location%3D%22us-central1%22. 9: Model server exited unexpectedly. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=41965541199&resource=aiplatform.googleapis.com%2FEndpoint&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FEndpoint%22%0Aresource.labels.endpoint_id%3D%228605192514858647552%22%0Aresource.labels.location%3D%22us-central1%22.

## Using Endpoint

In [45]:
## predict your data with online prediction here 
PROJECT_ID = 'dtidsus'
ENDPOINT_ID = "projects/41965541199/locations/us-central1/endpoints/"
REGION = 'us-central1'



In [None]:
predict_inst = ['Yes', 5, 'No', 'No', 'DSL', 'Yes', 'Yes', 'One year', 'No', 20]

In [46]:
aiplatform.init(project=PROJECT_ID, location=REGION)
endpoint = aiplatform.Endpoint(ENDPOINT_ID)
prediction = endpoint.predict(instances=[[50000, 1, 3, 1, 23, 131895, 132154, 129237]])

print("PREDICTION:", prediction)

PREDICTION: Prediction(predictions=[0.0], deployed_model_id='6724688981780332544', metadata=None, model_version_id='1', model_resource_name='projects/41965541199/locations/us-central1/models/4379281146152747008', explanations=None)


## Predict using Manual File

In [13]:
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob_model = bucket.blob('sekar/model/model.pkl')
    blob_model.download_to_filename('model_new.pkl')

    print ("download model succeeded")
except:
    raise TypeError("An exception occurred")

download model succeeded


In [14]:
model_load = pickle.load(open('model_new.pkl','rb'))

In [26]:
y_predict = model_load.predict(X_test)

In [27]:
y_predict

array([1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,