In [1]:
from google.cloud import aiplatform
aiplatform.init(project='dataanalytics-347914',location='us-central1')


credit_request_entity_type = aiplatform.EntityType('projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_credit_request')

customer_financial_profile_entity_type = aiplatform.EntityType('projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_financial_profile')

credit_context_entity_type = aiplatform.EntityType('projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_credit_context')

customer_demographics_entity_type = aiplatform.EntityType('projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_demographics')

In [2]:
import pandas as pd
from google.cloud import storage
import pytz
from datetime import datetime 

storage_client = storage.Client()
bucket = storage_client.bucket("udemy-gcp-mlops")

def purpose_encode(x):
    if x == "Consumer Goods":
        return 1
    elif x == "Vehicle":
        return 2
    elif x == "Tuition":
        return 3
    elif x == "Business":
        return 4
    elif x == "Repairs":
        return 5
    else:
        return 0

def other_parties_encode(x):
    if x == "Guarantor":
        return 1
    elif x == "Co-Applicant":
        return 2
    else:
        return 0

def qualification_encode(x):
    if x == "unskilled":
        return 1
    elif x == "skilled":
        return 2
    elif x == "highly skilled":
        return 3
    else:
        return 0

def credit_standing_encode(x):
    if x == "good":
        return 1
    else:
        return 0

def assets_encode(x):
    if x == "Vehicle":
        return 1
    elif x == "Investments":
        return 2
    elif x == "Home":
        return 3
    else:
        return 0

def housing_encode(x):
    if x == "rent":
        return 1
    elif x == "own":
        return 2
    else:
        return 0

def marital_status_encode(x):
    if x == "Married":
        return 1
    elif x == "Single":
        return 2
    else:
        return 0

def other_payment_plans_encode(x):
    if x == "bank":
        return 1
    elif x == "stores":
        return 2
    else:
        return 0

def sex_encode(x):
    if x == "M":
        return 1
    else:
        return 0
    
def credit_score_decode(x):
    return "Approved" if x == 1 else "Denied"

def preprocess_data(df):
    df["PURPOSE_CODE"] = df["PURPOSE"].apply(purpose_encode)
    df["OTHER_PARTIES_CODE"] = df["OTHER_PARTIES"].apply(other_parties_encode)
    df["QUALIFICATION_CODE"] = df["QUALIFICATION"].apply(qualification_encode)
    df["CREDIT_STANDING_CODE"] = df["CREDIT_STANDING"].apply(credit_standing_encode)
    df["ASSETS_CODE"] = df["ASSETS"].apply(assets_encode)
    df["HOUSING_CODE"] = df["HOUSING"].apply(housing_encode)
    df["MARITAL_STATUS_CODE"] = df["MARITAL_STATUS"].apply(marital_status_encode)
    df["OTHER_PAYMENT_PLANS_CODE"] = df["OTHER_PAYMENT_PLANS"].apply(other_payment_plans_encode)
    df["SEX_CODE"] = df["SEX"].apply(sex_encode)

    columns_to_drop = ["PURPOSE", "OTHER_PARTIES", "QUALIFICATION", "CREDIT_STANDING",
                       "ASSETS", "HOUSING", "MARITAL_STATUS", "OTHER_PAYMENT_PLANS", "SEX"]
    df = df.drop(columns=columns_to_drop)

    return df

input_file = "gs://udemy-gcp-mlops/credit_files.csv"

df = pd.read_csv(input_file)
credit_df = preprocess_data(df)

current_time = datetime.now(pytz.utc)
credit_df['update_time'] = current_time

In [3]:
credit_request_cols = ['CREDIT_REQUEST_ID', 'CREDIT_AMOUNT', 'CREDIT_DURATION', 
                       'INSTALLMENT_COMMITMENT', 'CREDIT_SCORE','update_time']

df_credit_request = credit_df[credit_request_cols]

df_credit_request.columns = df_credit_request.columns.str.lower()
df_credit_request['credit_request_id'] = df_credit_request['credit_request_id'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_credit_request['credit_request_id'] = df_credit_request['credit_request_id'].astype(str)


In [4]:
customer_financial_profile_cols = ['CREDIT_REQUEST_ID','CHECKING_BALANCE', 'SAVINGS_BALANCE',
                                  'EXISTING_CREDITS', 'JOB_HISTORY','update_time']

df_customer_financial_profile = credit_df[customer_financial_profile_cols]
df_customer_financial_profile.columns = df_customer_financial_profile.columns.str.lower()

df_customer_financial_profile['credit_request_id'] = df_customer_financial_profile['credit_request_id'].astype(str)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_customer_financial_profile['credit_request_id'] = df_customer_financial_profile['credit_request_id'].astype(str)


In [5]:
credit_context_cols = ['CREDIT_REQUEST_ID','PURPOSE_CODE', 'OTHER_PARTIES_CODE', 
                       'QUALIFICATION_CODE', 'CREDIT_STANDING_CODE', 'ASSETS_CODE', 
                       'HOUSING_CODE', 'MARITAL_STATUS_CODE', 'OTHER_PAYMENT_PLANS_CODE','update_time']

df_credit_context = credit_df[credit_context_cols]

df_credit_context.columns = df_credit_context.columns.str.lower()

df_credit_context['credit_request_id'] = df_credit_context['credit_request_id'].astype(str)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_credit_context['credit_request_id'] = df_credit_context['credit_request_id'].astype(str)


In [6]:
customer_demographics_cols = ['CREDIT_REQUEST_ID','AGE', 'NUM_DEPENDENTS', 
                              'RESIDENCE_SINCE', 'SEX_CODE','update_time']

df_customer_demographics = credit_df[customer_demographics_cols]

df_customer_demographics.columns = df_customer_demographics.columns.str.lower()

df_customer_demographics['credit_request_id'] = df_customer_demographics['credit_request_id'].astype(str)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_customer_demographics['credit_request_id'] = df_customer_demographics['credit_request_id'].astype(str)


In [7]:
columns_to_ingest = ['credit_amount', 'credit_duration','installment_commitment','credit_score']

credit_request_entity_type.ingest_from_df(
    feature_ids=columns_to_ingest,
    feature_time='update_time',
    df_source=df_credit_request,
    entity_id_field="credit_request_id"
)

Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0
Importing EntityType feature values: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_credit_request
Import EntityType feature values backing LRO: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_credit_request/operations/8594815495914389504
EntityType feature values imported. Resource name: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_credit_request


<google.cloud.aiplatform.featurestore.entity_type.EntityType object at 0x7f850ac01060> 
resource name: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_credit_request

In [8]:
columns_to_ingest = ['checking_balance', 'savings_balance','existing_credits','job_history']

customer_financial_profile_entity_type.ingest_from_df(
    feature_ids=columns_to_ingest,
    feature_time='update_time',
    df_source=df_customer_financial_profile,
    entity_id_field="credit_request_id"
)

Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0
Importing EntityType feature values: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_financial_profile
Import EntityType feature values backing LRO: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_financial_profile/operations/6156116297693265920
EntityType feature values imported. Resource name: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_financial_profile


<google.cloud.aiplatform.featurestore.entity_type.EntityType object at 0x7f84f9a39cc0> 
resource name: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_financial_profile

In [9]:
columns_to_ingest = ['purpose_code', 'other_parties_code','qualification_code','credit_standing_code',
                    'assets_code','housing_code','marital_status_code','other_payment_plans_code']

credit_context_entity_type.ingest_from_df(
    feature_ids=columns_to_ingest,
    feature_time='update_time',
    df_source=df_credit_context,
    entity_id_field="credit_request_id"
)

Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0
Importing EntityType feature values: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_credit_context
Import EntityType feature values backing LRO: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_credit_context/operations/6021852733802283008
EntityType feature values imported. Resource name: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_credit_context


<google.cloud.aiplatform.featurestore.entity_type.EntityType object at 0x7f84f9a753c0> 
resource name: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_credit_context

In [10]:
columns_to_ingest = ['age', 'num_dependents','residence_since','sex_code']

customer_demographics_entity_type.ingest_from_df(
    feature_ids=columns_to_ingest,
    feature_time='update_time',
    df_source=df_customer_demographics,
    entity_id_field="credit_request_id"
)

Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0
Importing EntityType feature values: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_demographics
Import EntityType feature values backing LRO: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_demographics/operations/8230023926097379328
EntityType feature values imported. Resource name: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_demographics


<google.cloud.aiplatform.featurestore.entity_type.EntityType object at 0x7f84f9a768c0> 
resource name: projects/936546808722/locations/us-central1/featurestores/fs_credit_scoring/entityTypes/et_customer_demographics