In [None]:
import tecton, os
import pandas as pd
from pprint import pprint

# Read the API key from the TECTON_API_KEY environment variable so a real
# secret never has to be written into the notebook. The original placeholder
# string is kept as the fallback value when the variable is not set.
tecton.set_credentials(
    tecton_api_key=os.environ.get('TECTON_API_KEY', 'YOUR TECTON API KEY')
)

In [None]:
import numpy as np

# Fixed seed so that re-running the notebook regenerates the same synthetic data.
SEED = 42
rng = np.random.default_rng(SEED)

# Number of synthetic rows to generate.
examples = 10000

# Column specifications for the synthetic loan table.
# For "id", "int", "float" and "timestamp" columns, "range" is [low, high] and
# values are drawn from the half-open interval [low, high).
# For "categorical" columns, "range" is the list of allowed values.
schema = [
    {
        "col_name" : "user_id",
        "col_type" :"id",
        "range" : [1,99999]
    },
    {
       "col_name" : "loan_request_amount",
       "col_type" :"int",
       "range" : [0,1000000]
    },
    {
       "col_name" : "user_dob",
       "col_type" :"timestamp",
       "range" : ["01/01/1923","01/01/2003"]
    },
    {
       "col_name" : "user_credit_score",
       "col_type" :"int",
       "range" : [300,850]
    },
    {
       "col_name" : "user_state_of_residence",
       "col_type" :"categorical",
       "range" : ["CA", "NY"]
    },
    {
       "col_name" : "request_timestamp",
       "col_type" :"timestamp",
       "range" : ["01/01/2023","10/01/2023"]
    },
    {
       "col_name" : "loan_result",
       "col_type" :"categorical",
       "range" : ["accepted","rejected"]
    }
]


def generate_column(spec, generator, size):
    """Draw ``size`` random values for one schema entry.

    Args:
        spec: dict with "col_type" and "range" keys (see ``schema``).
        generator: ``numpy.random.Generator`` used for every draw.
        size: number of values to generate.

    Returns:
        A ``pandas.Series`` of length ``size``.

    Raises:
        ValueError: if ``spec["col_type"]`` is not a supported type.
    """
    col_type = spec["col_type"]
    bounds = spec["range"]
    if col_type == "id":
        # Identifiers are random integers stored as strings.
        return pd.Series(generator.integers(low=bounds[0], high=bounds[1], size=size)).astype("string")
    if col_type == "int":
        return pd.Series(generator.integers(low=bounds[0], high=bounds[1], size=size))
    if col_type == "float":
        return pd.Series(generator.uniform(low=bounds[0], high=bounds[1], size=size))
    if col_type == "timestamp":
        # Timestamp.value is in nanoseconds; sample whole epoch seconds instead.
        low_seconds = pd.to_datetime(bounds[0]).value // 10**9
        high_seconds = pd.to_datetime(bounds[1]).value // 10**9
        sampled = generator.integers(low=low_seconds, high=high_seconds, size=size)
        return pd.Series(pd.to_datetime(sampled, unit='s'))
    if col_type == "categorical":
        return pd.Series(generator.choice(bounds, size=size))
    # Fail loudly instead of silently dropping a misspelled column type.
    raise ValueError(f"Unsupported col_type {col_type!r} for column {spec.get('col_name')!r}")


# Columns are generated in schema order from the single seeded generator.
df = pd.DataFrame({spec["col_name"]: generate_column(spec, rng, examples) for spec in schema})

In [None]:
from tecton import RequestSource, on_demand_feature_view, FeatureService
from tecton.types import Float64, Int64, Field, Bool, String

# Request-time inputs consumed by the rule-based on-demand feature views below.
# NOTE(review): loan_request_amount is Float64 here but Int64 in the
# loan_percentiles schema and in heuristic_schema - confirm the intended type.
loan_request = RequestSource(schema=[
    Field('loan_request_amount', Float64),
    Field('user_credit_score', Int64),
    Field('user_dob', String),
    Field('user_state', String),
])

# Rule: an on-demand feature view that depends only on the request source.
@on_demand_feature_view(
    sources=[loan_request],
    mode='python',
    schema=[Field('request_within_limits', Bool)],
    description='The loan request amount is higher than min and lower than max'
)
def request_within_limits(loan_request):
    """Flag whether the requested amount lies strictly between the loan limits."""
    lower_bound, upper_bound = 1000, 1000000
    amount = loan_request['loan_request_amount']
    # Both bounds are exclusive.
    return {'request_within_limits': lower_bound < amount < upper_bound}

@on_demand_feature_view(
    sources=[loan_request],
    mode='python',
    schema=[Field('request_credit_check', Bool)],
    description='The loan requesters credit is higher than min'
)
def request_credit_check(loan_request):
    """Flag whether the requester's credit score is strictly above the minimum."""
    credit_floor = 400
    score = loan_request['user_credit_score']
    return {'request_credit_check': score > credit_floor}

@on_demand_feature_view(
    sources=[loan_request],
    mode='python',
    schema=[Field('request_age_check', Bool)],
    description='The loan requests age is older than 18'
)
def request_age_check(loan_request):
    """Flag whether the requester has reached their 18th birthday.

    ``user_dob`` is parsed as a ``'%m/%d/%Y'`` string; a value in any other
    format makes ``datetime.strptime`` raise ``ValueError``.

    The age is computed in completed calendar years rather than by dividing a
    day count by an average year length, which gave inconsistent answers on
    and around the 18th birthday depending on how many leap days had elapsed.
    """
    from datetime import datetime

    dob = datetime.strptime(loan_request['user_dob'], '%m/%d/%Y')
    today = datetime.now()
    # Subtract one year when this year's birthday has not happened yet.
    birthday_pending = (today.month, today.day) < (dob.month, dob.day)
    age_in_years = today.year - dob.year - int(birthday_pending)
    return {'request_age_check': age_in_years >= 18}

@on_demand_feature_view(
    sources=[loan_request],
    mode='python',
    schema=[Field('request_residence_check', Bool)],
    # The previous description was copied from the loan-limits rule and
    # described the wrong check.
    description='The loan requester resides in one of the accepted states'
)
def request_residence_check(loan_request):
    """Flag whether the requester's state is in the accepted-state list."""
    accepted_states = ['CA', 'NY']
    return {'request_residence_check': loan_request['user_state'] in accepted_states}

# Group the four rule-based feature views into one feature service, then validate it.
rules_fs = FeatureService(
    name='rules_fs',
    features=[
        request_within_limits,
        request_credit_check,
        request_age_check,
        request_residence_check,
    ],
)

rules_fs.validate()

In [None]:
# Sample request; "TX" is not in the ['CA', 'NY'] list used by request_residence_check.
mock_request = dict(
    loan_request_amount=5000,
    user_credit_score=500,
    user_dob="01/01/1990",
    user_state="TX",
)

In [None]:
# Run every rule in the feature service against the mock request and report the result.
for rule in rules_fs.features:
    rule_view = rule.feature_definition
    # Each rule returns a single-entry dict; its only value is the pass/fail flag.
    passed = next(iter(rule_view.run(loan_request=mock_request).values()))
    if passed:
        print("Request passes rule " + rule_view.info.name)
    else:
        # Message typo fixed: "valiates" -> "violates".
        print("Request violates rule " + rule_view.info.name)

In [None]:
from tecton import FilteredSource, Entity, batch_feature_view, FileConfig, BatchSource
from datetime import datetime, timedelta

from tecton.types import Float64, Field, String, Int64, Timestamp


# Entity that identifies a user by the user_id join key.
user = Entity(name="user", join_keys=["user_id"])
user.validate()

# Parquet file of past loan requests; request_timestamp is the timestamp column.
loan_history_config = FileConfig(
    uri='s3://tecton-devrel-rift/loan_history.parquet',
    file_format='parquet',
    timestamp_field='request_timestamp',
)
loan_history = BatchSource(name='loan_history', batch_config=loan_history_config)

@batch_feature_view(
    sources=[loan_history],
    entities=[user],
    mode='pandas',
    batch_schedule=timedelta(days=1),
    feature_start_time=datetime(2023,1,1),
    schema=[
        Field("user_id", String),
        Field("request_timestamp", Timestamp),
        Field("loan_request_amount", Int64),
        Field("loan_rank", Float64),
        Field("user_credit_score", Int64),
        Field("credit_rank", Float64),
        Field("loan_result", String),
    ]
)
def loan_percentiles(loan_history):
    """Add percentile ranks for loan amount and credit score to the loan history.

    Each row's ``loan_rank`` / ``credit_rank`` is the percentile of that row's
    value within the same column of the input frame.

    Args:
        loan_history: pandas DataFrame holding at least the columns selected
            in the return statement (minus the two computed rank columns).

    Returns:
        A new DataFrame with the schema columns; the input frame is not modified.
    """
    from scipy import stats

    amounts = loan_history['loan_request_amount']
    scores = loan_history['user_credit_score']
    # Assign the raw arrays (positional) instead of wrapping them in pd.Series:
    # a fresh Series carries a 0..n-1 index and would be index-aligned on
    # assignment, giving NaN or misplaced ranks when the input index differs.
    # .assign also avoids mutating the caller's frame and needs no global `pd`.
    ranked = loan_history.assign(
        loan_rank=stats.percentileofscore(amounts, amounts),
        credit_rank=stats.percentileofscore(scores, scores),
    )
    return ranked[["user_id","request_timestamp","loan_request_amount", "loan_rank", "user_credit_score","credit_rank", "loan_result"]]

loan_percentiles.validate()

In [None]:
# Time window passed to get_historical_features.
start, end = datetime(2023, 1, 1), datetime(2023, 6, 1)

# Fetch the loan_percentiles features for that window and convert them to pandas.
historical_features = loan_percentiles.get_historical_features(start_time=start, end_time=end)
loan_ghf = historical_features.to_pandas()
display(loan_ghf)

In [None]:
from tecton import RequestSource, on_demand_feature_view
from tecton.types import Bool

# Request-time inputs for the heuristic on-demand feature view.
heuristic_schema = [
    Field("loan_request_amount", Int64),
    Field("user_credit_score", Int64),
    Field("user_dob", String),
    Field("user_state", String)
]

# Request source built from that schema.
heuristic_request = RequestSource(schema=heuristic_schema)

# Output fields. The last field was declared as "accept_loan", which did not
# match the "heuristic_accept_loan" key that loan_odfv returns and that
# loan_assessment reads; the declaration now uses the returned key.
output_schema = [
    Field("loan_rank", Float64),
    Field("credit_rank", Float64),
    Field("heuristic_accept_loan", Bool),
]

# Heuristic feature view combining the request with the loan_percentiles data.
@on_demand_feature_view(
    sources=[heuristic_request, loan_percentiles],
    mode='python',
    schema=output_schema
)
def loan_odfv(heuristic_request, loan_percentiles):
    """Rank a new request against historical loans and apply the heuristic.

    Args:
        heuristic_request: mapping with ``loan_request_amount`` and
            ``user_credit_score`` for the incoming request.
        loan_percentiles: object indexable by ``'loan_request_amount'`` and
            ``'user_credit_score'`` that yields the historical values to rank
            against (the notebook passes a pandas DataFrame).

    Returns:
        dict with ``loan_rank`` and ``credit_rank`` (percentiles of the
        request within the historical values) and ``heuristic_accept_loan``,
        which is True when the loan percentile does not exceed the credit
        percentile.
    """
    from scipy import stats

    # Cast to built-in float/bool so the output holds plain Python scalars
    # rather than numpy scalar types.
    loan_rank = float(stats.percentileofscore(loan_percentiles['loan_request_amount'], heuristic_request["loan_request_amount"]))
    credit_rank = float(stats.percentileofscore(loan_percentiles['user_credit_score'], heuristic_request["user_credit_score"]))
    return {
        'loan_rank': loan_rank,
        'credit_rank': credit_rank,
        'heuristic_accept_loan': bool(loan_rank <= credit_rank)
    }
loan_odfv.validate()

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics


# Features are the two percentile ranks; the label is the historical loan outcome.
training_data = loan_ghf[["loan_rank", "credit_rank", "loan_result"]]

X = training_data.drop(columns="loan_result")
# Encode the label with an explicit mapping and dtype. Series.replace from
# strings to ints relies on object-dtype downcasting, which pandas 2.2
# deprecates. With map + astype, an unexpected label becomes NaN and fails the
# integer cast instead of slipping through unchanged.
y = training_data["loan_result"].map({"accepted": 1, "rejected": 0}).astype("int64")

# Hold out 30% of the rows for evaluation; fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = LogisticRegression(max_iter=1000, random_state=42)

model.fit(X_train, y_train)

y_predict = model.predict(X_test)

print(metrics.classification_report(y_test, y_predict, zero_division=0))

In [None]:
def loan_assessment(new_request):
    """Run a loan request through the rules, the heuristic and the ML model.

    The three stages run in order and the first failing stage ends the
    assessment. Passing stages print a progress message.

    Args:
        new_request: dict of request fields; it is passed both to every rule
            in ``rules_fs`` and to ``loan_odfv``.

    Returns:
        A string describing the first failed check, or the acceptance message
        when every check passes.
    """
    # Stage 1: hard rules. Each rule returns a single-entry dict whose only
    # value is the pass/fail flag.
    for rule in rules_fs.features:
        rule_view = rule.feature_definition
        passed = next(iter(rule_view.run(loan_request=new_request).values()))
        if not passed:
            # Message typo fixed: "valiates" -> "violates".
            return "Loan is rejected. Request violates rule " + rule_view.info.name
    print("Request passes all rules")

    # Stage 2: percentile heuristic against the historical loans.
    request_result = loan_odfv.run(heuristic_request=new_request,loan_percentiles=loan_ghf[['loan_request_amount','user_credit_score','loan_rank', 'credit_rank', 'loan_result']])

    if request_result["heuristic_accept_loan"]:
        print("Heuristic accepts loan")
    else:
        return "Heuristic rejects loan"

    # Stage 3: ML model. Pass a one-row DataFrame with the same column names
    # and order the model was fitted on, instead of a bare nested list.
    model_input = pd.DataFrame(
        [{"loan_rank": request_result["loan_rank"], "credit_rank": request_result["credit_rank"]}]
    )
    if model.predict(model_input)[0]:
        print("ML Model accepts loan")
    else:
        return "ML Model rejects loan"

    return "Request passes all checks, loan accepted"

In [None]:
# Sample request with a state from the ['CA', 'NY'] list used by request_residence_check.
mock_request = dict(
    loan_request_amount=5000,
    user_credit_score=500,
    user_dob="01/01/1990",
    user_state="CA",
)

# The returned assessment string is the cell's displayed output.
loan_assessment(mock_request)