# Tecton Rules and Heuristics

Create rules and heuristics to supplement or circumvent machine learning models with Tecton.

This notebook runs on Tecton using Rift. Request a private preview of Rift [here](https://resources.tecton.ai/rift).

This notebook needs a Tecton account and API Key to run.

In [None]:
import tecton, os
import pandas as pd
from pprint import pprint

# Prefer an API key supplied through the TECTON_API_KEY environment variable so the
# secret does not have to be pasted into the notebook; fall back to the placeholder
# (replace it with your key) when the variable is not set.
tecton.set_credentials(tecton_api_key=os.environ.get('TECTON_API_KEY', 'YOUR TECTON API KEY'))

## Tecton Rules

In this section, Tecton on-demand feature views (ODFVs) are used to create rules. The rules are bundled together in a Feature Service and then tested with a mocked input.

In [None]:
from tecton import RequestSource, on_demand_feature_view, FeatureService
from tecton.types import Float64, Int64, Field, Bool, String

# Request-time payload shared by every rule below.
# The next cell contains a mock request with this shape.
loan_request = RequestSource(
    schema=[
        Field('loan_request_amount', Float64),  # amount the requester asks for
        Field('user_credit_score', Int64),      # requester's credit score
        Field('user_dob', String),              # date of birth, parsed as MM/DD/YYYY by the age rule
        Field('user_state', String),            # state code checked by the residence rule
    ],
)

@on_demand_feature_view(
    # The rule is evaluated against the request payload declared by `loan_request`.
    sources=[loan_request],
    # The transformation body is plain Python.
    mode='python',
    # Single Bool output: True when the rule passes, False when it fails.
    schema=[Field('request_within_limits', Bool)],
    description='The loan request amount is higher than min and lower than max',
    # Custom tag used to distinguish and manage rules.
    tags={'feature_type': 'rule'}
)
def request_within_limits(loan_request):
    """Rule: the requested amount lies strictly between the lower and upper bounds.

    Reads ``loan_request['loan_request_amount']`` and returns a dict whose only
    key, ``request_within_limits``, holds the pass/fail result.
    """
    lower_bound = 1000
    upper_bound = 1000000
    amount = loan_request['loan_request_amount']
    # Both bounds are exclusive.
    return {'request_within_limits': lower_bound < amount < upper_bound}

@on_demand_feature_view(
    sources=[loan_request],
    mode='python',
    schema=[Field('request_credit_check', Bool)],
    description='The loan requesters credit is higher than min',
    tags={'feature_type': 'rule'}
)
def request_credit_check(loan_request):
    """Rule: the requester's credit score is strictly above the minimum.

    Reads ``loan_request['user_credit_score']`` and returns a dict whose only
    key, ``request_credit_check``, holds the pass/fail result.
    """
    score_floor = 400
    applicant_score = loan_request['user_credit_score']
    passes = applicant_score > score_floor
    return {'request_credit_check': passes}

@on_demand_feature_view(
    sources=[loan_request],
    mode='python',
    schema=[Field('request_age_check', Bool)],
    description='The loan requests age is older than 18',
    tags={'feature_type': 'rule'}
)
def request_age_check(loan_request):
    """Rule: the requester has reached their 18th birthday.

    Reads ``loan_request['user_dob']`` (a ``MM/DD/YYYY`` string) and returns a
    dict whose only key, ``request_age_check``, holds the pass/fail result.

    Raises:
        ValueError: if ``user_dob`` does not match ``MM/DD/YYYY``.
    """
    # Imports live inside the function so they are available when the ODFV runs.
    from datetime import date, datetime

    dob = datetime.strptime(loan_request['user_dob'], '%m/%d/%Y').date()
    today = date.today()

    # Count completed calendar years instead of dividing a day count by an
    # average year length: the division is off by up to a day around the
    # birthday depending on how many leap days fall inside the interval.
    birthday_pending = (today.month, today.day) < (dob.month, dob.day)
    completed_years = today.year - dob.year - birthday_pending

    return {'request_age_check': completed_years >= 18}

@on_demand_feature_view(
    sources=[loan_request],
    mode='python',
    schema=[Field('request_residence_check', Bool)],
    description='The loan request is coming from a coverage area',
    tags={'feature_type': 'rule'}
)
def request_residence_check(loan_request):
    """Rule: the requester's state is one of the covered states.

    Reads ``loan_request['user_state']`` and returns a dict whose only key,
    ``request_residence_check``, holds the pass/fail result.
    """
    covered_states = ('CA', 'NY')
    is_covered = loan_request['user_state'] in covered_states
    return {'request_residence_check': is_covered}

# Bundle the four rule feature views into a single Feature Service.
rules_fs = FeatureService(
    name='rules_fs',
    features=[
        request_within_limits,
        request_credit_check,
        request_age_check,
        request_residence_check,
    ],
)

# Validate the Feature Service before it is used in the cells below.
rules_fs.validate()

In [None]:
# Mock request: edit the values below to make individual rules pass or fail.
# As written, "TX" is outside the accepted states [CA, NY], so the residence
# rule will break.
mock_loan_request = dict(
    loan_request_amount=5000,
    user_credit_score=500,
    user_dob="01/01/1990",
    user_state="TX",
)

In [None]:
# Run every rule in the Feature Service against the mock request and report the outcome.
for rule in rules_fs.features:
    rule_fv = rule.feature_definition
    # Each rule returns a single-entry dict, so the first value is the pass/fail flag.
    rule_passed = next(iter(rule_fv.run(loan_request=mock_loan_request).values()))
    if rule_passed:
        print("Request passes rule " + rule_fv.info.name)
    else:
        print("Request violates rule " + rule_fv.info.name)

## Creating a mock data set

Historical information on loan acceptance will be used to build heuristics and features.

The data is similar to the mock request used to check rule adherence, but has associated user_id, timestamps, and historical results. The data has the following schema:

 * user_id
 * loan_request_amount
 * user_dob
 * user_credit_score
 * user_state_of_residence
 * request_timestamp
 * loan_result

`display(loan_ghf)` shows the mocked data together with the additional features that Tecton created.

In [None]:
from tecton import FilteredSource, Entity, batch_feature_view, FileConfig, BatchSource
from datetime import datetime, timedelta

from tecton.types import Float64, Field, String, Int64, Timestamp


# Entity that the batch features are keyed on; rows are joined by `user_id`.
user = Entity(name="user", join_keys=["user_id"])

user.validate()

# Batch source backed by a parquet file of historical loan requests;
# `request_timestamp` is the column Tecton uses as the event time.
loan_history = BatchSource(
    name='loan_history',
    batch_config=FileConfig(
        timestamp_field='request_timestamp',
        file_format='parquet',
        uri='s3://tecton-devrel-rift/loan_history.parquet',
    ),
)

@batch_feature_view(
    sources=[loan_history],
    entities=[user],
    mode='pandas',
    batch_schedule=timedelta(days=1),
    feature_start_time=datetime(2023,1,1),
    schema=[
        Field("user_id", String),
        Field("request_timestamp", Timestamp),
        Field("loan_request_amount", Int64),
        Field("loan_rank", Float64),
        Field("user_credit_score", Int64),
        Field("credit_rank", Float64),
        Field("loan_result", String),
    ]
)
def loan_percentiles(loan_history):
    """Add percentile ranks for loan amount and credit score to the loan history.

    Args:
        loan_history: pandas DataFrame with at least the columns ``user_id``,
            ``request_timestamp``, ``loan_request_amount``,
            ``user_credit_score`` and ``loan_result``.

    Returns:
        A new DataFrame with the columns declared in the schema above, where
        ``loan_rank`` / ``credit_rank`` are each row's percentile (0-100)
        within the ``loan_request_amount`` / ``user_credit_score`` column.
        The input frame is not modified.
    """
    from scipy import stats

    amounts = loan_history['loan_request_amount']
    scores = loan_history['user_credit_score']

    # percentileofscore returns one value per input row. Assigning the raw arrays
    # is positional; wrapping them in a fresh pd.Series would align on a new
    # 0..n-1 index and yield NaN / misplaced ranks whenever the input frame's
    # index is not that default range.
    ranked = loan_history.assign(
        loan_rank=stats.percentileofscore(amounts, amounts),
        credit_rank=stats.percentileofscore(scores, scores),
    )
    return ranked[["user_id","request_timestamp","loan_request_amount", "loan_rank", "user_credit_score","credit_rank", "loan_result"]]

loan_percentiles.validate()

In [None]:
# Time window of historical features to retrieve.
start = datetime(2023, 1, 1)
end = datetime(2023, 6, 1)

# Compute the feature view over the window and pull the result into pandas.
loan_ghf = (
    loan_percentiles
    .get_historical_features(start_time=start, end_time=end)
    .to_pandas()
)
display(loan_ghf)

## Tecton Heuristics

A Tecton ODFV is used for both rules and heuristics. For the rules, only request data was necessary, but our heuristic needs both the incoming request data and historical data (the mock data just created) to compare it against.

In [None]:
from tecton import RequestSource, on_demand_feature_view
from tecton.types import Bool

# Fields expected in the request payload sent to the heuristic.
heuristic_schema = [
    Field("loan_request_amount", Int64),
    Field("user_credit_score", Int64),
    Field("user_dob", String),
    Field("user_state", String),
]

# Request source that carries the payload described above.
heuristic_request = RequestSource(
    schema=heuristic_schema,
)

# Output schema of the heuristic feature view. The field names must match the keys
# of the dict returned by `loan_odfv`, so the Bool field is `heuristic_accept_loan`
# (the heuristic's decision), not `accept_loan`.
output_schema = [
    Field("loan_rank", Float64),
    Field("credit_rank", Float64),
    Field("heuristic_accept_loan", Bool),
]

# Heuristic feature view. Unlike the rules in the previous section, which take a
# single request source, it combines the request with the historical feature view.
@on_demand_feature_view(
    sources=[heuristic_request, loan_percentiles],
    mode='python',
    schema=output_schema
)
def loan_odfv(heuristic_request, loan_percentiles):
    """Rank the request against historical data and apply the acceptance heuristic.

    Args:
        heuristic_request: mapping with ``loan_request_amount`` and
            ``user_credit_score`` for the incoming request.
        loan_percentiles: historical data providing ``loan_request_amount``
            and ``user_credit_score`` values to rank against.

    Returns:
        Dict with ``loan_rank`` and ``credit_rank`` (percentiles of the request
        within the historical values) and ``heuristic_accept_loan``.
    """
    from scipy import stats

    amount_percentile = stats.percentileofscore(
        loan_percentiles['loan_request_amount'],
        heuristic_request["loan_request_amount"],
    )
    score_percentile = stats.percentileofscore(
        loan_percentiles['user_credit_score'],
        heuristic_request["user_credit_score"],
    )

    # Accept when the requester's credit sits in at least as high a percentile
    # of the historical data as the requested amount does.
    accept = amount_percentile <= score_percentile

    return {
        'loan_rank': amount_percentile,
        'credit_rank': score_percentile,
        'heuristic_accept_loan': accept,
    }


loan_odfv.validate()

## Tecton Machine Learning

A logistic regression model is now trained with the Tecton features previously created.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics


# Features (percentile ranks) and label (historical loan outcome) for training.
training_data = loan_ghf[["loan_rank", "credit_rank", "loan_result"]]

X = training_data.drop(columns="loan_result")

# Encode the string label as 0/1. `map` + an explicit integer cast avoids relying on
# `replace` silently downcasting an object column, and fails loudly (NaN cannot be
# cast to int) if a label other than "accepted"/"rejected" shows up.
y = training_data["loan_result"].map({"accepted": 1, "rejected": 0}).astype("int64")

# Hold out 30% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = LogisticRegression(max_iter=1000, random_state=42)

model.fit(X_train, y_train)

y_predict = model.predict(X_test)

# zero_division=0 reports 0 instead of warning when a class has no predictions.
print(metrics.classification_report(y_test, y_predict, zero_division=0))

## Putting everything together

`loan_assessment()` will run all three decision-making paradigms we just created with Tecton. Try changing the mock request and rerunning.

In [None]:
def loan_assessment(new_request):
    """Run a loan request through the rules, the heuristic, and the ML model.

    The three stages run in order and the first failure ends the assessment.
    Progress for each passed stage is printed.

    Args:
        new_request: dict with ``loan_request_amount``, ``user_credit_score``,
            ``user_dob`` and ``user_state``.

    Returns:
        A message naming the stage (and, for rules, the rule) that rejected the
        request, or a final acceptance message when every stage passes.
    """
    # Stage 1: hard rules. Each rule returns a single-entry dict holding its pass/fail flag.
    for rule in rules_fs.features:
        rule_fv = rule.feature_definition
        rule_passed = next(iter(rule_fv.run(loan_request=new_request).values()))
        if not rule_passed:
            return "Loan is rejected. Request violates rule " + rule_fv.info.name
    print("Request passes all rules")

    # Stage 2: heuristic, comparing the request against the historical data.
    history = loan_ghf[['loan_request_amount', 'user_credit_score', 'loan_rank', 'credit_rank', 'loan_result']]
    request_result = loan_odfv.run(heuristic_request=new_request, loan_percentiles=history)

    if not request_result["heuristic_accept_loan"]:
        return "Heuristic rejects loan"
    print("Heuristic accepts loan")

    # Stage 3: ML model. The model was fitted on a DataFrame with these column
    # names, so predict on a one-row frame with the same names (in the same order)
    # rather than a bare nested list, and read the single prediction explicitly.
    model_input = pd.DataFrame(
        [[request_result["loan_rank"], request_result["credit_rank"]]],
        columns=["loan_rank", "credit_rank"],
    )
    if not model.predict(model_input)[0]:
        return "ML Model rejects loan"
    print("ML Model accepts loan")

    return "Request passes all checks, loan accepted"

In [None]:
# Mock request for the end-to-end assessment; change the values and rerun the
# cell to exercise the different outcomes.
mock_request = dict(
    loan_request_amount=5000,
    user_credit_score=500,
    user_dob="01/01/1990",
    user_state="CA",
)

# Run the rules, the heuristic, and the ML model against the mock request.
loan_assessment(mock_request)