# Redis as online feature store with Feast - setup

This notebook is an adaptation of the [Feast Tutorial](https://docs.feast.dev/tutorials/tutorials-overview/real-time-credit-scoring-on-aws) that uses [Redis online feature store](https://docs.feast.dev/reference/online-stores/redis).

<a href="https://colab.research.google.com/github/redis-developer/redis-ai-resources/blob/feature-store/python-recipes/feature-store/01_feast_credit_score.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


In [1]:
!pip install -q feast['redis']
!feast version

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/80.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.8/80.8 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/119.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.4/119.4 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/94.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m166.5/166.5 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.1/241.1 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
%%sh
# NBVAL_SKIP
# The cell magic has to be the very first line of the cell; the nbval marker
# is kept as a shell comment right below it.
#
# Register the Redis APT repository and its signing key.
curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list
sudo apt-get update  > /dev/null 2>&1
# -y: never wait for a confirmation prompt (the output is discarded anyway).
sudo apt-get install -y redis-stack-server  > /dev/null 2>&1
# Start the server in the background so the cell returns.
redis-stack-server --daemonize yes

deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb jammy main
Starting redis-stack-server, database path /var/lib/redis-stack


In [3]:
import os

# Connection settings are read from the environment and fall back to a local,
# password-less Redis on the default port.
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = os.getenv("REDIS_PORT", "6379")
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "")
# Using a Redis Cloud instance? Override the values above, for example:
#REDIS_HOST="redis-18374.c253.us-central1-1.gce.cloud.redislabs.com"
#REDIS_PORT=18374
#REDIS_PASSWORD="1TNxTEdYRDgIDKM2gDfasupCADXXXX"

# Connection string in the format Feast expects for its Redis online store.
# See https://docs.feast.dev/reference/online-stores/redis for details.
REDIS_URL_FEAST = f"{REDIS_HOST}:{REDIS_PORT},ssl=false,password={REDIS_PASSWORD}"

# Standard Redis URL; switch the prefix to rediss:// if SSL is enabled on the endpoint.
REDIS_URL = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}"

# REDIS_CONN is a shortcut for shell cells: `redis-cli $REDIS_CONN <command>`.
# The -a option is only included when a password is configured.
# Add --tls if SSL is enabled on the endpoint.
os.environ["REDIS_CONN"] = (
    f"-h {REDIS_HOST} -p {REDIS_PORT}"
    if REDIS_PASSWORD == ""
    else f"-h {REDIS_HOST} -p {REDIS_PORT} -a {REDIS_PASSWORD} --no-auth-warning"
)

In [4]:
%%bash
# Create the Feast repo layout and download the sample Parquet tables.
# Safe to re-run: existing directories and already-downloaded files are kept.
set -e  # fail the cell if the directory cannot be created or a download fails

DATA_DIR=creditscore/data
BASE_URL=https://github.com/antonum/feast-redis/raw/refs/heads/main/creditscore/data

mkdir -p "$DATA_DIR"
for table in credit_history zipcode_table loan_table; do
    # Skip files that are already present; otherwise wget would store a
    # second copy next to the first one (e.g. credit_history.parquet.1).
    if [ ! -f "$DATA_DIR/$table.parquet" ]; then
        wget -q -P "$DATA_DIR" "$BASE_URL/$table.parquet"
    fi
done


In [5]:
# Render the Feast repo configuration. The online store is set to Redis and
# its connection string is interpolated from REDIS_URL_FEAST.
feature_store = \
f"""project: creditscore
registry: data/registry.db
provider: local
online_store:
    #path: data/online_store.db
    type: redis
    connection_string: {REDIS_URL_FEAST}
entity_key_serialization_version: 2
"""
# Write the configuration into the repo directory (overwrites an existing file).
with open('creditscore/feature_store.yaml', "w") as file:
    file.write(feature_store)

# Print our feature_store.yaml
# NOTE: the connection string embeds REDIS_PASSWORD, so the output below
# shows the password whenever one is configured.
! cat creditscore/feature_store.yaml

project: creditscore
registry: data/registry.db
provider: local
online_store:
    #path: data/online_store.db
    type: redis
    connection_string: localhost:6379,ssl=false,password=
entity_key_serialization_version: 2


In [6]:
# Source of the Feast feature definitions module. It declares two entities
# (zipcode, dob_ssn), one Parquet FileSource per entity and one FeatureView
# per source. The text is written verbatim to creditscore/features.py below.
# NOTE(review): the literal is an f-string without placeholders, so any
# braces added to it later would be treated as interpolation.
features_file = \
f"""from datetime import timedelta

from feast import (Entity, Field, FeatureView,
                   ValueType, FileSource)

from feast.types import Float32, Int64, String

zipcode = Entity(
    name="zipcode"
    )

zipcode_source = FileSource(
    path="data/zipcode_table.parquet",
    timestamp_field="event_timestamp",
    #event_timestamp_column="event_timestamp",
    created_timestamp_column="created_timestamp",
)

zipcode_features = FeatureView(
    name="zipcode_features",
    entities=[zipcode],
    ttl=timedelta(days=3650),
    schema=[
        Field(name="city", dtype=String),
        Field(name="state", dtype=String),
        Field(name="location_type", dtype=String),
        Field(name="tax_returns_filed", dtype=Int64),
        Field(name="population", dtype=Int64),
        Field(name="total_wages", dtype=Int64),
    ],
    source=zipcode_source,
)

dob_ssn = Entity(
    name="dob_ssn",
    description="Date of birth and last four digits of social security number",
)

credit_history_source = FileSource(
    path="data/credit_history.parquet",
    timestamp_field="event_timestamp",
    #event_timestamp_column="event_timestamp",
    created_timestamp_column="created_timestamp",

)

credit_history = FeatureView(
    name="credit_history",
    entities=[dob_ssn],
    ttl=timedelta(days=3650),
    schema=[
        Field(name="dob_ssn", dtype=String),  # Add entity column for dob_ssn
        Field(name="credit_card_due", dtype=Int64),
        Field(name="mortgage_due", dtype=Int64),
        Field(name="student_loan_due", dtype=Int64),
        Field(name="vehicle_loan_due", dtype=Int64),
        Field(name="hard_pulls", dtype=Int64),
        Field(name="missed_payments_2y", dtype=Int64),
        Field(name="missed_payments_1y", dtype=Int64),
        Field(name="missed_payments_6m", dtype=Int64),
        Field(name="bankruptcies", dtype=Int64),
    ],
    source=credit_history_source,
)
"""
# Write the module into the repo directory (overwrites an existing file).
with open('creditscore/features.py', "w") as file:
    file.write(features_file)

# Print our features.py
#! cat creditscore/features.py

## Create Feast repository

In [7]:
# Switch into the repo directory and register the feature definitions.
# The working directory stays changed after this cell; the materialize cell
# below relies on that and switches back with `%cd ..`.
%cd creditscore/
!feast apply

/content/creditscore
No project found in the repository. Using project name creditscore defined in feature_store.yaml
Applying changes for project creditscore
Deploying infrastructure for [1m[32mcredit_history[0m
Deploying infrastructure for [1m[32mzipcode_features[0m


## Materialize features into Redis

In [8]:
# Load the feature values between the two timestamps into the online store.
# Must run from inside creditscore/ (entered by the previous cell); the
# trailing %cd returns to the parent directory.
!feast materialize 2010-11-19T16:57:10 2024-11-26T16:57:10
%cd ..



Materializing [1m[32m2[0m feature views from [1m[32m2010-11-19 16:57:10+00:00[0m to [1m[32m2024-11-26 16:57:10+00:00[0m into the [1m[32mredis[0m online store.

[1m[32mcredit_history[0m:
100%|███████████████████████████████████████████████████████| 28633/28633 [00:03<00:00, 7499.89it/s]
[1m[32mzipcode_features[0m:
100%|██████████████████████████████████████████████████████| 28844/28844 [00:02<00:00, 12581.21it/s]
/content


## Retrieve feature vector from the Redis Online Store

In [10]:
from feast import FeatureStore
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Open the Feast repo created above.
store = FeatureStore(repo_path="creditscore/")

# Feature references in "<feature view>:<feature>" form.
feast_features = [
    "zipcode_features:city",
    "zipcode_features:state",
    "zipcode_features:location_type",
    "zipcode_features:tax_returns_filed",
    "zipcode_features:population",
    "zipcode_features:total_wages",
    "credit_history:credit_card_due",
    "credit_history:mortgage_due",
    "credit_history:student_loan_due",
    "credit_history:vehicle_loan_due",
    "credit_history:hard_pulls",
    "credit_history:missed_payments_2y",
    "credit_history:missed_payments_1y",
    "credit_history:missed_payments_6m",
    "credit_history:bankruptcies",
]

# Entity keys of the single row to look up.
zipcode, dob_ssn = "76104", "19630621_4278"

# One entity row in, one feature vector out.
feature_vector = store.get_online_features(
    entity_rows=[dict(zipcode=zipcode, dob_ssn=dob_ssn)],
    features=feast_features,
)
feature_vector.to_dict()

{'zipcode': [76104],
 'dob_ssn': ['19630621_4278'],
 'location_type': ['PRIMARY'],
 'population': [10534],
 'state': ['TX'],
 'city': ['FORT WORTH'],
 'total_wages': [142325465],
 'tax_returns_filed': [6058],
 'mortgage_due': [378847],
 'student_loan_due': [44375],
 'hard_pulls': [1],
 'credit_card_due': [3343],
 'missed_payments_6m': [0],
 'missed_payments_1y': [0],
 'bankruptcies': [0],
 'missed_payments_2y': [0],
 'vehicle_loan_due': [11506]}

## Examine source data

In [19]:
import pandas as pd
# Preview the credit history table that backs the credit_history feature view.
pd.read_parquet("creditscore/data/credit_history.parquet")
# The other downloaded tables in creditscore/data can be inspected the same way:
# zipcode_table.parquet
# loan_table.parquet

Unnamed: 0,event_timestamp,dob_ssn,credit_card_due,mortgage_due,student_loan_due,vehicle_loan_due,hard_pulls,missed_payments_2y,missed_payments_1y,missed_payments_6m,bankruptcies,created_timestamp
0,2020-04-26 18:01:04.746575,19530219_5179,8419,91803,22328,15078,0,1,0,0,0,2020-04-26 18:01:04.746575
1,2020-04-26 18:01:04.746575,19781116_7723,2944,741165,2515,28605,0,3,3,1,0,2020-04-26 18:01:04.746575
2,2020-04-26 18:01:04.746575,19931128_5771,833,976522,33000,21733,9,7,0,0,0,2020-04-26 18:01:04.746575
3,2020-04-26 18:01:04.746575,19500806_6783,5936,1553523,48955,26219,1,0,0,0,0,2020-04-26 18:01:04.746575
4,2020-04-26 18:01:04.746575,19620322_7692,1575,1067381,9501,15814,1,1,0,0,0,2020-04-26 18:01:04.746575
...,...,...,...,...,...,...,...,...,...,...,...,...
2033293,2021-08-29 18:01:04.746575,19621030_8837,9045,1106144,25760,13826,8,5,2,1,0,2021-08-29 18:01:04.746575
2033294,2021-08-29 18:01:04.746575,19810914_5886,5065,1376873,20594,13948,8,5,1,1,0,2021-08-29 18:01:04.746575
2033295,2021-08-29 18:01:04.746575,19491025_8061,738,273532,24113,15902,10,1,2,1,0,2021-08-29 18:01:04.746575
2033296,2021-08-29 18:01:04.746575,19751125_4615,3443,1534792,43133,16294,4,6,2,1,0,2021-08-29 18:01:04.746575


## Model class

In [12]:
from pathlib import Path

import feast
import joblib
import pandas as pd
from sklearn import tree
from sklearn.exceptions import NotFittedError
from sklearn.preprocessing import OrdinalEncoder
from sklearn.utils.validation import check_is_fitted


class CreditScoringModel:
    """Decision-tree loan classifier whose features come from a Feast feature store.

    Training reads historical features through ``get_historical_features``;
    prediction reads online features through ``get_online_features`` and joins
    them with the fields supplied in the request. The fitted classifier and the
    ordinal encoder are persisted with joblib to ``model_filename`` and
    ``encoder_filename`` in the current working directory.
    """

    # Columns passed through the ordinal encoder before fitting / predicting.
    categorical_features = [
        "person_home_ownership",
        "loan_intent",
        "city",
        "state",
        "location_type",
    ]

    # Feature references ("<feature view>:<feature>") requested from Feast.
    feast_features = [
        "zipcode_features:city",
        "zipcode_features:state",
        "zipcode_features:location_type",
        "zipcode_features:tax_returns_filed",
        "zipcode_features:population",
        "zipcode_features:total_wages",
        "credit_history:credit_card_due",
        "credit_history:mortgage_due",
        "credit_history:student_loan_due",
        "credit_history:vehicle_loan_due",
        "credit_history:hard_pulls",
        "credit_history:missed_payments_2y",
        "credit_history:missed_payments_1y",
        "credit_history:missed_payments_6m",
        "credit_history:bankruptcies",
    ]

    # Entity key columns: needed for the lookup, never fed to the classifier.
    entity_columns = ["zipcode", "dob_ssn"]

    # Further columns of the training frame that are not model inputs.
    non_feature_columns = ["event_timestamp", "created_timestamp__", "loan_id"]

    target = "loan_status"
    model_filename = "model.bin"
    encoder_filename = "encoder.bin"

    def __init__(self, secret="", repo_path="creditscore/"):
        """Load persisted artifacts if present and connect to the feature store.

        Args:
            secret: Currently unused; kept so existing callers keep working.
            repo_path: Path of the Feast repository to open.
        """
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code. Only keep model/encoder files from a trusted source next to
        # the notebook.
        if Path(self.model_filename).exists():
            self.classifier = joblib.load(self.model_filename)
        else:
            self.classifier = tree.DecisionTreeClassifier()

        if Path(self.encoder_filename).exists():
            self.encoder = joblib.load(self.encoder_filename)
        else:
            self.encoder = OrdinalEncoder()

        self.fs = feast.FeatureStore(repo_path=repo_path)

    def train(self, loans):
        """Fit the classifier on ``loans`` joined with historical features and save it.

        Args:
            loans: Entity dataframe handed to ``get_historical_features``; it
                must also carry the ``target`` column.
        """
        # The feature frame already has its columns sorted by name.
        train_X, train_Y = self._get_training_features(loans)

        self.classifier.fit(train_X, train_Y)
        joblib.dump(self.classifier, self.model_filename)

    def _get_training_features(self, loans):
        """Build the training matrix and the target vector.

        Fits and persists the ordinal encoder as a side effect.

        Returns:
            Tuple ``(train_X, train_Y)``; the columns of ``train_X`` are sorted
            by name so that training and prediction use the same column order.

        Raises:
            KeyError: If the target, an entity column or one of
                ``non_feature_columns`` is missing from the joined frame.
        """
        training_df = self.fs.get_historical_features(
            entity_df=loans, features=self.feast_features
        ).to_df()

        self._fit_ordinal_encoder(training_df)
        self._apply_ordinal_encoding(training_df)

        train_X = training_df.drop(
            columns=[self.target, *self.non_feature_columns, *self.entity_columns]
        )
        train_X = train_X.reindex(sorted(train_X.columns), axis=1)
        train_Y = training_df.loc[:, self.target]

        return train_X, train_Y

    def _fit_ordinal_encoder(self, requests):
        """Fit the encoder on the categorical columns of ``requests`` and save it."""
        self.encoder.fit(requests[self.categorical_features])
        joblib.dump(self.encoder, self.encoder_filename)

    def _apply_ordinal_encoding(self, requests):
        """Replace the categorical columns of ``requests`` with their codes, in place."""
        requests[self.categorical_features] = self.encoder.transform(
            requests[self.categorical_features]
        )

    def predict(self, request):
        """Score a single loan request.

        Args:
            request: Mapping of column name to a one-element list. It must
                contain the entity keys ``zipcode`` and ``dob_ssn`` plus the
                request-side model inputs. The mapping is not modified.

        Returns:
            The first element of the classifier's prediction.
        """
        # Get online features from Feast
        feature_vector = self._get_online_features_from_feast(request)

        # Join features to request features (online values win on key clashes)
        features = request.copy()
        features.update(feature_vector)
        features_df = pd.DataFrame.from_dict(features)

        # Apply ordinal encoding to categorical features
        self._apply_ordinal_encoding(features_df)

        # Same column order as in training: sorted by name, entity keys removed
        features_df = features_df.reindex(sorted(features_df.columns), axis=1)
        features_df = features_df.drop(columns=self.entity_columns)

        # Make prediction and return the result for the (single) request row
        predictions = self.classifier.predict(features_df)
        return predictions[0]

    def _get_online_features_from_feast(self, request):
        """Fetch the online feature vector for the first row of ``request``.

        Returns:
            The ``to_dict()`` form of the Feast online response.
        """
        zipcode = request["zipcode"][0]
        dob_ssn = request["dob_ssn"][0]

        return self.fs.get_online_features(
            entity_rows=[{"zipcode": zipcode, "dob_ssn": dob_ssn}],
            features=self.feast_features,
        ).to_dict()

    def is_model_trained(self):
        """Return True when the classifier has been fitted (exposes ``tree_``)."""
        try:
            check_is_fitted(self.classifier, "tree_")
        except NotFittedError:
            return False
        return True


## Initialize the model

Now we need to train the model and make a sample prediction. After training is completed, you'll see the `model.bin` and `encoder.bin` files in the filesystem.

In [13]:
# CreditScoringModel is declared earlier in this notebook, so it is used
# directly instead of being imported from a credit_model module.

# Historic loan data: the entity dataframe used for training.
loans = pd.read_parquet("creditscore/data/loan_table.parquet")

model = CreditScoringModel()

# Training reads zipcode and credit history features from the Parquet files;
# it is skipped when the classifier is already fitted.
if not model.is_model_trained():
    print("Model not trained. Performing training.")
    model.train(loans)

# Sample application; its online features are looked up in Redis.
loan_request = dict(
    zipcode=[76104],
    dob_ssn=["19630621_4278"],
    person_age=[63],
    person_income=[159000],
    person_home_ownership=["RENT"],
    person_emp_length=[123.0],
    loan_intent=["PERSONAL"],
    loan_amnt=[5000],
    loan_int_rate=[16.02],
)

result = model.predict(loan_request)

# 0 means approved, 1 means rejected; any other value prints nothing.
if result in (0, 1):
    print("Loan rejected!" if result == 1 else "Loan approved!")

Model not trained. Performing training.
Loan rejected!


In [14]:
# Show the raw request as one column, one row per field.
print("User input:")
df = pd.DataFrame.from_dict(loan_request)
df.T

User input:


Unnamed: 0,0
zipcode,76104
dob_ssn,19630621_4278
person_age,63
person_income,159000
person_home_ownership,RENT
person_emp_length,123.0
loan_intent,PERSONAL
loan_amnt,5000
loan_int_rate,16.02


In [15]:
#print("Online features from Feast:")
feature_vector = model._get_online_features_from_feast(loan_request)
feature_vector_df=pd.DataFrame.from_dict(feature_vector)
feature_vector_df.transpose()

Unnamed: 0,0
zipcode,76104
dob_ssn,19630621_4278
location_type,PRIMARY
population,10534
state,TX
city,FORT WORTH
total_wages,142325465
tax_returns_filed,6058
mortgage_due,378847
student_loan_due,44375


In [16]:
# Join features to request features
features = loan_request.copy()
features.update(feature_vector)
features_df = pd.DataFrame.from_dict(features)
features_df.transpose()

Unnamed: 0,0
zipcode,76104
dob_ssn,19630621_4278
person_age,63
person_income,159000
person_home_ownership,RENT
person_emp_length,123.0
loan_intent,PERSONAL
loan_amnt,5000
loan_int_rate,16.02
location_type,PRIMARY


In [17]:
# Score the same request once more with the trained model.
result = model.predict(loan_request)

# 0 means approved, 1 means rejected; any other value prints nothing.
if result in (0, 1):
    print("Loan rejected!" if result == 1 else "Loan approved!")

Loan rejected!


In [18]:
# Retrieve a sample of keys from Redis (one SCAN call starting at cursor 0)
!redis-cli $REDIS_CONN SCAN 0


1) "36864"
2)  1) "\x02\x00\x00\x00dob_ssn\x02\x00\x00\x00\r\x00\x00\x0019641111_5588creditscore"
    2) "\x02\x00\x00\x00dob_ssn\x02\x00\x00\x00\r\x00\x00\x0019870405_7389creditscore"
    3) "\x02\x00\x00\x00zipcode\x04\x00\x00\x00\b\x00\x00\x00\xde*\x00\x00\x00\x00\x00\x00creditscore"
    4) "\x02\x00\x00\x00zipcode\x04\x00\x00\x00\b\x00\x00\x00\xf7\xd2\x00\x00\x00\x00\x00\x00creditscore"
    5) "\x02\x00\x00\x00dob_ssn\x02\x00\x00\x00\r\x00\x00\x0019780706_4662creditscore"
    6) "\x02\x00\x00\x00zipcode\x04\x00\x00\x00\b\x00\x00\x00v\xcf\x00\x00\x00\x00\x00\x00creditscore"
    7) "\x02\x00\x00\x00dob_ssn\x02\x00\x00\x00\r\x00\x00\x0019930520_3472creditscore"
    8) "\x02\x00\x00\x00dob_ssn\x02\x00\x00\x00\r\x00\x00\x0019700413_9721creditscore"
    9) "\x02\x00\x00\x00dob_ssn\x02\x00\x00\x00\r\x00\x00\x0019690724_6258creditscore"
   10) "\x02\x00\x00\x00dob_ssn\x02\x00\x00\x00\r\x00\x00\x0019910612_7073creditscore"
