## Feast Installation and Data

In [26]:
!pip install feast scikit-learn 'feast[gcp]' 



In [27]:
!pip install numpy==2.1 --quiet

In [28]:
!pip install feast==0.54 --upgrade --quiet

In [29]:
!feast version

[1m[34mFeast SDK Version: [1m[32m"0.54.0"


In [30]:
!pip install google-cloud-bigquery



### Data

In [31]:
!wget https://raw.githubusercontent.com/IITMBSMLOps/ga_resources/refs/heads/week_3/iris_data_adapted_for_feast.csv

--2025-10-12 09:23:19--  https://raw.githubusercontent.com/IITMBSMLOps/ga_resources/refs/heads/week_3/iris_data_adapted_for_feast.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4009 (3.9K) [text/plain]
Saving to: ‘iris_data_adapted_for_feast.csv’


2025-10-12 09:23:19 (60.1 MB/s) - ‘iris_data_adapted_for_feast.csv’ saved [4009/4009]



In [32]:
%%bash
feast init iris_feature_repo
cd iris_feature_repo


Creating a new Feast repository in /home/jupyter/iris_feature_repo.



In [33]:
repo = "/home/jupyter/iris_feature_repo/feature_repo"

## GCP and BigQuery

In [None]:
#FOR COLAB

# from google.colab import auth
# auth.authenticate_user()

In [34]:
PROJECT_ID= "gentle-presence-472611-u8" #@param {type:"string"}
BUCKET_NAME= "mlops-course-gentle-presence-472611-u8-v4-unique-week1" #@param {type:"string"} custom
BIGQUERY_DATASET_NAME="feast_iris_data" #@param {type:"string"} custom
TABLE_ID = "iris_features" #@param {type:"string"

# ! gcloud config set project $PROJECT_ID
# %env GOOGLE_CLOUD_PROJECT= $PROJECT_ID
# !echo project_id = $PROJECT_ID > ~/.bigqueryrc

- Create table in BigQuery

In [35]:
from google.cloud import bigquery

client = bigquery.Client(project=PROJECT_ID)

#FOR creating dataset 

#dataset_ref = client.dataset(BIGQUERY_DATASET_NAME)
#dataset = bigquery.Dataset(dataset_ref)
#dataset = client.create_dataset(dataset)

In [36]:
import pandas as pd
df = pd.read_csv("iris_data_adapted_for_feast.csv")
# df["event_timestamp"] = pd.to_datetime(df["event_timestamp"]).dt.tz_localize(None)
# df["created_timestamp"] = pd.to_datetime(df["created_timestamp"]).dt.tz_localize(None)

df["event_timestamp"] = pd.to_datetime(df["event_timestamp"], utc=True)
df["created_timestamp"] = pd.to_datetime(df["created_timestamp"], utc=True)

In [37]:
df.dtypes

event_timestamp      datetime64[ns, UTC]
iris_id                            int64
sepal_length                     float64
sepal_width                      float64
petal_length                     float64
petal_width                      float64
species                           object
created_timestamp    datetime64[ns, UTC]
dtype: object

In [38]:
table_ref = dataset_ref.table(TABLE_ID)

job = client.load_table_from_dataframe(df, table_ref)
job.result()

print("Table created!")

Table created!


## Feature Store

- Define the feature view in the iris repository
- Modify the YAML file to set bigquery as the offline store

In [None]:
"""
project: iris_classification
registry: data/registry.db
provider: gcp
offline_store:
  type: bigquery

"""

In [None]:
"""
from datetime import timedelta

from feast import BigQuerySource, Entity, Feature, FeatureView, ValueType, Field
from feast.types import Float64, Int64

iris_entity = Entity(name="iris_id", join_keys=["iris_id"], value_type=ValueType.INT64,)

table = "gentle-presence-472611-u8.feast_iris_data.iris_features"

iris_data_source = BigQuerySource(
    table=table,
    timestamp_field="event_timestamp",
    created_timestamp_column="created_timestamp",
)

iris_feature_view = FeatureView(
    name="iris_feature_view",
    entities=[iris_entity],
    ttl=timedelta(weeks=52),
    schema=[
        Field(name="sepal_length", dtype=Float64),
        Field(name="sepal_width", dtype=Float64),
        Field(name="petal_length", dtype=Float64),
        Field(name="petal_width", dtype=Float64),
    ],
    source=iris_data_source,
    tags={"team": "iris_species"},
)
"""

In [58]:
%%bash
cd iris_feature_repo/feature_repo
feast apply

No project found in the repository. Using project name iris_classification defined in feature_store.yaml
Applying changes for project iris_classification
Deploying infrastructure for iris_feature_view


In [59]:
fs.get_feature_view("iris_feature_view")

<FeatureView(name = iris_feature_view, entities = ['iris_id'], ttl = 364 days, 0:00:00, stream_source = None, data_source = {
  "type": "BATCH_BIGQUERY",
  "timestampField": "event_timestamp",
  "createdTimestampColumn": "created_timestamp",
  "bigqueryOptions": {
    "table": "gentle-presence-472611-u8.feast_iris_data.iris_features"
  },
  "name": "gentle-presence-472611-u8.feast_iris_data.iris_features",
  "meta": {
    "createdTimestamp": "2025-10-12T09:44:29.219383Z",
    "lastUpdatedTimestamp": "2025-10-12T09:44:29.415131Z"
  }
}, source_views = [], batch_source = {
  "type": "BATCH_BIGQUERY",
  "timestampField": "event_timestamp",
  "createdTimestampColumn": "created_timestamp",
  "bigqueryOptions": {
    "table": "gentle-presence-472611-u8.feast_iris_data.iris_features"
  },
  "name": "gentle-presence-472611-u8.feast_iris_data.iris_features",
  "meta": {
    "createdTimestamp": "2025-10-12T09:44:29.219383Z",
    "lastUpdatedTimestamp": "2025-10-12T09:44:29.415131Z"
  }
}, entity

## Train and Materialize
- Split into train, val, test
- Store train and val features in Offline store for batch evalation
- Predict on the val data

In [46]:
import feast
from joblib import dump
import numpy as np
from sklearn.model_selection import train_test_split
from pandas.plotting import parallel_coordinates
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn import metrics

# Load the data
df = pd.read_csv("iris_data_adapted_for_feast.csv")
df["event_timestamp"] = pd.to_datetime(df["event_timestamp"]).dt.tz_localize(None)
df["created_timestamp"] = pd.to_datetime(df["created_timestamp"]).dt.tz_localize(None)

# Split into train/test
train_val, test = train_test_split(df, test_size = 0.15, stratify = df['species'], random_state = 42)
train, val = train_test_split(train_val, test_size = 0.3, stratify = train_val['species'], random_state = 42)

# Connect to your local feature store
fs = feast.FeatureStore(repo_path="iris_feature_repo/feature_repo")

# Retrieve training and val data from BigQuery
features_ref = [
        "iris_feature_view:sepal_length",
        "iris_feature_view:sepal_width",
        "iris_feature_view:petal_length",
        "iris_feature_view:petal_width",
    ]

feature_cols = ["sepal_length", "sepal_width", "petal_length", "petal_width", "species"]

# Drop features from entity dataframe
train_entity = train.drop(columns=feature_cols)
val_entity = val.drop(columns=feature_cols)

train_features = fs.get_historical_features(
    entity_df = train_entity,
    features = features_ref,
).to_df()

val_features = fs.get_historical_features(
    entity_df = val_entity,
    features = features_ref,
).to_df()


In [47]:
# Train val split
X_train = train_features[['sepal_length','sepal_width','petal_length','petal_width']]
y_train = train['species']
X_val = val_features[['sepal_length','sepal_width','petal_length','petal_width']]
y_val = val['species']

#predict with the offline store on the val data
#considering the val data as batch prediction and the test data here as real time inference

iris_model = DecisionTreeClassifier(random_state = 1)
iris_model.fit(X_train,y_train)

# Save model
dump(iris_model, "iris_model.bin")

['iris_model.bin']

In [48]:
prediction=iris_model.predict(X_val)
print('The accuracy of the Decision Tree is',"{:.3f}".format(metrics.accuracy_score(prediction,y_val)))

The accuracy of the Decision Tree is 0.333


In [60]:
output = val_features.copy()
output['predictions'] = prediction
output.to_csv("predictions.csv", index=False)

In [49]:
!cd $repo && feast materialize 2025-09-17T00:00:00 2025-10-01T00:00:00

Materializing [1m[32m1[0m feature views from [1m[32m2025-09-17 00:00:00+00:00[0m to [1m[32m2025-10-01 00:00:00+00:00[0m into the [1m[32msqlite[0m online store.

[1m[32miris_feature_view[0m:


## Fetch and Infer
- Fetch from the Online Store
- Infer on the test data

In [52]:
test

Unnamed: 0,event_timestamp,iris_id,sepal_length,sepal_width,petal_length,petal_width,species,created_timestamp
34,2025-09-21 10:40:17.102131,1003,5.04,3.48,1.48,0.32,setosa,2025-10-02 10:40:17.172178
11,2025-09-28 10:40:17.102131,1001,5.45,2.41,3.87,1.0,versicolor,2025-10-02 10:40:17.172178
22,2025-09-24 10:40:17.102131,1002,4.69,3.03,1.19,0.24,setosa,2025-10-02 10:40:17.172178
29,2025-10-01 10:40:17.102131,1002,4.84,2.9,1.29,0.2,setosa,2025-10-02 10:40:17.172178
25,2025-09-27 10:40:17.102131,1002,4.86,2.92,1.43,0.11,setosa,2025-10-02 10:40:17.172178
9,2025-09-26 10:40:17.102131,1001,5.52,2.41,3.76,1.11,versicolor,2025-10-02 10:40:17.172178
21,2025-09-23 10:40:17.102131,1002,4.91,3.01,1.41,0.3,setosa,2025-10-02 10:40:17.172178


In [53]:
from joblib import load

# Read features from Feast
fetch_model = load("iris_model.bin")

test_features = fs.get_online_features(
    entity_rows = [{"iris_id": iris_id} for iris_id in test['iris_id']],
    features = features_ref,
).to_df()


In [54]:
test_features[['sepal_length','sepal_width','petal_length','petal_width']]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.21,3.43,1.58,0.36
1,5.5,2.36,3.77,1.31
2,4.85,2.94,1.51,0.32
3,4.85,2.94,1.51,0.32
4,4.85,2.94,1.51,0.32
5,5.5,2.36,3.77,1.31
6,4.85,2.94,1.51,0.32


In [55]:
test_feat = test_features[['sepal_length','sepal_width','petal_length','petal_width']]

# Make prediction
predictions = fetch_model.predict(test_feat)
test_feat['predictions'] = predictions
print("Predictions using online store:")
print(test_feat)

Predictions using online store:
   sepal_length  sepal_width  petal_length  petal_width predictions
0          5.21         3.43          1.58         0.36  versicolor
1          5.50         2.36          3.77         1.31  versicolor
2          4.85         2.94          1.51         0.32      setosa
3          4.85         2.94          1.51         0.32      setosa
4          4.85         2.94          1.51         0.32      setosa
5          5.50         2.36          3.77         1.31  versicolor
6          4.85         2.94          1.51         0.32      setosa
