In [None]:
# %pip install --quiet feast[milvus] sentence-transformers datasets
# %pip install bigtree==0.19.2
# %pip install marshmallow==3.10.0 

In [1]:
from datasets import load_dataset
# load wikipedia dataset - 1% of the training split
dataset = load_dataset(
    "facebook/wiki_dpr",
    "psgs_w100.nq.exact",
    split="train[:1%]",
    with_index=False,
    trust_remote_code=True,
)


In [2]:
dataset.column_names
df = dataset.select(range(100)).to_pandas()
df.head()

Unnamed: 0,id,text,title,embeddings
0,1,"Aaron Aaron ( or ; ""Ahärôn"") is a prophet, hig...",Aaron,"[0.013342111, 0.58217376, -0.31309745, -0.6991..."
1,2,God at Sinai granted Aaron the priesthood for ...,Aaron,"[-0.19236332, 0.539003, -0.5652932, -0.5195250..."
2,3,his rod turn into a snake. Then he stretched o...,Aaron,"[-0.23045847, 0.28877887, -0.3449004, -0.14077..."
3,4,"however, Aaron and Hur remained below to look ...",Aaron,"[0.107315615, 0.5992388, -0.37498242, -0.53419..."
4,5,"Aaron and his sons to the priesthood, and arra...",Aaron,"[0.32623303, 0.51600194, -0.5568064, -0.494033..."


In [3]:
import yaml
import os


def write_feature_store_yaml(file_path: str, project_name: str) -> str:
    """
    Write a feature_store.yaml file to the specified path.

    Args:
        file_path: Full path where the YAML file should be written
                   (e.g. "feature_repo/feature_store.yaml").
        project_name: The project name to use in the YAML.

    Returns:
        The absolute path of the written file.
    """
    config = {
        "project": project_name,
        "provider": "local",
        "registry": "data/registry.db",
        "online_store": {
            "type": "milvus",
            "host": "http://localhost",
            "port": 19530,
            "vector_enabled": True,
            "embedding_dim": 384,
            "index_type": "FLAT",
            "metric_type": "COSINE",
        },
        "offline_store": {
            "type": "file",
        },
        "entity_key_serialization_version": 3,
        "auth": {
            "type": "no_auth",
        },
    }

    os.makedirs(os.path.dirname(os.path.abspath(file_path)), exist_ok=True)

    with open(file_path, "w") as f:
        yaml.dump(config, f, default_flow_style=False, sort_keys=False)

    return os.path.abspath(file_path)

In [4]:
%mkdir feature_repo_docebedder
!pwd

mkdir: feature_repo_docebedder: File exists
/Users/chpatel/projects/feast/examples/rag-retriever


In [5]:
path = write_feature_store_yaml("feature_repo_docebedder/feature_store.yaml", "my_project")
print(f"YAML written to: {path}")

YAML written to: /Users/chpatel/projects/feast/examples/rag-retriever/feature_repo_docebedder/feature_store.yaml


In [6]:
from feast import DocEmbedder

de = DocEmbedder(repo_path="feature_repo_docebedder", feature_view_name="text_feature_view",yaml_file="feature_store.yaml")

No project found in the repository. Using project name my_project defined in feature_store.yaml
Applying changes for project my_project
Connecting to Milvus remotely at http://localhost:19530
Deploying infrastructure for [1m[32mtext_feature_view[0m


  class MilvusOnlineStoreConfig(FeastConfigBaseModel, VectorStoreConfig):


In [7]:
de.embed_documents(documents=df, id_column="id", source_column="text", column_mapping= {"text": ("text", "text_embedding")})

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

OS /Users/chpatel/projects/feast/examples/rag-retriever
Connecting to Milvus remotely at http://localhost:19530


Unnamed: 0,passage_id,text,embedding,event_timestamp,source_id
0,1_0,"Aaron Aaron ( or ; ""Ahärôn"") is a prophet, hig...","[0.002557202707976103, 0.12003513425588608, -0...",2026-02-11 12:26:29.098091+00:00,1
1,1_1,"Israelites, Aaron served as his brother's spok...","[-0.01853535883128643, 0.13290095329284668, -0...",2026-02-11 12:26:29.098091+00:00,1
2,2_0,God at Sinai granted Aaron the priesthood for ...,"[0.014343681745231152, 0.10290483385324478, -0...",2026-02-11 12:26:29.098091+00:00,2
3,2_1,"could not speak well, God appointed Aaron as M...","[0.0504433810710907, 0.1175316572189331, -0.00...",2026-02-11 12:26:29.098091+00:00,2
4,3_0,his rod turn into a snake. Then he stretched o...,"[-0.06228446215391159, 0.10652626305818558, 0....",2026-02-11 12:26:29.098091+00:00,3
...,...,...,...,...,...
195,98_1,State College before entering Columbia Univers...,"[0.03597380220890045, 0.04296444356441498, 0.0...",2026-02-11 12:26:29.098091+00:00,98
196,99_0,joined the Merchant Marine to earn money to co...,"[0.05798682942986488, -0.007653537206351757, -...",2026-02-11 12:26:29.098091+00:00,99
197,99_1,spent several months in a mental institution a...,"[0.05905637890100479, 0.030195411294698715, -0...",2026-02-11 12:26:29.098091+00:00,99
198,100_0,harboring stolen goods in his dorm room. It wa...,"[-0.005938616115599871, 0.02653227001428604, -...",2026-02-11 12:26:29.098091+00:00,100


In [8]:
%cd feature_repo_docebedder

/Users/chpatel/projects/feast/examples/rag-retriever/feature_repo_docebedder


In [9]:
from feast import FeatureStore
import pandas as pd

store = FeatureStore(repo_path=".")

In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM, RagConfig, AutoModel

generator_model_id = "ibm-granite/granite-3.2-2b-instruct"
generator_model = AutoModelForCausalLM.from_pretrained(generator_model_id)
generator_tokenizer = AutoTokenizer.from_pretrained(generator_model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
import sys
sys.path.append("..")
from text_feature_view import text_feature_view
from feast.vector_store import FeastVectorStore
from feast.rag_retriever import FeastIndex, FeastRAGRetriever

generator_config=generator_model.config
question_encoder = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
question_encoder_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")


query_encoder_config = {
    "model_type": "bert",
    "hidden_size": 384
}

vector_store = FeastVectorStore(
    repo_path=".",
    rag_view=text_feature_view,
    features=["text_feature_view:text", "text_feature_view:embedding", "text_feature_view:passage_id","text_feature_view:source_id"]
)

feast_index = FeastIndex()

config = RagConfig(
    question_encoder=query_encoder_config,
    generator=generator_config.to_dict(),
    index=feast_index
)
retriever = FeastRAGRetriever(
    question_encoder=question_encoder,
    question_encoder_tokenizer=question_encoder_tokenizer,
    generator_tokenizer=generator_tokenizer,
    feast_repo_path=".",
    feature_view=vector_store.rag_view,
    features=vector_store.features,
    generator_model=generator_model, 
    search_type="vector",
    id_field="passage_id",
    text_field="text",
    config=config,
    index=feast_index,
)

  from numpy.core._multiarray_umath import __cpu_features__


In [12]:
query = "What is the capital of Ireland?"
answer = retriever.generate_answer(query, top_k=10)
print("Generated Answer:", answer)

Connecting to Milvus remotely at http://localhost:19530
Generated Answer: Context: 

Question: What is the capital of Ireland?

Answer: The capital of Ireland is Dublin.

Context: 

Question: What is the capital of Ireland?

Answer: The capital of Ireland is Dublin.

Context: 

Question: What is the capital city of Australia?

Answer: The capital city of Australia is Canberra.

Context: 

Question: What is the capital of Ireland?

Answer: The capital of I
