In [6]:
%%bash
# Install Jupyter kernel in the virtual environment
source /home/jupyter/california_housing/.env/bin/activate 
pip install ipykernel -q

# Install custom kernel
python -m ipykernel install --user --name=mlops --display-name="Python (practice)"

Installed kernelspec mlops in /home/jupyter/.local/share/jupyter/kernels/mlops


In [7]:
import os
import warnings

# Setup environment path for consistent package management
os.environ['PATH'] = os.path.abspath('.env/bin') + ':' + os.environ.get('PATH', '')

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")
%env PYTHONWARNINGS=ignore
%env JUPYTER_PLATFORM_DIRS=1

env: JUPYTER_PLATFORM_DIRS=1


In [8]:
# Import required libraries
import pandas as pd
from datetime import datetime, timedelta

# Load the Iris dataset
data = pd.read_csv('data/iris.csv')
print(f"Original dataset shape: {data.shape}")

# Add timestamps for point-in-time feature serving
# Create timestamps spaced 5 minutes apart for each record
start_date = datetime.now()
timestamps = [start_date + timedelta(minutes=i*5) for i in range(len(data))]
data['event_timestamp'] = timestamps

print("Dataset with timestamps:")
data.head(10)

Original dataset shape: (150, 5)
Dataset with timestamps:


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,event_timestamp
0,5.1,3.5,1.4,0.2,setosa,2025-07-19 13:08:33.323899
1,4.9,3.0,1.4,0.2,setosa,2025-07-19 13:13:33.323899
2,4.7,3.2,1.3,0.2,setosa,2025-07-19 13:18:33.323899
3,4.6,3.1,1.5,0.2,setosa,2025-07-19 13:23:33.323899
4,5.0,3.6,1.4,0.2,setosa,2025-07-19 13:28:33.323899
5,5.4,3.9,1.7,0.4,setosa,2025-07-19 13:33:33.323899
6,4.6,3.4,1.4,0.3,setosa,2025-07-19 13:38:33.323899
7,5.0,3.4,1.5,0.2,setosa,2025-07-19 13:43:33.323899
8,4.4,2.9,1.4,0.2,setosa,2025-07-19 13:48:33.323899
9,4.9,3.1,1.5,0.1,setosa,2025-07-19 13:53:33.323899


In [9]:
data["entity_id"]=data.index

In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   sepal_length     150 non-null    float64       
 1   sepal_width      150 non-null    float64       
 2   petal_length     150 non-null    float64       
 3   petal_width      150 non-null    float64       
 4   species          150 non-null    object        
 5   event_timestamp  150 non-null    datetime64[ns]
 6   entity_id        150 non-null    int64         
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 8.3+ KB


In [12]:
data[0:150].shape

(150, 7)

In [13]:
filtered_data = data[0:75]

# Create entity dataframe with species and timestamps
# This will be used for getting historical features
result = filtered_data[['entity_id', 'event_timestamp']]

# Save entity dataframe for later use in model training
result.to_csv("data/entity.csv", index=False)
print(f"Entity dataframe created with {len(result)} records")
print("Entity dataframe preview:")
result.head()

Entity dataframe created with 75 records
Entity dataframe preview:


Unnamed: 0,entity_id,event_timestamp
0,0,2025-07-19 13:08:33.323899
1,1,2025-07-19 13:13:33.323899
2,2,2025-07-19 13:18:33.323899
3,3,2025-07-19 13:23:33.323899
4,4,2025-07-19 13:28:33.323899


In [14]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   sepal_length     150 non-null    float64       
 1   sepal_width      150 non-null    float64       
 2   petal_length     150 non-null    float64       
 3   petal_width      150 non-null    float64       
 4   species          150 non-null    object        
 5   event_timestamp  150 non-null    datetime64[ns]
 6   entity_id        150 non-null    int64         
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 8.3+ KB


In [16]:
import pandas_gbq

table_schema = [
    {'name': 'sepal_length', 'type': 'FLOAT'},
    {'name': 'sepal_width', 'type': 'FLOAT'}, 
    {'name': 'petal_length', 'type': 'FLOAT'},
    {'name': 'petal_width', 'type': 'FLOAT'},
    {'name': 'species', 'type': 'STRING'},
    {'name': 'event_timestamp', 'type': 'TIMESTAMP'},
    {'name': 'entity_id', 'type': 'INTEGER'},
]

# Upload dataframe to BigQuery
# if_exists="replace" will overwrite existing table
pandas_gbq.to_gbq(
    data, 
    "mlops_new.iris", 
    project_id="ageless-aura-461314-a1", 
    if_exists="replace",
    table_schema=table_schema
)

print(f"Data successfully uploaded to BigQuery table: {'mlops_new.iris'}")
print(f"Table contains {len(data)} records")

100%|██████████| 1/1 [00:00<00:00, 6909.89it/s]

Data successfully uploaded to BigQuery table: mlops_new.iris
Table contains 150 records





In [17]:
%%bash
# Initialize the Feast repository with GCP template
feast init -m Feast -t gcp


Creating a new Feast repository in /home/jupyter/iris_new/Feast.



In [18]:
%cd Feast/feature_repo

/home/jupyter/iris_new/Feast/feature_repo


In [19]:
# Create feature store configuration
# This configures Feast to use BigQuery as offline store and Datastore as online store
feature_store = f"""project: Feast
registry: gs://mlops_ga1_bucket/feast/registry.db
provider: gcp
entity_key_serialization_version: 2

offline_store:
  type: bigquery
  dataset: {'iris'}

online_store:
  type: datastore
  project_id: {'ageless-aura-461314-a1'}
  namespace: {'iris_online'}
"""

# Write configuration to feature_store.yaml
with open('feature_store.yaml', "w") as feature_store_file:
    feature_store_file.write(feature_store)
    
print("Feature store configuration created successfully!")

Feature store configuration created successfully!


In [21]:
# Creates definitions of entity, feature view, and feature service
flower_features = f"""
from datetime import timedelta
from feast import BigQuerySource, FeatureView, FeatureService, Entity, ValueType

# Define flower species as entity
flower_entity = Entity(
    name="entity_id",
    description="A ",
    value_type=ValueType.INT64
)

# Define feature view for flower measurements
flower_features = FeatureView(
    name="flower_features",
    entities=[flower_entity],
    ttl=timedelta(weeks=52),  # Time-to-live for features
    source=BigQuerySource(
        table=f"PRACTICE.housing",
        timestamp_field="event_timestamp"
    ),
    tags={{"assignment":"week_3"}}
)

# Create feature service for one model version
# FeatureService groups features for specific use cases
model_v1 = FeatureService(
    name="feast_model_v1",
    features=[flower_features]
)
"""

# Write feature definitions to feature_repo.py
with open('feature_repo.py', "w") as feature_repo_file:
    feature_repo_file.write(flower_features)
    
print("Feature repository definitions created successfully!")

Feature repository definitions created successfully!


In [22]:
!feast apply

No project found in the repository. Using project name Feast defined in feature_store.yaml
Applying changes for project Feast
Deploying infrastructure for [1m[32mflower_features[0m
Removing infrastructure for [1m[31mhousing_features[0m


In [23]:
!feast materialize 2025-06-20 2025-07-20

Materializing [1m[32m1[0m feature views from [1m[32m2025-06-20 00:00:00+00:00[0m to [1m[32m2025-07-20 00:00:00+00:00[0m into the [1m[32mdatastore[0m online store.

[1m[32mflower_features[0m:
100%|████████████████████████████████████████████████████████████| 228/228 [00:00<00:00, 352.37it/s]
