# Assignment 3.1: Feature Store - Exercise
Sinthuja Bates

## Setup notebook

In [2]:
# import libraries
import boto3
import pandas as pd
import sagemaker
import time
from decimal import Decimal, ROUND_HALF_UP
from sagemaker import get_execution_role
from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.session import Session
from sklearn.preprocessing import OrdinalEncoder

In [3]:
# setup boto
original_boto3_version = boto3.__version__
%pip install 'boto3>1.17.21'

[0mNote: you may need to restart the kernel to use updated packages.


In [4]:
# setup feature store
region = boto3.Session().region_name
role = get_execution_role()

boto_session = boto3.Session(region_name=region)

sagemaker_client = boto_session.client(service_name="sagemaker", region_name=region)
featurestore_runtime = boto_session.client(
    service_name="sagemaker-featurestore-runtime", region_name=region
)

feature_store_session = Session(
    boto_session=boto_session,
    sagemaker_client=sagemaker_client,
    sagemaker_featurestore_runtime_client=featurestore_runtime,
)

In [5]:
# setup s3 bucket
default_s3_bucket_name = feature_store_session.default_bucket()
prefix = "homework-3-1"

print(default_s3_bucket_name)

sagemaker-us-east-1-711667138246


## Import data

In [6]:
# import housing data
housing = pd.read_csv("housing.csv")
housing.head(5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [7]:
# import gmaps data
gmaps = pd.read_csv("housing_gmaps_data_raw.csv")
gmaps.head(5)

Unnamed: 0,street_number,route,locality-political,administrative_area_level_2-political,administrative_area_level_1-political,country-political,postal_code,address,longitude,latitude,...,establishment-natural_feature,airport-establishment-point_of_interest,political-sublocality-sublocality_level_1,administrative_area_level_3-political,post_box,establishment-light_rail_station-point_of_interest-transit_station,establishment-point_of_interest,aquarium-establishment-park-point_of_interest-tourist_attraction-zoo,campground-establishment-lodging-park-point_of_interest-rv_park-tourist_attraction,cemetery-establishment-park-point_of_interest
0,3130,Grizzly Peak Boulevard,Berkeley,Alameda County,California,United States,94705.0,"3130 Grizzly Peak Blvd, Berkeley, CA 94705, USA",-122.23,37.88,...,,,,,,,,,,
1,2005,Tunnel Road,Oakland,Alameda County,California,United States,94611.0,"2005 Tunnel Rd, Oakland, CA 94611, USA",-122.22,37.86,...,,,,,,,,,,
2,6886,Chabot Road,Oakland,Alameda County,California,United States,94618.0,"6886 Chabot Rd, Oakland, CA 94618, USA",-122.24,37.85,...,,,,,,,,,,
3,6365,Florio Street,Oakland,Alameda County,California,United States,94618.0,"6365 Florio St, Oakland, CA 94618, USA",-122.25,37.85,...,,,,,,,,,,
4,5407,Bryant Avenue,Oakland,Alameda County,California,United States,94618.0,"5407 Bryant Ave, Oakland, CA 94618, USA",-122.25,37.84,...,,,,,,,,,,


In [8]:
# merge the datasets based on latitude and longitude columns
df = pd.merge(housing, gmaps, on=['latitude', 'longitude'], how='outer')

df.to_csv("merged_data.csv", index=False)
df.head(5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity,...,establishment-natural_feature,airport-establishment-point_of_interest,political-sublocality-sublocality_level_1,administrative_area_level_3-political,post_box,establishment-light_rail_station-point_of_interest-transit_station,establishment-point_of_interest,aquarium-establishment-park-point_of_interest-tourist_attraction-zoo,campground-establishment-lodging-park-point_of_interest-rv_park-tourist_attraction,cemetery-establishment-park-point_of_interest
0,-117.04,32.54,7.0,938.0,297.0,1187.0,282.0,1.2667,67500.0,NEAR OCEAN,...,,,Zona Norte,,,,,,,
1,-117.09,32.55,8.0,6533.0,1217.0,4797.0,1177.0,3.9583,144400.0,NEAR OCEAN,...,,,,,,,,,,
2,-117.06,32.55,5.0,3223.0,940.0,3284.0,854.0,1.4384,108800.0,NEAR OCEAN,...,,,,,,,,,,
3,-117.04,32.55,15.0,2206.0,648.0,2511.0,648.0,1.6348,93200.0,NEAR OCEAN,...,,,,,,,,,,
4,-117.12,32.56,20.0,2524.0,682.0,1819.0,560.0,2.9286,257700.0,NEAR OCEAN,...,,,,,,,,,,


In [9]:
df.dtypes

longitude                                                                             float64
latitude                                                                              float64
housing_median_age                                                                    float64
total_rooms                                                                           float64
total_bedrooms                                                                        float64
population                                                                            float64
households                                                                            float64
median_income                                                                         float64
median_house_value                                                                    float64
ocean_proximity                                                                        object
street_number                                               

## Feature Engineering
primary_key - neighborhood derived from neighborhood-political
event_time - calculated using python
<1h ocean
          one hot encoded column derived from ocean_proximity
inland
          one hot encoded column derived from ocean_proximity
island
          one hot encoded column derived from ocean_proximity
near bay
          one hot encoded column derived from ocean_proximity
near ocean
          one hot encoded column derived from ocean_proximity
median house value
          derived from median_house_value
          average this value across all records for a neighborhood
          cap this value at 500,000
median house age
          derived from median_house_age
          average this value across all records for a neighborhood
          discretized by groups of 10 years i.e. 0-9, 10-19, 20-29, etc…
total households
          derived from households
          average this value across all records for a neighborhood
          must be an integer (round up if needed)
bedrooms per household
          derived from total_bedrooms and households
impute missing values by getting average for locality-code
          locality-code
encoded locality-political value


In [10]:
# create a new dataset with relevant columns
neighborhood = df[['neighborhood-political', 
                   'ocean_proximity', 
                   'median_house_value',
                   'housing_median_age',
                   'households',
                   'total_bedrooms',
                   'locality-political']]

# drop duplicates to keep only distinct values
neighborhood = neighborhood.drop_duplicates()

neighborhood.head()

Unnamed: 0,neighborhood-political,ocean_proximity,median_house_value,housing_median_age,households,total_bedrooms,locality-political
0,,NEAR OCEAN,67500.0,7.0,282.0,297.0,Tijuana
1,Tijuana River Valley,NEAR OCEAN,144400.0,8.0,1177.0,1217.0,San Diego
2,Tijuana River Valley,NEAR OCEAN,108800.0,5.0,854.0,940.0,San Diego
3,San Ysidro,NEAR OCEAN,93200.0,15.0,648.0,648.0,San Diego
4,,NEAR OCEAN,257700.0,20.0,560.0,682.0,Imperial Beach


In [11]:
# rename neighborhood-political column
neighborhood.rename(columns={'neighborhood-political': 'neighborhood'}, inplace=True)

# calculate event_time
neighborhood['event_time'] = pd.Timestamp.now()
neighborhood.head()

Unnamed: 0,neighborhood,ocean_proximity,median_house_value,housing_median_age,households,total_bedrooms,locality-political,event_time
0,,NEAR OCEAN,67500.0,7.0,282.0,297.0,Tijuana,2024-05-26 20:33:06.121003
1,Tijuana River Valley,NEAR OCEAN,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 20:33:06.121003
2,Tijuana River Valley,NEAR OCEAN,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 20:33:06.121003
3,San Ysidro,NEAR OCEAN,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 20:33:06.121003
4,,NEAR OCEAN,257700.0,20.0,560.0,682.0,Imperial Beach,2024-05-26 20:33:06.121003


In [12]:
# one-hot encode 'ocean_proximity' column
neighborhood = pd.get_dummies(neighborhood, columns=['ocean_proximity'])

# rename columns
neighborhood.rename(columns={'ocean_proximity_<1H OCEAN': '1h_ocean'}, inplace=True)
neighborhood.rename(columns={'ocean_proximity_INLAND': 'inland'}, inplace=True)
neighborhood.rename(columns={'ocean_proximity_ISLAND': 'island'}, inplace=True)
neighborhood.rename(columns={'ocean_proximity_NEAR BAY': 'near_bay'}, inplace=True)
neighborhood.rename(columns={'ocean_proximity_NEAR OCEAN': 'near_ocean'}, inplace=True)

neighborhood.head()

Unnamed: 0,neighborhood,median_house_value,housing_median_age,households,total_bedrooms,locality-political,event_time,1h_ocean,inland,island,near_bay,near_ocean
0,,67500.0,7.0,282.0,297.0,Tijuana,2024-05-26 20:33:06.121003,False,False,False,False,True
1,Tijuana River Valley,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True
2,Tijuana River Valley,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True
3,San Ysidro,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True
4,,257700.0,20.0,560.0,682.0,Imperial Beach,2024-05-26 20:33:06.121003,False,False,False,False,True


In [13]:
# rename old median_house_value 
neighborhood.rename(columns={'median_house_value': 'old_median_house_value'}, inplace=True)

# derive 'median_house_value' by averaging across neighborhoods and capping at 500,000
neighborhood['median_house_value'] = neighborhood.groupby('neighborhood')['old_median_house_value'].transform('mean')
neighborhood['median_house_value'] = neighborhood['median_house_value'].apply(lambda x: min(x, 500000))

# derive 'median_house_age' by averaging across neighborhoods and discretizing into groups of 10 years
# define custom labels
labels = [f"{i}-{i + 9}" for i in range(0, 90, 10)]  # Create labels like '0-9', '10-19'...

neighborhood['median_house_age'] = neighborhood.groupby('neighborhood')['housing_median_age'].transform('mean')
neighborhood['median_house_age'] = pd.cut(neighborhood['median_house_age'], bins=range(0, 100, 10), labels=labels)

neighborhood.head()

Unnamed: 0,neighborhood,old_median_house_value,housing_median_age,households,total_bedrooms,locality-political,event_time,1h_ocean,inland,island,near_bay,near_ocean,median_house_value,median_house_age
0,,67500.0,7.0,282.0,297.0,Tijuana,2024-05-26 20:33:06.121003,False,False,False,False,True,,
1,Tijuana River Valley,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True,126600.0,0-9
2,Tijuana River Valley,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True,126600.0,0-9
3,San Ysidro,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True,124550.0,10-19
4,,257700.0,20.0,560.0,682.0,Imperial Beach,2024-05-26 20:33:06.121003,False,False,False,False,True,,


In [14]:
# derive 'total_households' by averaging across neighborhoods and rounding up to the nearest integer
neighborhood['total_households'] = neighborhood.groupby('neighborhood')['households'].transform('mean')
neighborhood['total_households'] = neighborhood['total_households'].transform(lambda x: Decimal(x).to_integral_value(rounding=ROUND_HALF_UP))

# rename old total bedrooms
neighborhood.rename(columns={'total_bedrooms': 'old_total_bedrooms'}, inplace=True)

# derive 'total_bedrooms' by averaging across neighborhoods and rounding up to the nearest integer
neighborhood['total_bedrooms'] = neighborhood.groupby('neighborhood')['old_total_bedrooms'].transform('mean')
neighborhood['total_bedrooms'] = neighborhood['total_bedrooms'].transform(lambda x: Decimal(x).to_integral_value(rounding=ROUND_HALF_UP))

# derive 'bedrooms_per_household'
neighborhood['bedrooms_per_household'] = neighborhood['total_bedrooms'] / neighborhood['total_households']
neighborhood.head()

Unnamed: 0,neighborhood,old_median_house_value,housing_median_age,households,old_total_bedrooms,locality-political,event_time,1h_ocean,inland,island,near_bay,near_ocean,median_house_value,median_house_age,total_households,total_bedrooms,bedrooms_per_household
0,,67500.0,7.0,282.0,297.0,Tijuana,2024-05-26 20:33:06.121003,False,False,False,False,True,,,,,
1,Tijuana River Valley,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True,126600.0,0-9,1016.0,1079.0,1.062007874015748
2,Tijuana River Valley,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True,126600.0,0-9,1016.0,1079.0,1.062007874015748
3,San Ysidro,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True,124550.0,10-19,546.0,562.0,1.0293040293040292
4,,257700.0,20.0,560.0,682.0,Imperial Beach,2024-05-26 20:33:06.121003,False,False,False,False,True,,,,,


In [15]:
# Create an OrdinalEncoder instance
ordinal_encoder = OrdinalEncoder()

# Reshape the 'locality-political' column to a 2D array
locality_political_column = neighborhood['locality-political'].values.reshape(-1, 1)

# Fit the OrdinalEncoder to the 'locality-political' column and transform it
neighborhood['locality-code'] = ordinal_encoder.fit_transform(locality_political_column)

In [16]:
# Calculate the mean 'bedrooms_per_household' for each 'locality-code'
mean_bedrooms_per_household = neighborhood.groupby('locality-code')['bedrooms_per_household'].transform('mean')

# Fill missing values in 'bedrooms_per_household' with the mean for each 'locality-code'
neighborhood['bedrooms_per_household'].fillna(mean_bedrooms_per_household, inplace=True)

neighborhood.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  neighborhood['bedrooms_per_household'].fillna(mean_bedrooms_per_household, inplace=True)


Unnamed: 0,neighborhood,old_median_house_value,housing_median_age,households,old_total_bedrooms,locality-political,event_time,1h_ocean,inland,island,near_bay,near_ocean,median_house_value,median_house_age,total_households,total_bedrooms,bedrooms_per_household,locality-code
0,,67500.0,7.0,282.0,297.0,Tijuana,2024-05-26 20:33:06.121003,False,False,False,False,True,,,,,,894.0
1,Tijuana River Valley,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True,126600.0,0-9,1016.0,1079.0,1.062007874015748,778.0
2,Tijuana River Valley,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True,126600.0,0-9,1016.0,1079.0,1.062007874015748,778.0
3,San Ysidro,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 20:33:06.121003,False,False,False,False,True,124550.0,10-19,546.0,562.0,1.0293040293040292,778.0
4,,257700.0,20.0,560.0,682.0,Imperial Beach,2024-05-26 20:33:06.121003,False,False,False,False,True,,,,,1.046653,398.0


In [17]:
# drop columns not needed for feature store
neighborhood = neighborhood.drop(columns=['old_median_house_value',
                                         'housing_median_age',
                                          'households',
                                          'old_total_bedrooms',	
                                          'locality-political',
                                         ]) 

In [18]:
# Save the processed DataFrame to a new CSV file
neighborhood.to_csv("neighborhood.csv", index=False)

## Ingest Data into FeatureStore

In [19]:
# define feature group name
feature_group_name = "homework"

# define feature group
feature_group = FeatureGroup(
    name=feature_group_name, sagemaker_session=feature_store_session
)

# check column types
neighborhood.dtypes

neighborhood                      object
event_time                datetime64[us]
1h_ocean                            bool
inland                              bool
island                              bool
near_bay                            bool
near_ocean                          bool
median_house_value               float64
median_house_age                category
total_households                  object
total_bedrooms                    object
bedrooms_per_household            object
locality-code                    float64
dtype: object

In [20]:
# Convert datetime to Unix timestamp (seconds since epoch)
neighborhood['event_time'] = neighborhood['event_time'].astype(int) / 10**9

In [21]:
# function to convert object or category columns to string
def cast_object_to_string(data_frame):
    for label in data_frame.columns:
        if data_frame.dtypes[label] == "object":
            data_frame[label] = data_frame[label].astype("str").astype("string")
        if data_frame.dtypes[label] == "category":
            data_frame[label] = data_frame[label].astype("str").astype("string")

# update neighborhood dataframe
cast_object_to_string(neighborhood)

In [22]:
# function to convert bool columns to int
def cast_bool_to_int(data_frame):
    for label in data_frame.columns:
        if data_frame.dtypes[label] == "bool":
            data_frame[label] =  data_frame[label].astype(int)

# update neighborhood dataframe
cast_bool_to_int(neighborhood)

In [23]:
neighborhood.dtypes

neighborhood              string[python]
event_time                       float64
1h_ocean                           int64
inland                             int64
island                             int64
near_bay                           int64
near_ocean                         int64
median_house_value               float64
median_house_age          string[python]
total_households          string[python]
total_bedrooms            string[python]
bedrooms_per_household    string[python]
locality-code                    float64
dtype: object

In [24]:
# load feature definitions to the feature group
feature_group.load_feature_definitions(data_frame=neighborhood)

[FeatureDefinition(feature_name='neighborhood', feature_type=<FeatureTypeEnum.STRING: 'String'>),
 FeatureDefinition(feature_name='event_time', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='1h_ocean', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='inland', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='island', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='near_bay', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='near_ocean', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='median_house_value', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='median_house_age', feature_type=<FeatureTypeEnum.STRING: 'String'>),
 FeatureDefinition(feature_name='total_households', feature_type=<FeatureTypeEnum.STRING: 'Strin

## Create FeatureGroups in SageMaker FeatureStore

In [25]:
def wait_for_feature_group_creation_complete(feature_group):
    status = feature_group.describe().get("FeatureGroupStatus")
    while status == "Creating":
        print("Waiting for Feature Group Creation")
        time.sleep(5)
        status = feature_group.describe().get("FeatureGroupStatus")
    if status != "Created":
        raise RuntimeError(f"Failed to create feature group {feature_group.name}")
    print(f"FeatureGroup {feature_group.name} successfully created.")


# record identifier and event time feature names
record_identifier_feature_name = "neighborhood"
event_time_feature_name = "event_time"    

feature_group.create(
    s3_uri=f"s3://{default_s3_bucket_name}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name=event_time_feature_name,
    role_arn=role,
    enable_online_store=True,
)

wait_for_feature_group_creation_complete(feature_group=feature_group)

Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
FeatureGroup homework successfully created.


In [26]:
# check that the feature group exists
feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:711667138246:feature-group/homework',
 'FeatureGroupName': 'homework',
 'RecordIdentifierFeatureName': 'neighborhood',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'neighborhood',
   'FeatureType': 'String'},
  {'FeatureName': 'event_time', 'FeatureType': 'Fractional'},
  {'FeatureName': '1h_ocean', 'FeatureType': 'Integral'},
  {'FeatureName': 'inland', 'FeatureType': 'Integral'},
  {'FeatureName': 'island', 'FeatureType': 'Integral'},
  {'FeatureName': 'near_bay', 'FeatureType': 'Integral'},
  {'FeatureName': 'near_ocean', 'FeatureType': 'Integral'},
  {'FeatureName': 'median_house_value', 'FeatureType': 'Fractional'},
  {'FeatureName': 'median_house_age', 'FeatureType': 'String'},
  {'FeatureName': 'total_households', 'FeatureType': 'String'},
  {'FeatureName': 'total_bedrooms', 'FeatureType': 'String'},
  {'FeatureName': 'bedrooms_per_household', 'FeatureType': 'String'},
  {'FeatureName': 'locality-

## Ingest Data into FeatureStore

In [27]:
# ingest data
feature_group.ingest(data_frame=neighborhood, max_workers=3, wait=True)

IngestionManagerPandas(feature_group_name='homework', sagemaker_session=<sagemaker.session.Session object at 0x7f644e494730>, data_frame=               neighborhood    event_time  1h_ocean  inland  island  near_bay  \
0                       nan  1.716756e+06         0       0       0         0   
1      Tijuana River Valley  1.716756e+06         0       0       0         0   
2      Tijuana River Valley  1.716756e+06         0       0       0         0   
3                San Ysidro  1.716756e+06         0       0       0         0   
4                       nan  1.716756e+06         0       0       0         0   
...                     ...           ...       ...     ...     ...       ...   
20635                   nan  1.716756e+06         0       1       0         0   
20636                   nan  1.716756e+06         1       0       0         0   
20637                   nan  1.716756e+06         0       0       0         0   
20638                   nan  1.716756e+06         0  

## Query Feature Values
1. Brooktree
2. Fisherman’s Wharf
3. Los Osos

In [28]:
# brooktree
featurestore_runtime.get_record(
    FeatureGroupName=feature_group_name,
    RecordIdentifierValueAsString="Brooktree",
)

{'ResponseMetadata': {'RequestId': 'f3f1a605-7951-49c9-b299-9d4297d2c63d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f3f1a605-7951-49c9-b299-9d4297d2c63d',
   'content-type': 'application/json',
   'content-length': '1079',
   'date': 'Sun, 26 May 2024 20:34:40 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'neighborhood', 'ValueAsString': 'Brooktree'},
  {'FeatureName': 'event_time', 'ValueAsString': '1716755.586121003'},
  {'FeatureName': '1h_ocean', 'ValueAsString': '1'},
  {'FeatureName': 'inland', 'ValueAsString': '0'},
  {'FeatureName': 'island', 'ValueAsString': '0'},
  {'FeatureName': 'near_bay', 'ValueAsString': '0'},
  {'FeatureName': 'near_ocean', 'ValueAsString': '0'},
  {'FeatureName': 'median_house_value', 'ValueAsString': '257400.0'},
  {'FeatureName': 'median_house_age', 'ValueAsString': '0-9'},
  {'FeatureName': 'total_households', 'ValueAsString': '1438'},
  {'FeatureName': 'total_bedrooms', 'ValueAsString': 'NaN'},
  {'FeatureName': '

In [29]:
# fisherman's wharf
featurestore_runtime.get_record(
    FeatureGroupName=feature_group_name,
    RecordIdentifierValueAsString="Fisherman's Wharf",
)

{'ResponseMetadata': {'RequestId': '2effe718-304a-41da-a361-7d05a8c8c6b6',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '2effe718-304a-41da-a361-7d05a8c8c6b6',
   'content-type': 'application/json',
   'content-length': '1075',
   'date': 'Sun, 26 May 2024 20:34:40 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'neighborhood',
   'ValueAsString': "Fisherman's Wharf"},
  {'FeatureName': 'event_time', 'ValueAsString': '1716755.586121003'},
  {'FeatureName': '1h_ocean', 'ValueAsString': '0'},
  {'FeatureName': 'inland', 'ValueAsString': '0'},
  {'FeatureName': 'island', 'ValueAsString': '0'},
  {'FeatureName': 'near_bay', 'ValueAsString': '1'},
  {'FeatureName': 'near_ocean', 'ValueAsString': '0'},
  {'FeatureName': 'median_house_value', 'ValueAsString': '500000.0'},
  {'FeatureName': 'median_house_age', 'ValueAsString': '50-59'},
  {'FeatureName': 'total_households', 'ValueAsString': '250'},
  {'FeatureName': 'total_bedrooms', 'ValueAsString': '317'},
  {'Fea

In [30]:
# los osos
featurestore_runtime.get_record(
    FeatureGroupName=feature_group_name,
    RecordIdentifierValueAsString="Los Osos",
)

{'ResponseMetadata': {'RequestId': '415cde8d-4631-410d-9e67-46ae74944c54',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '415cde8d-4631-410d-9e67-46ae74944c54',
   'content-type': 'application/json',
   'content-length': '1089',
   'date': 'Sun, 26 May 2024 20:34:41 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'neighborhood', 'ValueAsString': 'Los Osos'},
  {'FeatureName': 'event_time', 'ValueAsString': '1716755.586121003'},
  {'FeatureName': '1h_ocean', 'ValueAsString': '0'},
  {'FeatureName': 'inland', 'ValueAsString': '0'},
  {'FeatureName': 'island', 'ValueAsString': '0'},
  {'FeatureName': 'near_bay', 'ValueAsString': '0'},
  {'FeatureName': 'near_ocean', 'ValueAsString': '1'},
  {'FeatureName': 'median_house_value', 'ValueAsString': '221612.5'},
  {'FeatureName': 'median_house_age', 'ValueAsString': '10-19'},
  {'FeatureName': 'total_households', 'ValueAsString': '612'},
  {'FeatureName': 'total_bedrooms', 'ValueAsString': '643'},
  {'FeatureName': '

## Shut down notebook resources

In [31]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>

In [32]:
%%javascript

try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}

<IPython.core.display.Javascript object>