# Assignment 3.1: Feature Store - Exercise
Sinthuja Bates

## Setup notebook

In [179]:
# import libraries
import boto3
import pandas as pd
import sagemaker
import time
from sagemaker import get_execution_role
from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.session import Session

In [156]:
# setup boto
original_boto3_version = boto3.__version__
%pip install 'boto3>1.17.21'

[0mNote: you may need to restart the kernel to use updated packages.


In [157]:
# setup feature store
region = boto3.Session().region_name
role = get_execution_role()

boto_session = boto3.Session(region_name=region)

sagemaker_client = boto_session.client(service_name="sagemaker", region_name=region)
featurestore_runtime = boto_session.client(
    service_name="sagemaker-featurestore-runtime", region_name=region
)

feature_store_session = Session(
    boto_session=boto_session,
    sagemaker_client=sagemaker_client,
    sagemaker_featurestore_runtime_client=featurestore_runtime,
)

In [158]:
# setup s3 bucket
default_s3_bucket_name = feature_store_session.default_bucket()
prefix = "homework-3-1"

print(default_s3_bucket_name)

sagemaker-us-east-1-711667138246


## Import data

In [159]:
# import housing data
housing = pd.read_csv("housing.csv")
housing.head(5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [160]:
# import gmaps data
gmaps = pd.read_csv("housing_gmaps_data_raw.csv")
gmaps.head(5)

Unnamed: 0,street_number,route,locality-political,administrative_area_level_2-political,administrative_area_level_1-political,country-political,postal_code,address,longitude,latitude,...,establishment-natural_feature,airport-establishment-point_of_interest,political-sublocality-sublocality_level_1,administrative_area_level_3-political,post_box,establishment-light_rail_station-point_of_interest-transit_station,establishment-point_of_interest,aquarium-establishment-park-point_of_interest-tourist_attraction-zoo,campground-establishment-lodging-park-point_of_interest-rv_park-tourist_attraction,cemetery-establishment-park-point_of_interest
0,3130,Grizzly Peak Boulevard,Berkeley,Alameda County,California,United States,94705.0,"3130 Grizzly Peak Blvd, Berkeley, CA 94705, USA",-122.23,37.88,...,,,,,,,,,,
1,2005,Tunnel Road,Oakland,Alameda County,California,United States,94611.0,"2005 Tunnel Rd, Oakland, CA 94611, USA",-122.22,37.86,...,,,,,,,,,,
2,6886,Chabot Road,Oakland,Alameda County,California,United States,94618.0,"6886 Chabot Rd, Oakland, CA 94618, USA",-122.24,37.85,...,,,,,,,,,,
3,6365,Florio Street,Oakland,Alameda County,California,United States,94618.0,"6365 Florio St, Oakland, CA 94618, USA",-122.25,37.85,...,,,,,,,,,,
4,5407,Bryant Avenue,Oakland,Alameda County,California,United States,94618.0,"5407 Bryant Ave, Oakland, CA 94618, USA",-122.25,37.84,...,,,,,,,,,,


In [161]:
# merge the datasets based on latitude and longitude columns
df = pd.merge(housing, gmaps, on=['latitude', 'longitude'], how='outer')

df.to_csv("merged_data.csv", index=False)
df.head(5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity,...,establishment-natural_feature,airport-establishment-point_of_interest,political-sublocality-sublocality_level_1,administrative_area_level_3-political,post_box,establishment-light_rail_station-point_of_interest-transit_station,establishment-point_of_interest,aquarium-establishment-park-point_of_interest-tourist_attraction-zoo,campground-establishment-lodging-park-point_of_interest-rv_park-tourist_attraction,cemetery-establishment-park-point_of_interest
0,-117.04,32.54,7.0,938.0,297.0,1187.0,282.0,1.2667,67500.0,NEAR OCEAN,...,,,Zona Norte,,,,,,,
1,-117.09,32.55,8.0,6533.0,1217.0,4797.0,1177.0,3.9583,144400.0,NEAR OCEAN,...,,,,,,,,,,
2,-117.06,32.55,5.0,3223.0,940.0,3284.0,854.0,1.4384,108800.0,NEAR OCEAN,...,,,,,,,,,,
3,-117.04,32.55,15.0,2206.0,648.0,2511.0,648.0,1.6348,93200.0,NEAR OCEAN,...,,,,,,,,,,
4,-117.12,32.56,20.0,2524.0,682.0,1819.0,560.0,2.9286,257700.0,NEAR OCEAN,...,,,,,,,,,,


In [162]:
df.dtypes

longitude                                                                             float64
latitude                                                                              float64
housing_median_age                                                                    float64
total_rooms                                                                           float64
total_bedrooms                                                                        float64
population                                                                            float64
households                                                                            float64
median_income                                                                         float64
median_house_value                                                                    float64
ocean_proximity                                                                        object
street_number                                               

## Feature Engineering
primary_key - neighborhood derived from neighborhood-political
event_time - calculated using python
<1h ocean
          one hot encoded column derived from ocean_proximity
inland
          one hot encoded column derived from ocean_proximity
island
          one hot encoded column derived from ocean_proximity
near bay
          one hot encoded column derived from ocean_proximity
near ocean
          one hot encoded column derived from ocean_proximity
median house value
          derived from median_house_value
          average this value across all records for a neighborhood
          cap this value at 500,000
median house age
          derived from median_house_age
          average this value across all records for a neighborhood
          discretized by groups of 10 years i.e. 0-9, 10-19, 20-29, etc…
total households
          derived from households
          average this value across all records for a neighborhood
          must be an integer (round up if needed)
bedrooms per household
          derived from total_bedrooms and households
impute missing values by getting average for locality-code
          locality-code
encoded locality-political value


In [163]:
# create a new dataset with relevant columns
neighborhood = df[['neighborhood-political', 
                   'ocean_proximity', 
                   'median_house_value',
                   'housing_median_age',
                   'households',
                   'total_bedrooms',
                   'locality-political']]

# drop rows where 'neighborhood-political' is NaN
neighborhood = neighborhood.dropna(subset=['neighborhood-political'])

# drop duplicates to keep only distinct values
neighborhood = neighborhood.drop_duplicates()

neighborhood.head()

Unnamed: 0,neighborhood-political,ocean_proximity,median_house_value,housing_median_age,households,total_bedrooms,locality-political
1,Tijuana River Valley,NEAR OCEAN,144400.0,8.0,1177.0,1217.0,San Diego
2,Tijuana River Valley,NEAR OCEAN,108800.0,5.0,854.0,940.0,San Diego
3,San Ysidro,NEAR OCEAN,93200.0,15.0,648.0,648.0,San Diego
5,Egger Highlands,NEAR OCEAN,146800.0,16.0,480.0,501.0,San Diego
6,Nestor,NEAR OCEAN,151500.0,8.0,172.0,156.0,San Diego


In [164]:
# rename neighborhood-political column
neighborhood.rename(columns={'neighborhood-political': 'neighborhood'}, inplace=True)

# calculate event_time
neighborhood['event_time'] = pd.Timestamp.now()
neighborhood.head()

Unnamed: 0,neighborhood,ocean_proximity,median_house_value,housing_median_age,households,total_bedrooms,locality-political,event_time
1,Tijuana River Valley,NEAR OCEAN,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 19:20:39.005657
2,Tijuana River Valley,NEAR OCEAN,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 19:20:39.005657
3,San Ysidro,NEAR OCEAN,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 19:20:39.005657
5,Egger Highlands,NEAR OCEAN,146800.0,16.0,480.0,501.0,San Diego,2024-05-26 19:20:39.005657
6,Nestor,NEAR OCEAN,151500.0,8.0,172.0,156.0,San Diego,2024-05-26 19:20:39.005657


In [165]:
# one-hot encode 'ocean_proximity' column
neighborhood = pd.get_dummies(neighborhood, columns=['ocean_proximity'])

# rename columns
neighborhood.rename(columns={'ocean_proximity_<1H OCEAN': '1h_ocean'}, inplace=True)
neighborhood.rename(columns={'ocean_proximity_INLAND': 'island'}, inplace=True)
neighborhood.rename(columns={'ocean_proximity_ISLAND': 'island'}, inplace=True)
neighborhood.rename(columns={'ocean_proximity_NEAR BAY': 'near_bay'}, inplace=True)
neighborhood.rename(columns={'ocean_proximity_NEAR OCEAN': 'near_ocean'}, inplace=True)

neighborhood.head()

Unnamed: 0,neighborhood,median_house_value,housing_median_age,households,total_bedrooms,locality-political,event_time,1h_ocean,island,near_bay,near_ocean
1,Tijuana River Valley,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True
2,Tijuana River Valley,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True
3,San Ysidro,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True
5,Egger Highlands,146800.0,16.0,480.0,501.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True
6,Nestor,151500.0,8.0,172.0,156.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True


In [166]:
# Derive 'median_house_value' by averaging across neighborhoods and capping at 500,000
neighborhood['new_median_house_value'] = neighborhood.groupby('neighborhood')['median_house_value'].transform('mean')
neighborhood['new_median_house_value'] = neighborhood['new_median_house_value'].apply(lambda x: min(x, 500000))

# Derive 'median_house_age' by averaging across neighborhoods and discretizing into groups of 10 years
neighborhood['median_house_age'] = neighborhood.groupby('neighborhood')['housing_median_age'].transform('mean')
neighborhood['median_house_age'] = pd.cut(neighborhood['median_house_age'], bins=range(0, 100, 10), labels=range(0, 90, 10))
neighborhood.head()

Unnamed: 0,neighborhood,median_house_value,housing_median_age,households,total_bedrooms,locality-political,event_time,1h_ocean,island,near_bay,near_ocean,new_median_house_value,median_house_age
1,Tijuana River Valley,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,126600.0,0
2,Tijuana River Valley,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,126600.0,0
3,San Ysidro,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,124550.0,10
5,Egger Highlands,146800.0,16.0,480.0,501.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,128177.777778,20
6,Nestor,151500.0,8.0,172.0,156.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,132500.0,10


In [167]:
neighborhood.head(100)

Unnamed: 0,neighborhood,median_house_value,housing_median_age,households,total_bedrooms,locality-political,event_time,1h_ocean,island,near_bay,near_ocean,new_median_house_value,median_house_age
1,Tijuana River Valley,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,126600.000000,0
2,Tijuana River Valley,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,126600.000000,0
3,San Ysidro,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,124550.000000,10
5,Egger Highlands,146800.0,16.0,480.0,501.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,128177.777778,20
6,Nestor,151500.0,8.0,172.0,156.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,132500.000000,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,Logan Heights,86500.0,48.0,132.0,180.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,82425.000000,40
272,Logan Heights,89400.0,44.0,212.0,218.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,82425.000000,40
273,Logan Heights,90000.0,47.0,174.0,161.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,82425.000000,40
274,Logan Heights,75600.0,48.0,231.0,230.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,82425.000000,40


In [168]:
# Derive 'total_households' by averaging across neighborhoods and rounding up to the nearest integer
neighborhood['total_households'] = neighborhood.groupby('neighborhood')['households'].transform('mean').apply(lambda x: round(x))

# Derive 'total_bredrooms' by averaging across neighborhoods and rounding up to the nearest integer
neighborhood['new_total_bedrooms'] = neighborhood.groupby('neighborhood')['total_bedrooms'].transform('mean')
neighborhood['new_total_bedrooms'] = neighborhood['new_total_bedrooms'].transform(lambda x: round(x))

# Derive 'bedrooms_per_household'
neighborhood['bedrooms_per_household'] = neighborhood['new_total_bedrooms'] / neighborhood['total_households']
neighborhood.head()

Unnamed: 0,neighborhood,median_house_value,housing_median_age,households,total_bedrooms,locality-political,event_time,1h_ocean,island,near_bay,near_ocean,new_median_house_value,median_house_age,total_households,new_total_bedrooms,bedrooms_per_household
1,Tijuana River Valley,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,126600.0,0,1016,1078.0,1.061024
2,Tijuana River Valley,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,126600.0,0,1016,1078.0,1.061024
3,San Ysidro,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,124550.0,10,546,562.0,1.029304
5,Egger Highlands,146800.0,16.0,480.0,501.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,128177.777778,20,493,516.0,1.046653
6,Nestor,151500.0,8.0,172.0,156.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,132500.0,10,534,556.0,1.041199


In [169]:
from sklearn.preprocessing import OrdinalEncoder

# Create an OrdinalEncoder instance
ordinal_encoder = OrdinalEncoder()

# Reshape the 'locality-political' column to a 2D array
locality_political_column = neighborhood['locality-political'].values.reshape(-1, 1)

# Fit the OrdinalEncoder to the 'locality-political' column and transform it
neighborhood['locality-code'] = ordinal_encoder.fit_transform(locality_political_column)


In [170]:
# Calculate the mean 'locality-code' value across the entire dataset
mean_locality_code = neighborhood['locality-code'].mean()

# Impute missing values in 'locality-code' with the mean value
neighborhood['locality-code'] = neighborhood['locality-code'].fillna(mean_locality_code)

neighborhood.head()

Unnamed: 0,neighborhood,median_house_value,housing_median_age,households,total_bedrooms,locality-political,event_time,1h_ocean,island,near_bay,near_ocean,new_median_house_value,median_house_age,total_households,new_total_bedrooms,bedrooms_per_household,locality-code
1,Tijuana River Valley,144400.0,8.0,1177.0,1217.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,126600.0,0,1016,1078.0,1.061024,163.0
2,Tijuana River Valley,108800.0,5.0,854.0,940.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,126600.0,0,1016,1078.0,1.061024,163.0
3,San Ysidro,93200.0,15.0,648.0,648.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,124550.0,10,546,562.0,1.029304,163.0
5,Egger Highlands,146800.0,16.0,480.0,501.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,128177.777778,20,493,516.0,1.046653,163.0
6,Nestor,151500.0,8.0,172.0,156.0,San Diego,2024-05-26 19:20:39.005657,False,False,False,True,132500.0,10,534,556.0,1.041199,163.0


In [171]:
# Save the processed DataFrame to a new CSV file
processed_file_path = "processed_data.csv"
df.to_csv(processed_file_path, index=False)

## Ingest Data into FeatureStore

In [172]:
# define feature group name
feature_group_name = "homework"

# define feature group
feature_group = FeatureGroup(
    name=feature_group_name, sagemaker_session=feature_store_session
)

# check column types
neighborhood.dtypes

neighborhood                      object
median_house_value               float64
housing_median_age               float64
households                       float64
total_bedrooms                   float64
locality-political                object
event_time                datetime64[us]
1h_ocean                            bool
island                              bool
near_bay                            bool
near_ocean                          bool
new_median_house_value           float64
median_house_age                category
total_households                   int64
new_total_bedrooms               float64
bedrooms_per_household           float64
locality-code                    float64
dtype: object

In [173]:
# convert event time to string
neighborhood['event_time'] = pd.to_datetime(neighborhood['event_time']).dt.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

In [174]:
# function to convert object or category columns to string
def cast_object_to_string(data_frame):
    for label in data_frame.columns:
        if data_frame.dtypes[label] == "object":
            data_frame[label] = data_frame[label].astype("str").astype("string")
        if data_frame.dtypes[label] == "category":
            data_frame[label] = data_frame[label].astype("str").astype("string")

# update neighborhood dataframe
cast_object_to_string(neighborhood)

In [175]:
# function to convert bool columns to int
def cast_bool_to_int(data_frame):
    for label in data_frame.columns:
        if data_frame.dtypes[label] == "bool":
            data_frame[label] =  data_frame[label].astype(int)

# update neighborhood dataframe
cast_bool_to_int(neighborhood)

In [176]:
neighborhood.dtypes

neighborhood              string[python]
median_house_value               float64
housing_median_age               float64
households                       float64
total_bedrooms                   float64
locality-political        string[python]
event_time                string[python]
1h_ocean                           int64
island                             int64
near_bay                           int64
near_ocean                         int64
new_median_house_value           float64
median_house_age          string[python]
total_households                   int64
new_total_bedrooms               float64
bedrooms_per_household           float64
locality-code                    float64
dtype: object

In [177]:
# load feature definitions to the feature group
feature_group.load_feature_definitions(data_frame=neighborhood)

[FeatureDefinition(feature_name='neighborhood', feature_type=<FeatureTypeEnum.STRING: 'String'>),
 FeatureDefinition(feature_name='median_house_value', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='housing_median_age', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='households', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='total_bedrooms', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='locality-political', feature_type=<FeatureTypeEnum.STRING: 'String'>),
 FeatureDefinition(feature_name='event_time', feature_type=<FeatureTypeEnum.STRING: 'String'>),
 FeatureDefinition(feature_name='1h_ocean', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='island', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='near_bay', feature_type=<FeatureTypeEnu

## Create FeatureGroups in SageMaker FeatureStore

In [181]:
def wait_for_feature_group_creation_complete(feature_group):
    status = feature_group.describe().get("FeatureGroupStatus")
    while status == "Creating":
        print("Waiting for Feature Group Creation")
        time.sleep(5)
        status = feature_group.describe().get("FeatureGroupStatus")
    if status != "Created":
        raise RuntimeError(f"Failed to create feature group {feature_group.name}")
    print(f"FeatureGroup {feature_group.name} successfully created.")


# record identifier and event time feature names
record_identifier_feature_name = "neighborhood"
event_time_feature_name = "event_time"    

feature_group.create(
    s3_uri=f"s3://{default_s3_bucket_name}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name=event_time_feature_name,
    role_arn=role,
    enable_online_store=True,
)

wait_for_feature_group_creation_complete(feature_group=feature_group)

Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
FeatureGroup homework successfully created.


In [None]:
# check that the feature group exists
feature_group.describe()

## Ingest Data into FeatureStore

In [184]:
# ingest data
feature_group.ingest(data_frame=neighborhood, max_workers=3, wait=True)

IngestionManagerPandas(feature_group_name='homework', sagemaker_session=<sagemaker.session.Session object at 0x7fec126e7fa0>, data_frame=               neighborhood  median_house_value  housing_median_age  \
1      Tijuana River Valley            144400.0                 8.0   
2      Tijuana River Valley            108800.0                 5.0   
3                San Ysidro             93200.0                15.0   
5           Egger Highlands            146800.0                16.0   
6                    Nestor            151500.0                 8.0   
...                     ...                 ...                 ...   
20540               Pacific             92600.0                18.0   
20542               Pacific             78800.0                13.0   
20551                 Butte            165600.0                14.0   
20557            Fieldbrook            102700.0                21.0   
20617   Crescent City North             73200.0                11.0   

       hou

## Query Feature Values
1. Brooktree
2. Fisherman’s Wharf
3. Los Osos

In [186]:
# brooktree
featurestore_runtime.get_record(
    FeatureGroupName=feature_group_name,
    RecordIdentifierValueAsString="Brooktree",
)

{'ResponseMetadata': {'RequestId': '439d59f2-3e7c-49f0-99f4-448c5dbeb0ac',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '439d59f2-3e7c-49f0-99f4-448c5dbeb0ac',
   'content-type': 'application/json',
   'content-length': '1179',
   'date': 'Sun, 26 May 2024 19:26:45 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'neighborhood', 'ValueAsString': 'Brooktree'},
  {'FeatureName': 'median_house_value', 'ValueAsString': '257400.0'},
  {'FeatureName': 'housing_median_age', 'ValueAsString': '9.0'},
  {'FeatureName': 'households', 'ValueAsString': '1438.0'},
  {'FeatureName': 'locality-political', 'ValueAsString': 'San Jose'},
  {'FeatureName': 'event_time',
   'ValueAsString': '2024-05-26T19:20:39.005657Z'},
  {'FeatureName': '1h_ocean', 'ValueAsString': '1'},
  {'FeatureName': 'island', 'ValueAsString': '0'},
  {'FeatureName': 'near_bay', 'ValueAsString': '0'},
  {'FeatureName': 'near_ocean', 'ValueAsString': '0'},
  {'FeatureName': 'new_median_house_value', 'Value

In [187]:
# fisherman's wharf
featurestore_runtime.get_record(
    FeatureGroupName=feature_group_name,
    RecordIdentifierValueAsString="Fisherman's Wharf",
)

{'ResponseMetadata': {'RequestId': '445c87e7-f6ce-46cb-b860-aa1c4388a51f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '445c87e7-f6ce-46cb-b860-aa1c4388a51f',
   'content-type': 'application/json',
   'content-length': '1450',
   'date': 'Sun, 26 May 2024 19:26:56 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'neighborhood',
   'ValueAsString': "Fisherman's Wharf"},
  {'FeatureName': 'median_house_value', 'ValueAsString': '500001.0'},
  {'FeatureName': 'housing_median_age', 'ValueAsString': '52.0'},
  {'FeatureName': 'households', 'ValueAsString': '250.0'},
  {'FeatureName': 'total_bedrooms', 'ValueAsString': '317.0'},
  {'FeatureName': 'locality-political', 'ValueAsString': 'San Francisco'},
  {'FeatureName': 'event_time',
   'ValueAsString': '2024-05-26T19:20:39.005657Z'},
  {'FeatureName': '1h_ocean', 'ValueAsString': '0'},
  {'FeatureName': 'island', 'ValueAsString': '0'},
  {'FeatureName': 'near_bay', 'ValueAsString': '1'},
  {'FeatureName': 'near_oc

In [188]:
# los osos
featurestore_runtime.get_record(
    FeatureGroupName=feature_group_name,
    RecordIdentifierValueAsString="Los Osos",
)

{'ResponseMetadata': {'RequestId': '5ca70007-8d3b-4daa-9794-89542b970767',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '5ca70007-8d3b-4daa-9794-89542b970767',
   'content-type': 'application/json',
   'content-length': '1455',
   'date': 'Sun, 26 May 2024 19:27:15 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'neighborhood', 'ValueAsString': 'Los Osos'},
  {'FeatureName': 'median_house_value', 'ValueAsString': '192700.0'},
  {'FeatureName': 'housing_median_age', 'ValueAsString': '14.0'},
  {'FeatureName': 'households', 'ValueAsString': '755.0'},
  {'FeatureName': 'total_bedrooms', 'ValueAsString': '787.0'},
  {'FeatureName': 'locality-political', 'ValueAsString': 'Baywood-Los Osos'},
  {'FeatureName': 'event_time',
   'ValueAsString': '2024-05-26T19:20:39.005657Z'},
  {'FeatureName': '1h_ocean', 'ValueAsString': '0'},
  {'FeatureName': 'island', 'ValueAsString': '0'},
  {'FeatureName': 'near_bay', 'ValueAsString': '0'},
  {'FeatureName': 'near_ocean', 'Va

## Shut down notebook resources

In [None]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>

In [None]:
%%javascript

try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}