In [148]:
!rm -rf .ipynb_checkpoints
!rm -rf __pycache__
!feast apply

Updated feature view [1m[33mfact_feature_view[0m
	entities: [1m[33m['host_id'][0m -> [1m[92m['id'][0m
	entity_columns: [1m[33m[name: "host_id"
value_type: INT64
][0m -> [1m[92m[name: "id"
value_type: INT64
][0m
Updated feature view [1m[33mreview_feature_view[0m
	entities: [1m[33m['host_id'][0m -> [1m[92m['id'][0m
	entity_columns: [1m[33m[name: "host_id"
value_type: INT64
][0m -> [1m[92m[name: "id"
value_type: INT64
][0m

[1m[94mNo changes to infrastructure


In [149]:
import pandas as pd
from sqlalchemy import create_engine

db_config = {
    'user': 'admin',
    'password': 'admin123',
    'host': 'feast_postgres',
    'port': '5432',
    'database': 'feast_postgres'
}

connection_string = f"postgresql://{db_config['user']}:{db_config['password']}@{db_config['host']}:{db_config['port']}/{db_config['database']}"
engine = create_engine(connection_string)

In [150]:
from feast import FeatureStore

listing_query = "SELECT * FROM listing_table"
host_query = "SELECT * FROM host_table"
review_query = "SELECT *, event_timestamp as review_event_timestamp FROM review_table"
fact_query = "SELECT *, event_timestamp as fact_event_timestamp FROM fact_table"

listing_df = pd.read_sql(listing_query, engine)
host_df = pd.read_sql(host_query, engine)
review_df = pd.read_sql(review_query, engine)
fact_df = pd.read_sql(fact_query, engine)

listing_df["event_timestamp"] = pd.to_datetime(listing_df["event_timestamp"])
host_df["event_timestamp"] = pd.to_datetime(host_df["event_timestamp"])
review_df["review_event_timestamp"] = pd.to_datetime(review_df["review_event_timestamp"])
fact_df["fact_event_timestamp"] = pd.to_datetime(fact_df["fact_event_timestamp"])

fs = FeatureStore(repo_path=".")



In [151]:
listing_features = [
    "listing_feature_view:host_id",
    "listing_feature_view:property_type",
    "listing_feature_view:room_type",
    "listing_feature_view:accommodates",
    "listing_feature_view:bathrooms",
    "listing_feature_view:bedrooms",
    "listing_feature_view:beds",
    "listing_feature_view:amenities",
    "listing_feature_view:has_availability",
    "listing_feature_view:availability_30",
    "listing_feature_view:availability_60",
    "listing_feature_view:availability_90",
    "listing_feature_view:availability_365",
    "listing_feature_view:instant_bookable"
]

listing_df = fs.get_historical_features(
    entity_df=listing_df[["id", "event_timestamp"]],
    features=listing_features
).to_df()

print("Listing Features Data:")
print(listing_features_df.head())

Listing Features Data:
         id            event_timestamp   host_id         property_type  \
0   1489424 2024-05-14 08:21:02.644933   5294164  Private room in home   
1   3820211 2024-05-14 08:21:02.644933  19648678    Entire rental unit   
2   5651579 2024-05-14 08:21:02.644933  29288920    Entire rental unit   
3  10768745 2024-05-14 08:21:02.644933   5691268           Entire home   
4   1854638 2024-05-14 08:21:02.644933   9075763    Entire rental unit   

         room_type  accommodates  bathrooms  bedrooms  beds  \
0     Private room             2        1.0       1.0   1.0   
1  Entire home/apt             2        1.0       1.0   1.0   
2  Entire home/apt             2        1.0       0.0   1.0   
3  Entire home/apt             1        1.0       1.0   1.0   
4  Entire home/apt             2        1.0       1.0   1.0   

                                           amenities      price  \
0  "Oven", "Central air conditioning", "Wifi", "C...   50.00000   
1  "Iron", "Oven", 

In [152]:
host_features = [
    "host_feature_view:host_response_rate",
    "host_feature_view:host_acceptance_rate",
    "host_feature_view:host_is_superhost",
    "host_feature_view:host_listings_count",
    "host_feature_view:host_total_listings_count",
    "host_feature_view:host_verifications",
    "host_feature_view:host_has_profile_pic",
    "host_feature_view:host_identity_verified",
]

host_features_df = fs.get_historical_features(
    entity_df=host_df[["host_id", "event_timestamp"]],
    features=host_features
).to_df()

print("Host Features Data:")
print(host_features_df.head())

Host Features Data:
    host_id            event_timestamp  host_response_rate  \
0   5294164 2024-05-14 08:21:16.830847               100.0   
1  19648678 2024-05-14 08:21:16.830847               100.0   
2  17766924 2024-05-14 08:21:16.830847               100.0   
3   5691268 2024-05-14 08:21:16.830847                98.0   
4   4259750 2024-05-14 08:21:16.830847                94.0   

   host_acceptance_rate  host_is_superhost  host_listings_count  \
0                  84.0                0.0                  1.0   
1                  68.0                0.0                  4.0   
2                 100.0                0.0                  1.0   
3                  87.0                1.0                  2.0   
4                  88.0                1.0                  9.0   

   host_total_listings_count host_verifications  host_has_profile_pic  \
0                        2.0       email, phone                   1.0   
1                        7.0       email, phone           

In [153]:
review_features = [
    "review_feature_view:number_of_reviews",
    "review_feature_view:number_of_reviews_ltm",
    "review_feature_view:number_of_reviews_l30d",
    "review_feature_view:review_scores_rating",
    "review_feature_view:review_scores_accuracy",
    "review_feature_view:review_scores_cleanliness",
    "review_feature_view:review_scores_checkin",
    "review_feature_view:review_scores_communication",
    "review_feature_view:review_scores_value",
    "review_feature_view:reviews_per_month",
]

review_features_df = fs.get_historical_features(
    entity_df=review_df[["id", "event_timestamp"]],
    features=review_features
).to_df()

print("Review Features Data:")
print(review_features_df.head())


UndefinedColumn: column "review_event_timestamp" does not exist
LINE 55:         "review_event_timestamp" as event_timestamp,
                 ^


In [154]:
fact_features = [
    "fact_feature_view:minimum_nights",
    "fact_feature_view:maximum_nights",
    "fact_feature_view:minimum_minimum_nights",
    "fact_feature_view:maximum_minimum_nights",
    "fact_feature_view:minimum_maximum_nights",
    "fact_feature_view:maximum_maximum_nights",
    "fact_feature_view:minimum_nights_avg_ntm",
    "fact_feature_view:maximum_nights_avg_ntm",
    "fact_feature_view:calculated_host_listings_count",
    "fact_feature_view:calculated_host_listings_count_entire_homes",
    "fact_feature_view:calculated_host_listings_count_private_rooms",
    "fact_feature_view:calculated_host_listings_count_shared_rooms",
]

fact_features_df = fs.get_historical_features(
    entity_df=fact_df[["id", "event_timestamp"]],
    features=fact_features
).to_df()

print("Fact Features Data:")
print(fact_features_df.head())


UndefinedColumn: column "fact_event_timestamp" does not exist
LINE 55:         "fact_event_timestamp" as event_timestamp,
                 ^
