## <span style="color:#ff5f27;"> 💽 Loading the Data </span>

In [1]:
# Optional setup: uncomment to install the Hopsworks client into the kernel's environment.
# %pip install -U hopsworks --quiet

In [2]:
# Optional setup: uncomment to install the Kaggle API client into the kernel's environment.
# %pip install kaggle --quiet

In [1]:
import gdown
import hopsworks as hs
import numpy as np
import pandas as pd

## <span style="color:#ff5f27;"> 🛠️ Feature Engineering </span>

## <span style="color:#ff5f27;"> 🪄 Creating Feature Groups in Hopsworks </span>

In [8]:
# Authenticate against Hopsworks and open the project's feature store.
# `hs` is the `hopsworks` package, imported in the notebook's import cell so
# that every dependency is declared in one place.
# NOTE(review): hs.login() presumably picks up an API key from the environment
# or prompts for one interactively — confirm before running unattended.
project = hs.login()

# Feature store handle used by every feature group cell below.
fs = project.get_feature_store()

  from .autonotebook import tqdm as notebook_tqdm


Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/550037




Connected. Call `.close()` to terminate connection gracefully.


In [14]:
# Coerce the beer ID to pandas' nullable integer dtype; values that cannot be
# parsed as numbers become <NA>. Only `beer_beerid` is converted, and the
# column is overwritten on the shared `df`, so later cells see the coerced IDs.
df['beer_beerid'] = pd.to_numeric(df['beer_beerid'], errors='coerce').astype('Int64')

# Keep one row per beer ID (the first occurrence), then discard any row whose
# primary key is missing.
beer_columns = ['beer_beerid', 'beer_name', 'beer_brewerid', 'beer_abv', 'beer_style']
beer_features = (
    df[beer_columns]
    .drop_duplicates(subset=['beer_beerid'])
    .dropna(subset=['beer_beerid'])
)

# Fetch the "beer_features" feature group (version 1), creating it if needed.
beer_fg = fs.get_or_create_feature_group(
    primary_key=['beer_beerid'],
    online_enabled=True,
    name="beer_features",
    version=1,
    description="Basic information about beers",
)

# Upload the deduplicated beer rows.
beer_fg.insert(beer_features)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/550037/fs/545860/fg/772468


Uploading Dataframe: 100.00% |██████████| Rows 110364/110364 | Elapsed Time: 00:15 | Remaining Time: 00:00


Launching job: beer_features_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/550037/jobs/named/beer_features_1_offline_fg_materialization/executions


In [17]:
# Human-readable description for each column of the beer feature group,
# keyed by feature name.
beer_feature_descriptions = {
    "beer_beerid": "Unique identifier for each beer.",
    "beer_name": "Name of the beer.",
    "beer_brewerid": "Identifier for the brewer of the beer.",
    "beer_abv": "Alcohol by volume percentage of the beer.",
    "beer_style": "Style or category of the beer.",
}

# Attach every description to its feature on the beer feature group.
for feature_name in beer_feature_descriptions:
    beer_fg.update_feature_description(
        feature_name,
        beer_feature_descriptions[feature_name],
    )


In [15]:
# Feature group for reviews: the five rating columns plus the ID of the
# reviewed beer.
review_columns = [
    'review_appearance',
    'review_aroma',
    'review_palate',
    'review_taste',
    'review_overall',
    'beer_beerid',
]

# `beer_beerid` is coerced with errors='coerce' in the beer feature group cell,
# so it can contain <NA>. Drop those rows, as the beer cell does, so that no
# row is inserted with a missing primary key.
review_features = df[review_columns].dropna(subset=['beer_beerid'])

# NOTE(review): `beer_beerid` is declared as the primary key, but many reviews
# share the same beer ID, so the key is not unique in this frame. Rows with the
# same key presumably overwrite one another in the online store — confirm, and
# consider a composite key (e.g. adding `review_profilename`) or aggregating
# the ratings per beer. Left unchanged here because it alters the stored schema.
review_fg = fs.get_or_create_feature_group(
    name="review_features",
    version=1,
    description="Metrics about beer reviews",
    primary_key=['beer_beerid'],
    online_enabled=True
)

# Upload the review rows.
review_fg.insert(review_features)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/550037/fs/545860/fg/774499


Uploading Dataframe: 100.00% |██████████| Rows 2924163/2924163 | Elapsed Time: 03:32 | Remaining Time: 00:00


Launching job: review_features_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/550037/jobs/named/review_features_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x1d5843b90>, None)

In [18]:
# Human-readable description for each column of the review feature group,
# keyed by feature name.
review_feature_descriptions = {
    "review_appearance": "Rating of the beer's appearance (1-5).",
    "review_aroma": "Rating of the beer's aroma (1-5).",
    "review_palate": "Rating of the beer's palate (1-5).",
    "review_taste": "Rating of the beer's taste (1-5).",
    "review_overall": "Overall rating of the beer (1-5).",
    "beer_beerid": "Associated unique identifier for each beer.",
}

# Attach every description to its feature on the review feature group.
for feature_name in review_feature_descriptions:
    review_fg.update_feature_description(
        feature_name,
        review_feature_descriptions[feature_name],
    )

In [16]:
# Feature group for reviewer profiles: one row per distinct reviewer name.
# Rows with a missing `review_profilename` are dropped first so that a null
# value is never inserted as a primary key (the beer feature group cell applies
# the same rule to its key).
reviewer_profile_features = (
    df[['review_profilename']]
    .dropna(subset=['review_profilename'])
    .drop_duplicates()
)

# Fetch the "reviewer_profile_features" feature group (version 1), creating it
# if needed.
reviewer_fg = fs.get_or_create_feature_group(
    name="reviewer_profile_features",
    version=1,
    description="Profile information of reviewers",
    primary_key=['review_profilename'],
    online_enabled=True
)

# Upload the distinct reviewer names.
reviewer_fg.insert(reviewer_profile_features)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/550037/fs/545860/fg/775517


Uploading Dataframe: 100.00% |██████████| Rows 29265/29265 | Elapsed Time: 00:07 | Remaining Time: 00:00


Launching job: reviewer_profile_features_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/550037/jobs/named/reviewer_profile_features_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x1d5841520>, None)

In [19]:
# update reviewer profile feature descriptions
reviewer_profile_feature_descriptions = {
    "review_profilename": "Unique username of the reviewer."
}

# Update descriptions for Reviewer Profile Features
for feature, description in reviewer_profile_feature_descriptions.items():
    reviewer_fg.update_feature_description(feature, description)