In [1]:
import hopsworks
import os
import great_expectations as ge
from great_expectations.core import ExpectationSuite, ExpectationConfiguration

import math
import pandas as pd

In [2]:
# Connect to the Hopsworks Feature Store.
# `hopsworks.login()` returns the project handle; `fs` is the feature store
# handle used below to create the feature group.
project = hopsworks.login()
fs = project.get_feature_store()

2024-10-17 14:34:49,463 INFO: Python Engine initialized.

Logged in to project, explore it here https://demo.hops.works/p/123


In [3]:
# Load the historical transactions CSV from the RawData folder under
# $PROJECT_PATH; the 'datetime' column is parsed as dates while reading.
transactions_pdf = pd.read_csv(
    f"{os.environ['PROJECT_PATH']}/RawData/historical_transactions.csv",
    parse_dates=['datetime'],
)

In [4]:
# Keep only the columns needed for the last-transaction profile and rename
# them in one chained expression, so no frame is mutated in place.
# cc_num is more of an account_id than a proper credit card number, so it is
# renamed to avoid confusion; datetime becomes transaction_datetime.
transactions_pdf = transactions_pdf[
    ["datetime", "cc_num", "latitude", "longitude"]
].rename(columns={"cc_num": "account_id", "datetime": "transaction_datetime"})

In [5]:
# Most recent transaction per account: order rows by transaction time, then
# take the final row of every account_id group.
profiles_last_transaction_pdf = (
    transactions_pdf
    .sort_values(by='transaction_datetime')
    .groupby('account_id')
    .tail(1)
)

In [6]:
# Display the result: the last row per account_id after sorting by transaction_datetime.
profiles_last_transaction_pdf

Unnamed: 0,transaction_datetime,account_id,latitude,longitude
46092,2024-09-19 14:07:29,77025ad95578f857bd6a7390b92e8682,39.717340,-74.969330
47607,2024-09-24 13:43:16,018ff07230cc505876224e941dfd6096,42.527870,-70.928660
48423,2024-09-27 07:15:32,7d772777e1409e7191e646e58811b444,39.334270,-76.439410
48710,2024-09-28 07:00:31,858eeba131592e1a20bba8a83290feb9,31.845680,-102.367640
62072,2024-09-28 19:25:57,46625b13e486eddd36653c86357b99e5,29.657426,-82.316532
...,...,...,...,...
53995,2024-10-15 21:58:15,71884f7fc735581e5151d76e61e22740,35.052660,-78.878360
53996,2024-10-15 22:01:05,3f6bf7522fe533c8ba0c802d14197bf2,33.352830,-111.789030
53997,2024-10-15 22:03:20,b27647b73dcc913bf07f58ab066e32e8,42.583420,-71.802300
53998,2024-10-15 22:06:34,0eea379415b6009b54f528c1f79a78f9,40.557600,-74.284590


In [7]:
# Define the feature group metadata (get-or-create): rows are keyed by
# account_id and transaction_datetime is the event time.
profiles_last_transaction_fg = fs.get_or_create_feature_group(
    name='profiles_last_transaction',
    version=1,
    description='Last transaction time and location for each account',
    primary_key=['account_id'],
    event_time='transaction_datetime',
    online_enabled=True,
    statistics_config=dict(histograms=True, correlations=True),
)

In [8]:
# Insert the per-account rows into the feature group.
# The call is the cell's last expression, so its return value is echoed in
# the cell output after the upload / materialization-job log lines.
profiles_last_transaction_fg.insert(profiles_last_transaction_pdf)

Feature Group created successfully, explore it at 
https://demo.hops.works/p/123/fs/68/fg/1075


Uploading Dataframe: 100.00% |██████████| Rows 1000/1000 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: profiles_last_transaction_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://demo.hops.works/p/123/jobs/named/profiles_last_transaction_1_offline_fg_materialization/executions


(Job('profiles_last_transaction_1_offline_fg_materialization', 'SPARK'), None)