## <span style='color:#ff5f27'> 📝 Imports

In [1]:
from math import radians

import numpy as np
import pandas as pd

from features import transactions_fraud

# Mute warnings
import warnings
warnings.filterwarnings("ignore")

## <span style="color:#ff5f27;"> 💽 Loading the Data </span>

In [2]:
# Read the profiles data from a CSV file
profiles_df = pd.read_csv(
    "https://repo.hops.works/master/hopsworks-tutorials/data/card_fraud_online/profiles.csv", 
    parse_dates=["birthdate"],
)

# Rename columns for clarity
profiles_df.columns = ["name", "gender", "mail", "birthdate", "City", "Country", "cc_num"]

# Select only the relevant columns ('cc_num' and 'gender')
profiles_df = profiles_df[["cc_num", "gender"]]

# Display the first three rows of the DataFrame
profiles_df.head(3)

Unnamed: 0,cc_num,gender
0,4796807885357879,F
1,4529266636192966,F
2,4922690008243953,F


In [3]:
# Read the transactions data from a CSV file
trans_df = pd.read_csv(
    "https://repo.hops.works/master/hopsworks-tutorials/data/card_fraud_online/transactions.csv", 
    parse_dates=["datetime"],
)

# Display the first three rows of the DataFrame
trans_df.head(3)

Unnamed: 0,tid,datetime,cc_num,category,amount,latitude,longitude,city,country,fraud_label
0,11df919988c134d97bbff2678eb68e22,2022-01-01 00:00:24,4473593503484549,Health/Beauty,62.95,42.30865,-83.48216,Canton,US,0
1,dd0b2d6d4266ccd3bf05bc2ea91cf180,2022-01-01 00:00:56,4272465718946864,Grocery,85.45,33.52253,-117.70755,Laguna Niguel,US,0
2,e627f5d9a9739833bd52d2da51761fc3,2022-01-01 00:02:32,4104216579248948,Domestic Transport,21.63,37.60876,-77.37331,Mechanicsville,US,0


In [4]:
# Filter transactions DataFrame to include only rows with category "Cash Withdrawal"
trans_df = trans_df[trans_df.category == "Cash Withdrawal"].reset_index(level=0, drop=True)

# Fill missing values in the 'country' column with "US"
trans_df["country"] = trans_df["country"].fillna("US")

# Filter profiles DataFrame to include only rows with credit card numbers present in the filtered transactions DataFrame
profiles_df = profiles_df[profiles_df.cc_num.isin(trans_df.cc_num.unique())].reset_index(level=0, drop=True)

In [5]:
# Sort the transactions DataFrame by 'datetime' and 'cc_num'
trans_df.sort_values(["datetime", "cc_num"], inplace=True)

---

## <span style="color:#ff5f27;"> 🛠️ Feature Engineering </span>

In [6]:
# Use the prepare_transactions_fraud function to process the trans_df DataFrame
trans_df = transactions_fraud.prepare_transactions_fraud(trans_df)

# Display the first three rows of the modified DataFrame
trans_df.head(3)

Unnamed: 0,tid,datetime,cc_num,amount,country,fraud_label,loc_delta_t_plus_1,loc_delta_t_minus_1,time_delta_t_minus_1
0,4c51b54665c7ddb466ea5936f4f3a428,2022-01-01 08:11:01,4467360740682089,77.77,US,0,0.0,0.000148,0.333333
1,4c30185aea2e28e7d9797004710e13c6,2022-01-01 10:03:42,4700702588013561,781.27,US,0,0.0,7e-05,0.416667
2,1a109febabc5c36409f2caf729e110d3,2022-01-01 10:08:59,4205094877256105,36.25,US,0,0.0,0.000108,0.333333


---

## <span style="color:#ff5f27;"> 📡 Connecting to Hopsworks Feature Store </span>

### <span style="color:#ff5f27;"> 🪄 Creating Feature Groups </span>


In [7]:
import hopsworks

project = hopsworks.login()

fs = project.get_feature_store()

2025-06-26 10:51:40,162 INFO: Initializing external client
2025-06-26 10:51:40,162 INFO: Base URL: https://10.87.42.15:28181
2025-06-26 10:51:40,956 INFO: Python Engine initialized.

Logged in to project, explore it here https://10.87.42.15:28181/p/119


In [8]:
# Get or create the 'transactions_fraud_online_fg' feature group
trans_fg = fs.get_or_create_feature_group(
    name="transactions_fraud_online_fg",
    version=1,
    description="Transaction data",
    primary_key=['cc_num'],
    event_time='datetime',
    online_enabled=True,
)

In [9]:
# Insert data into feature group
trans_fg.insert(trans_df)
print('✅ Done!')

Feature Group created successfully, explore it at 
https://10.87.42.15:28181/p/119/fs/67/fg/1037


Uploading Dataframe: 100.00% |█████████████████████████████████████████████████████████████████████████████████████████████████| Rows 365112/365112 | Elapsed Time: 02:18 | Remaining Time: 00:00


Launching job: transactions_fraud_online_fg_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://10.87.42.15:28181/p/119/jobs/named/transactions_fraud_online_fg_1_offline_fg_materialization/executions
✅ Done!


In [10]:
# Update feature descriptions
feature_descriptions = [
    {"name": "tid", "description": "Transaction id"},
    {"name": "datetime", "description": "Transaction time"},
    {"name": "cc_num", "description": "Number of the credit card performing the transaction"},
    {"name": "amount", "description": "Dollar amount of the transaction"},
    {"name": "country", "description": "Country in which the transaction was made"},
    {"name": "fraud_label", "description": "Whether the transaction was fraudulent or not"},
    {"name": "loc_delta_t_minus_1", "description": "Location of previous transaction"},
    {"name": "time_delta_t_minus_1", "description": "Time of previous transaction"},    
]

for desc in feature_descriptions: 
    trans_fg.update_feature_description(desc["name"], desc["description"])

You can move on and do the same thing for the profile and label feature groups.

In [11]:
# Get or create the 'profile_fraud_online_fg' feature group
profile_fg = fs.get_or_create_feature_group(
    name="profile_fraud_online_fg",
    version=1,
    description="Credit card holder demographic data",
    primary_key=['cc_num'],
    online_enabled=True,
)
# Insert data into feature group
profile_fg.insert(profiles_df)
print('✅ Done!')

Feature Group created successfully, explore it at 
https://10.87.42.15:28181/p/119/fs/67/fg/1038


Uploading Dataframe: 100.00% |█████████████████████████████████████████████████████████████████████████████████████████████████████| Rows 1000/1000 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: profile_fraud_online_fg_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://10.87.42.15:28181/p/119/jobs/named/profile_fraud_online_fg_1_offline_fg_materialization/executions
✅ Done!


In [12]:
# Update feature descriptions
feature_descriptions = [
    {"name": "cc_num", "description": "Number of the credit card performing the transaction"},
    {"name": "gender", "description": "Gender of the credit card holder"},
]

for desc in feature_descriptions: 
    profile_fg.update_feature_description(desc["name"], desc["description"])

In [13]:
project._api_key_value

AttributeError: 'Project' object has no attribute '_api_key_value'