In [1]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window
import hopsworks
from hsfs.feature import Feature

# Log in to the Hopsworks project and get a handle to its feature store.
fs = hopsworks.login().get_feature_store()

# Fetch the existing raw clickstream feature group. The version is pinned
# explicitly so the status message below matches what is actually loaded.
clicks_fg = fs.get_feature_group("user_clicks", version=1)

# This cell only retrieves an existing feature group; nothing is created here.
print("✅ Loaded Feature Group: user_clicks v1")

# Read the feature group into a DataFrame and drop click_id, which is not
# needed in the new feature group with aggregates.
df = clicks_fg.read().drop("click_id")
df

Initializing spark session...
Spark session available as `spark`.

Logged in to project, explore it here https://snurran.devnet.hops.works/p/4217

✅ Created Feature Group: user_clicks v1 with explicit schema


DataFrame[user_id: bigint, event_time: bigint]

----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 49926)
Traceback (most recent call last):
  File "/srv/hops/anaconda/envs/hopsworks_environment/lib/python3.10/socketserver.py", line 316, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/srv/hops/anaconda/envs/hopsworks_environment/lib/python3.10/socketserver.py", line 347, in process_request
    self.finish_request(request, client_address)
  File "/srv/hops/anaconda/envs/hopsworks_environment/lib/python3.10/socketserver.py", line 360, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/srv/hops/anaconda/envs/hopsworks_environment/lib/python3.10/socketserver.py", line 747, in __init__
    self.handle()
  File "/srv/hops/spark/python/pyspark/accumulators.py", line 262, in handle
    poll(accum_updates)
  File "/srv/hops/spark/python/pyspark/accumulators.py", line 235, in poll
    if func():
  File "/srv/hops/

In [2]:
# Explicit schema: the two key columns followed by one click-count column per
# window. Every column is declared with the same "bigint" type.
schema = [
    Feature(name=column, type="bigint")
    for column in (
        "user_id",
        "event_time",
        "click_count_1_min",
        "click_count_10_min",
        "click_count_30_min",
        "click_count_1_hour",
    )
]

# Fetch the aggregate feature group if it already exists, otherwise register
# it with the schema above.
window_fg = fs.get_or_create_feature_group(
    # identity
    name="windowed_click_counts",
    version=1,
    description="Aggregated user clickstream data (1 min, 10/30/60 min windows)",
    # schema and keys
    features=schema,
    primary_key=["user_id", "event_time"],
    event_time="event_time",
    # serving
    online_enabled=True,
)


In [3]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window

def _trailing_window(span):
    """Return a per-user window ordered by event_time covering [current - span, current].

    The range offsets are expressed in the same unit as event_time, and both
    bounds are inclusive, so a row exactly `span` units older than the current
    row is still counted.
    """
    per_user = Window.partitionBy("user_id")
    by_time = per_user.orderBy(F.col("event_time"))
    return by_time.rangeBetween(-span, 0)


# One window spec per aggregation horizon (spans match the original
# 1 min / 10 min / 30 min / 1 hour comments, i.e. event_time is treated as seconds).
w1 = _trailing_window(60)
w10 = _trailing_window(600)
w30 = _trailing_window(1800)
w60 = _trailing_window(3600)

# Attach one click-count column per window, in the order the schema lists them.
for feature_name, window_spec in [
    ("click_count_1_min", w1),
    ("click_count_10_min", w10),
    ("click_count_30_min", w30),
    ("click_count_1_hour", w60),
]:
    df = df.withColumn(feature_name, F.count("*").over(window_spec))

In [4]:
# Print the DataFrame as a text table to eyeball the click-count columns
# alongside user_id and event_time.
df.show()

+-------+----------+-----------------+------------------+------------------+------------------+
|user_id|event_time|click_count_1_min|click_count_10_min|click_count_30_min|click_count_1_hour|
+-------+----------+-----------------+------------------+------------------+------------------+
|     26|1760036400|                1|                 1|                 1|                 1|
|     26|1760036402|                2|                 2|                 2|                 2|
|     26|1760036406|                3|                 3|                 3|                 3|
|     26|1760036407|                4|                 4|                 4|                 4|
|     26|1760036411|                5|                 5|                 5|                 5|
|     26|1760036413|                6|                 6|                 6|                 6|
|     26|1760036418|                7|                 7|                 7|                 7|
|     26|1760036424|                8|  

In [5]:
# Write the aggregated DataFrame into the windowed_click_counts feature group.
# NOTE(review): the feature group's primary key is (user_id, event_time); df may
# contain several rows with the same pair (e.g. two clicks by one user at the
# same event_time) — confirm whether duplicates should be dropped before insert.
window_fg.insert(df)

Feature Group created successfully, explore it at 
https://snurran.devnet.hops.works/p/4217/fs/4165/fg/5146


(None, None)