In [2]:
import sys
import os
from datetime import datetime, timedelta, timezone
import pandas as pd

# Add src directory to path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from src import config
from src.data_utils import load_and_process_citibike_data, transform_raw_data_into_ts_data


In [9]:
from src.data_utils import load_and_process_citibike_data, transform_raw_data_into_ts_data
from hsfs.feature import Feature
from datetime import timedelta

In [15]:
import pandas as pd
from pathlib import Path

def load_and_process_citibike_data_from_local(year: int, months: list, base_path: str = "data/citibike") -> pd.DataFrame:
    """
    Load Citi Bike data directly from locally extracted CSV files.
    
    Parameters:
        year (int): The year of the data.
        months (list of int): The months to load (e.g., [1, 2, 3]).
        base_path (str): Base folder where CSVs are stored.

    Returns:
        pd.DataFrame: Concatenated and cleaned dataframe for the given months.
    """
    all_months_data = []
    data_dir = Path(base_path)

    for month in months:
        month_str = f"{year}{month:02d}"
        csv_files = list(data_dir.glob(f"{month_str}-citibike-tripdata*.csv"))

        if not csv_files:
            raise FileNotFoundError(f"❌ No CSV files found for {month_str} in {data_dir}")

        for csv_file in csv_files:
            df = pd.read_csv(csv_file)
            df['source_file'] = csv_file.name  # Optional: helpful for debugging
            all_months_data.append(df)

    df_all = pd.concat(all_months_data, ignore_index=True)

    # Minimal preprocessing (adjust if needed)
    df_all['started_at'] = pd.to_datetime(df_all['started_at'])
    df_all['start_station_id'] = df_all['start_station_id'].astype(str)

    return df_all


In [16]:
def fetch_batch_raw_data(from_date: datetime, to_date: datetime) -> pd.DataFrame:
    if from_date >= to_date:
        raise ValueError("'from_date' must be earlier than 'to_date'.")

    historical_from_date = (from_date - timedelta(weeks=52)).replace(tzinfo=None)
    historical_to_date = (to_date - timedelta(weeks=52)).replace(tzinfo=None)

    rides_from = load_and_process_citibike_data_from_local(
        year=historical_from_date.year, months=[historical_from_date.month]
    )
    rides_from['started_at'] = rides_from['started_at'].dt.tz_localize(None)
    rides_from = rides_from[rides_from.started_at >= historical_from_date]

    if historical_to_date.month != historical_from_date.month:
        rides_to = load_and_process_citibike_data_from_local(
            year=historical_to_date.year, months=[historical_to_date.month]
        )
        rides_to['started_at'] = rides_to['started_at'].dt.tz_localize(None)
        rides_to = rides_to[rides_to.started_at < historical_to_date]
        rides = pd.concat([rides_from, rides_to], ignore_index=True)
    else:
        rides = rides_from

    rides['started_at'] += timedelta(weeks=52)
    rides.sort_values(by=['start_station_id', 'started_at'], inplace=True)

    return rides


In [17]:
# Since we have data till March 2025 (shifted forward), fetch last 30 days from April 1st
fetch_data_to = pd.Timestamp("2025-04-01 00:00:00", tz="UTC")
fetch_data_from = fetch_data_to - timedelta(days=29)


In [18]:
rides = fetch_batch_raw_data(fetch_data_from, fetch_data_to)
print("✅ Raw data shape:", rides.shape)
rides.head()




✅ Raw data shape: (2524662, 14)


Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,source_file
2423338,4863AD999C0FF96A,classic_bike,2025-03-03 07:56:26.881,2024-03-04 08:42:32.929,67 St & Erik Pl,2733.03,Old Slip & South St,4993.13,40.633385,-74.016562,40.703367,-74.007868,member,202403-citibike-tripdata.csv
1767937,BFDCAE10479EBE64,classic_bike,2025-03-03 11:27:33.039,2024-03-04 11:29:39.185,67 St & Erik Pl,2733.03,5 Ave & 67 St,2782.02,40.633385,-74.016562,40.635679,-74.020005,member,202403-citibike-tripdata.csv
1950372,C47793EFA725FFD9,electric_bike,2025-03-03 11:41:17.216,2024-03-04 12:18:30.784,67 St & Erik Pl,2733.03,Caton Ave & Argyle Rd,3303.03,40.633358,-74.016553,40.649681,-73.967829,member,202403-citibike-tripdata.csv
1535852,EEA928AACAE45C2E,electric_bike,2025-03-03 17:39:03.303,2024-03-04 18:12:42.351,67 St & Erik Pl,2733.03,E 18 St & Church Ave,3263.01,40.633442,-74.016552,40.64958,-73.96316,member,202403-citibike-tripdata.csv
2248369,28498637211D1D1A,classic_bike,2025-03-04 18:05:04.760,2024-03-05 18:38:04.203,67 St & Erik Pl,2733.03,Douglass St & 4 Ave,4175.14,40.633385,-74.016562,40.679279,-73.98154,member,202403-citibike-tripdata.csv


In [20]:
rides = rides.rename(columns={"started_at": "pickup_datetime", "start_station_id": "pickup_location_id"})


In [27]:
print(rides.columns)
rides.head()


Index(['ride_id', 'rideable_type', 'pickup_datetime', 'ended_at',
       'start_station_name', 'pickup_location_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'source_file', 'pickup_hour'],
      dtype='object')


Unnamed: 0,ride_id,rideable_type,pickup_datetime,ended_at,start_station_name,pickup_location_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,source_file,pickup_hour
2423338,4863AD999C0FF96A,classic_bike,2025-03-03 07:56:26.881,2024-03-04 08:42:32.929,67 St & Erik Pl,2733.03,Old Slip & South St,4993.13,40.633385,-74.016562,40.703367,-74.007868,member,202403-citibike-tripdata.csv,2025-03-03 07:00:00
1767937,BFDCAE10479EBE64,classic_bike,2025-03-03 11:27:33.039,2024-03-04 11:29:39.185,67 St & Erik Pl,2733.03,5 Ave & 67 St,2782.02,40.633385,-74.016562,40.635679,-74.020005,member,202403-citibike-tripdata.csv,2025-03-03 11:00:00
1950372,C47793EFA725FFD9,electric_bike,2025-03-03 11:41:17.216,2024-03-04 12:18:30.784,67 St & Erik Pl,2733.03,Caton Ave & Argyle Rd,3303.03,40.633358,-74.016553,40.649681,-73.967829,member,202403-citibike-tripdata.csv,2025-03-03 11:00:00
1535852,EEA928AACAE45C2E,electric_bike,2025-03-03 17:39:03.303,2024-03-04 18:12:42.351,67 St & Erik Pl,2733.03,E 18 St & Church Ave,3263.01,40.633442,-74.016552,40.64958,-73.96316,member,202403-citibike-tripdata.csv,2025-03-03 17:00:00
2248369,28498637211D1D1A,classic_bike,2025-03-04 18:05:04.760,2024-03-05 18:38:04.203,67 St & Erik Pl,2733.03,Douglass St & 4 Ave,4175.14,40.633385,-74.016562,40.679279,-73.98154,member,202403-citibike-tripdata.csv,2025-03-04 18:00:00


In [29]:
import pandas as pd

# Ensure proper datatypes
rides['pickup_datetime'] = pd.to_datetime(rides['pickup_datetime'], errors='coerce')
rides = rides[pd.to_numeric(rides['pickup_location_id'], errors='coerce').notna()]
rides['pickup_location_id'] = rides['pickup_location_id'].astype(float)

# Top 3 station IDs
top_station_ids = [6140.05, 6948.10, 5329.03]

# Filter for top stations
top_rides = rides[rides['pickup_location_id'].isin(top_station_ids)].copy()

# Extract year
top_rides['year'] = top_rides['pickup_datetime'].dt.year

# Group and count
ride_counts = (
    top_rides
    .groupby(['pickup_location_id', 'year'])
    .size()
    .reset_index(name='ride_count')
    .sort_values(['pickup_location_id', 'year'])
)

# Show the result
print("📊 Ride counts for top 3 start stations by year:")
print(ride_counts.to_string(index=False))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


📊 Ride counts for top 3 start stations by year:
 pickup_location_id  year  ride_count
            5329.03  2025        6261
            6140.05  2025       10652
            6948.10  2025        9179


In [21]:
ts_data = transform_raw_data_into_ts_data(rides)
print("✅ Transformed time-series shape:", ts_data.shape)
ts_data.head()


✅ Transformed time-series shape: (1554864, 3)


Unnamed: 0,pickup_hour,pickup_location_id,rides
0,2025-03-03 00:00:00,2733.03,0
1,2025-03-03 01:00:00,2733.03,0
2,2025-03-03 02:00:00,2733.03,0
3,2025-03-03 03:00:00,2733.03,0
4,2025-03-03 04:00:00,2733.03,0


In [30]:
# Keep only top 3 station IDs
top_station_ids = [6140.05, 6948.10, 5329.03]
ts_data = ts_data[ts_data['pickup_location_id'].isin(top_station_ids)].copy()

# Convert pickup_location_id and rides to correct dtypes (int)
ts_data['pickup_location_id'] = pd.to_numeric(ts_data['pickup_location_id'], errors='coerce').astype(float).astype(int)
ts_data['rides'] = pd.to_numeric(ts_data['rides'], errors='coerce').astype(int)


In [32]:
ts_data['pickup_location_id'] = ts_data['pickup_location_id'].astype("int32")
ts_data['rides'] = ts_data['rides'].astype("int32")


In [33]:
import hopsworks

project = hopsworks.login(
    project=config.HOPSWORKS_PROJECT_NAME,
    api_key_value=config.HOPSWORKS_API_KEY
)

fs = project.get_feature_store()
fg = fs.get_feature_group(
    name=config.FEATURE_GROUP_NAME,
    version=config.FEATURE_GROUP_VERSION
)

fg.insert(ts_data, write_options={"wait_for_job": False})


2025-05-02 23:47:37,891 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-02 23:47:37,896 INFO: Initializing external client
2025-05-02 23:47:37,896 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-02 23:47:38,495 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683


Uploading Dataframe: 0.00% |          | Rows 0/0 | Elapsed Time: 00:00 | Remaining Time: ?
Use fg.materialization_job.run(args=-op offline_fg_materialization -path hdfs:///Projects/sp25_taxi_main/Resources/jobs/time_series_hourly_feature_group_1_offline_fg_materialization/config_1741035530909) to trigger the materialization job again.


(Job('time_series_hourly_feature_group_1_offline_fg_materialization', 'SPARK'),
 None)

In [34]:
import hopsworks
from hsfs.feature import Feature

project = hopsworks.login()
fs = project.get_feature_store()

# Drop existing feature group if it exists
try:
    fg = fs.get_feature_group("time_series_hourly_feature_group", version=1)
    fg.delete()
    print("✅ Deleted old feature group.")
except:
    print("ℹ️ No existing feature group to delete.")

# Define new schema
features = [
    Feature(name="pickup_hour", type="timestamp"),
    Feature(name="pickup_location_id", type="string"),  # Changed from int
    Feature(name="rides", type="int"),
]

# Create new feature group
fg = fs.create_feature_group(
    name="time_series_hourly_feature_group",
    version=1,
    description="Hourly aggregated Citi Bike rides per location",
    primary_key=["pickup_hour", "pickup_location_id"],
    event_time="pickup_hour",
    features=features,
    online_enabled=True
)

print("✅ Created new feature group with correct schema.")


2025-05-03 13:40:07,025 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 13:40:07,048 INFO: Initializing external client
2025-05-03 13:40:07,049 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 13:40:07,697 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683




ℹ️ No existing feature group to delete.
✅ Created new feature group with correct schema.


In [36]:
import hopsworks
import src.config as c           # has the FEATURE_VIEW_NAME / VERSION you use

project = hopsworks.login(
    project       = c.HOPSWORKS_PROJECT_NAME,
    api_key_value = c.HOPSWORKS_API_KEY,
)
fs = project.get_feature_store()

print("✅  Connected. Feature‑store id:", fs.id)


2025-05-03 15:29:14,609 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 15:29:14,611 INFO: Initializing external client
2025-05-03 15:29:14,612 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 15:29:15,283 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683
✅  Connected. Feature‑store id: 1202314


In [37]:
from hsfs.feature import Feature

NAME    = c.FEATURE_VIEW_NAME        # "time_series_hourly_feature_view"
OLD_VER = c.FEATURE_VIEW_VERSION     # 1
NEW_VER = OLD_VER + 1                # 2

# ── A) Delete the stale v1 (ignore error if it’s already gone)
try:
    fs.get_feature_view(NAME, version=OLD_VER).delete()
    print(f"🗑️  Deleted old feature‑view {NAME} v{OLD_VER}")
except Exception as e:
    print(f"ℹ️  Could not delete v{OLD_VER} (probably already gone).")

# ── B) Point the view at the new feature‑group you created
fg = fs.get_feature_group("time_series_hourly_feature_group", version=1)

fv = fs.create_feature_view(
    name        = NAME,
    version     = NEW_VER,
    description = "Hourly Citi Bike rides (3 top stations)",
    labels      = ["rides"],
    query       = fg.select_all()
)

print(f"✅  Feature‑view rebuilt: {fv.name}  v{fv.version}")


ℹ️  Could not delete v1 (probably already gone).


RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1214683/featurestores/1202314/featureview). Server response: 
HTTP code: 500, HTTP reason: Internal Server Error, body: b'{"errorCode":120003,"usrMsg":"Transaction marked for rollback.","errorMsg":"The last transaction did not complete as expected"}', error code: 120003, error msg: The last transaction did not complete as expected, user msg: Transaction marked for rollback.

In [None]:
# --- reconnect (fresh session is safest) -----------------
project = hopsworks.login(
    project       = c.HOPSWORKS_PROJECT_NAME,
    api_key_value = c.HOPSWORKS_API_KEY,
)
fs = project.get_feature_store()

# --- choose the next clean version -----------------------
NAME     = c.FEATURE_VIEW_NAME          # "time_series_hourly_feature_view"
NEW_VER  = 3                            # skip v2 that failed

fg = fs.get_feature_group("time_series_hourly_feature_group", version=1)

fv = fs.create_feature_view(
    name        = NAME,
    version     = NEW_VER,
    description = "Hourly Citi Bike rides (3 top stations)",
    labels      = ["rides"],
    query       = fg.select_all()
)

print(f"✅  Feature‑view created: {fv.name}  v{fv.version}")


2025-05-03 15:31:58,693 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 15:31:58,695 INFO: Initializing external client
2025-05-03 15:31:58,696 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 15:31:59,308 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683


RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1214683/featurestores/1202314/featureview). Server response: 
HTTP code: 500, HTTP reason: Internal Server Error, body: b'{"errorCode":120003,"usrMsg":"Transaction marked for rollback.","errorMsg":"The last transaction did not complete as expected"}', error code: 120003, error msg: The last transaction did not complete as expected, user msg: Transaction marked for rollback.

In [39]:
import hopsworks, src.config as c

project = hopsworks.login(
    project       = c.HOPSWORKS_PROJECT_NAME,
    api_key_value = c.HOPSWORKS_API_KEY,
)
fs = project.get_feature_store()

NEW_NAME = "cb_hourly_feature_view"     # any fresh name
NEW_VER  = 1                            # start at 1 again

fg = fs.get_feature_group("time_series_hourly_feature_group", version=1)

fv = fs.create_feature_view(
    name        = NEW_NAME,
    version     = NEW_VER,
    description = "Hourly Citi Bike rides (3 top stations)",
    labels      = ["rides"],
    query       = fg.select_all()
)

print(f"✅  Feature‑view created: {fv.name}  v{fv.version}")


2025-05-03 15:33:31,220 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 15:33:31,222 INFO: Initializing external client
2025-05-03 15:33:31,222 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 15:33:31,848 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683


RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1214683/featurestores/1202314/featureview). Server response: 
HTTP code: 500, HTTP reason: Internal Server Error, body: b'{"errorCode":120003,"usrMsg":"Transaction marked for rollback.","errorMsg":"The last transaction did not complete as expected"}', error code: 120003, error msg: The last transaction did not complete as expected, user msg: Transaction marked for rollback.

In [41]:
# list every feature‑view in the store
views = fs.get_feature_views(name=None)   # ← pass name=None
for v in views:
    # .id may not be exposed in some versions; print status too
    print(f"{v.name:<35}  v{v.version:<2}  status={getattr(v, 'status', '‑')}  id={getattr(v, 'id', '‑')}")


RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1214683/featurestores/1202314/featureview/None). Server response: 
HTTP code: 404, HTTP reason: Not Found, body: b'{"errorCode":120004,"usrMsg":"HTTP 404 Not Found","errorMsg":"Web application exception occurred"}', error code: 120004, error msg: Web application exception occurred, user msg: HTTP 404 Not Found

In [42]:
VIEW_NAME = c.FEATURE_VIEW_NAME          # "time_series_hourly_feature_view"

views = fs.get_feature_views(name=VIEW_NAME)   # returns *all* versions for that name
for v in views:
    print(f"{v.name:<35}  v{v.version:<3}   status={getattr(v,'status','‑')}")


ValueError: Cannot get back the feature view because the query defined is no longer valid. Some feature groups used in the query may have been deleted. You can clean up this feature view on the UI or `FeatureView.clean`.

In [43]:
from hsfs.client.exceptions import RestAPIError

VIEW = c.FEATURE_VIEW_NAME          # "time_series_hourly_feature_view"

# brute‑force try versions 1‑10 and hard‑delete if they exist
for v in range(1, 11):
    try:
        fv = fs.get_feature_view(VIEW, version=v)
        print(f"🗑️  found {VIEW} v{v} – cleaning …")
        fv.clean(hard_delete=True)          # <- *really* removes metadata
        print(f"   → deleted v{v}")
    except (ValueError, RestAPIError):
        pass                                # view/version not present / already invalid


🗑️  found time_series_hourly_feature_view v2 – cleaning …


AttributeError: 'NoneType' object has no attribute 'clean'

In [44]:
from hsfs.client.exceptions import RestAPIError

VIEW = c.FEATURE_VIEW_NAME          # "time_series_hourly_feature_view"

for v in range(1, 11):
    try:
        fv = fs.get_feature_view(VIEW, version=v)
        if fv is None:                       # ← skip missing / corrupt entries
            continue
        print(f"🗑️  found {VIEW} v{v} – cleaning …")
        fv.clean(hard_delete=True)
        print(f"   → deleted v{v}")
    except (ValueError, RestAPIError):
        pass


In [45]:
# still in the same notebook / Python session
fg = fs.get_feature_group("time_series_hourly_feature_group", version=1)

fv = fs.create_feature_view(
    name        = c.FEATURE_VIEW_NAME,     # "time_series_hourly_feature_view"
    version     = 1,                       # brand‑new v1
    description = "Hourly Citi Bike rides (3 top stations)",
    labels      = ["rides"],
    query       = fg.select_all()
)

print("✅  Feature‑view ready:", fv.name, "v", fv.version)


RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1214683/featurestores/1202314/featureview). Server response: 
HTTP code: 400, HTTP reason: Bad Request, body: b'{"errorCode":270179,"usrMsg":"Feature view: time_series_hourly_feature_view, version: 1","errorMsg":"The provided feature view name and version already exists"}', error code: 270179, error msg: The provided feature view name and version already exists, user msg: Feature view: time_series_hourly_feature_view, version: 1

In [48]:
from hsfs.core.feature_view_api import FeatureViewApi
from hopsworks.client.exceptions import RestAPIError

VIEW_NAME = c.FEATURE_VIEW_NAME    # "time_series_hourly_feature_view"
VIEW_VER  = 1

# get the numeric id of the current feature‑store
store_id = fs.id            # <‑─ this exists on the FeatureStore object
# (in older hsfs versions it’s `fs._id`; use whichever is present)

_fv_api = FeatureViewApi(store_id)

try:
    print(f"🔍 trying hard‑delete of {VIEW_NAME} v{VIEW_VER} …")
    _fv_api.delete(name=VIEW_NAME, version=VIEW_VER, hard_delete=True)
    print("✅ hard‑deleted")
except RestAPIError:
    print("ℹ️  nothing to delete – continuing")


🔍 trying hard‑delete of time_series_hourly_feature_view v1 …


AttributeError: 'FeatureViewApi' object has no attribute 'delete'

In [49]:
from hsfs.client.exceptions import RestAPIError

VIEW = c.FEATURE_VIEW_NAME          # "time_series_hourly_feature_view"

for v in range(1, 11):               # try v1 … v10
    try:
        fv = fs.get_feature_view(VIEW, version=v)
        print(f"🗑️  cleaning {VIEW} v{v} …")
        fv.clean(hard_delete=True)   # <-- really drops metadata
    except (ValueError, RestAPIError):
        # either the view/version doesn’t exist, or the query is broken
        continue


🗑️  cleaning time_series_hourly_feature_view v2 …


AttributeError: 'NoneType' object has no attribute 'clean'

In [50]:
import hopsworks, src.config as c

# ── connect ─────────────────────────────────────────────
project = hopsworks.login(
    project       = c.HOPSWORKS_PROJECT_NAME,
    api_key_value = c.HOPSWORKS_API_KEY,
)
fs = project.get_feature_store()

# ── point the view at the feature‑group we populated ────
fg = fs.get_feature_group("time_series_hourly_feature_group", version=1)

fv = fs.create_feature_view(
    name        = c.FEATURE_VIEW_NAME,       # "time_series_hourly_feature_view"
    version     = 1,                         # new v1 (store is empty now)
    description = "Hourly Citi Bike rides (3 top stations)",
    labels      = ["rides"],
    query       = fg.select_all()
)

print("✅ feature‑view created:", fv.name, "v", fv.version)


2025-05-03 16:02:11,889 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 16:02:11,892 INFO: Initializing external client
2025-05-03 16:02:11,893 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 16:02:12,564 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683


RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1214683/featurestores/1202314/featureview). Server response: 
HTTP code: 500, HTTP reason: Internal Server Error, body: b'{"errorCode":120003,"usrMsg":"Transaction marked for rollback.","errorMsg":"The last transaction did not complete as expected"}', error code: 120003, error msg: The last transaction did not complete as expected, user msg: Transaction marked for rollback.

In [51]:
NEW_VIEW_NAME = "citibike_hourly_feature_view"   # <‑ any unused name
fg = fs.get_feature_group("time_series_hourly_feature_group", version=1)

fv = fs.create_feature_view(
    name        = NEW_VIEW_NAME,
    version     = 1,
    description = "Hourly Citi Bike rides (3 top stations)",
    labels      = ["rides"],
    query       = fg.select_all()
)

print("✅ feature‑view created:", fv.name, "v", fv.version)


RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1214683/featurestores/1202314/featureview). Server response: 
HTTP code: 500, HTTP reason: Internal Server Error, body: b'{"errorCode":120003,"usrMsg":"Transaction marked for rollback.","errorMsg":"The last transaction did not complete as expected"}', error code: 120003, error msg: The last transaction did not complete as expected, user msg: Transaction marked for rollback.

In [52]:
import hopsworks, src.config as c

project = hopsworks.login(
    project       = c.HOPSWORKS_PROJECT_NAME,
    api_key_value = c.HOPSWORKS_API_KEY,
)
fs = project.get_feature_store()

# list everything the store still thinks exists
for fv in fs.get_feature_views(name=None):
    print(f"🟢  {fv.name:<35}  v{fv.version}")


2025-05-03 16:21:01,364 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 16:21:01,367 INFO: Initializing external client
2025-05-03 16:21:01,367 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 16:21:02,119 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683


RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1214683/featurestores/1202314/featureview/None). Server response: 
HTTP code: 404, HTTP reason: Not Found, body: b'{"errorCode":120004,"usrMsg":"HTTP 404 Not Found","errorMsg":"Web application exception occurred"}', error code: 120004, error msg: Web application exception occurred, user msg: HTTP 404 Not Found

In [53]:
import hopsworks, src.config as c, json, requests

project = hopsworks.login(
    project       = c.HOPSWORKS_PROJECT_NAME,
    api_key_value = c.HOPSWORKS_API_KEY,
)
fs = project.get_feature_store()

# the REST URL we need
base   = project._client._base_url          # e.g. https://c.app.hopsworks.ai
projId = project.id
storeId = fs.id

url = f"{base}/hopsworks-api/api/project/{projId}/featurestores/{storeId}/featureview"

resp = requests.get(url, headers=project._client._get_auth_header())
resp.raise_for_status()
views = resp.json()["items"]

print(f"Found {len(views)} Feature View objects in metadata:")
for v in views:
    print(" •", v["name"], "v", v["version"])


2025-05-03 16:23:27,398 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 16:23:27,401 INFO: Initializing external client
2025-05-03 16:23:27,401 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 16:23:28,124 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683


AttributeError: 'Project' object has no attribute '_client'

In [54]:
 import os, requests, json, hopsworks, src.config as c

# ── 1 login just once — NO feature‑view code yet ───────────────────────────────
project = hopsworks.login(
    project       = c.HOPSWORKS_PROJECT_NAME,
    api_key_value = c.HOPSWORKS_API_KEY,
)
fs = project.get_feature_store()

# ── 2 build the raw REST URL ───────────────────────────────────────────────────
base_url = project.client._base_url           # e.g. https://c.app.hopsworks.ai
proj_id  = project.id
store_id = fs.id

url = f"{base_url}/hopsworks-api/api/project/{proj_id}/featurestores/{store_id}/featureview"

# ── 3 GET every feature‑view object in JSON ────────────────────────────────────
hdrs = project.client._get_auth_header()
resp  = requests.get(url, headers=hdrs, timeout=60)
resp.raise_for_status()

views_json = resp.json()["items"]
print(f"\n📋  Registry currently contains {len(views_json)} Feature Views")
for v in views_json:
    print(" •", v["name"], "v", v["version"])


2025-05-03 16:28:13,950 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 16:28:13,953 INFO: Initializing external client
2025-05-03 16:28:13,954 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 16:28:14,585 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683


AttributeError: 'Project' object has no attribute 'client'

In [57]:
import requests, hopsworks, src.config as c
from hopsworks import client as _hc

# ── 1 login once ──────────────────────────────────────────────────────────────
project = hopsworks.login(
    project       = c.HOPSWORKS_PROJECT_NAME,
    api_key_value = c.HOPSWORKS_API_KEY,
)
fs         = project.get_feature_store()
raw_client = _hc.get_instance()          # global low‑level client

# ── 2 assemble REST URL + auth header (OLD helper) ────────────────────────────
base_url = raw_client._base_url          # e.g. https://c.app.hopsworks.ai
proj_id  = project.id
store_id = fs.id

url  = f"{base_url}/hopsworks-api/api/project/{proj_id}/featurestores/{store_id}/featureview"

hdrs = raw_client._get_auth_header()     # ← works on older SDKs

# ── 3 list every Feature‑View JSON still in the registry ──────────────────────
resp = requests.get(url, headers=hdrs, timeout=60)
resp.raise_for_status()

items = resp.json()["items"]
print(f"\n📋  Registry currently contains {len(items)} Feature Views:")
for v in items:
    print(f" • {v['name']:<35}  v{v['version']}")


2025-05-03 16:44:11,504 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 16:44:11,506 INFO: Initializing external client
2025-05-03 16:44:11,506 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 16:44:12,227 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683


AttributeError: 'Client' object has no attribute '_get_auth_header'

In [58]:
import hopsworks
import pandas as pd
import src.config as c  # ensure your config contains project name and API key

# 1. Connect to Hopsworks
project = hopsworks.login(
    project=c.HOPSWORKS_PROJECT_NAME,
    api_key_value=c.HOPSWORKS_API_KEY,
)
fs = project.get_feature_store()

# 2. Load your final hourly dataframe (replace with your actual dataframe)
df = pd.read_parquet("data/hourly_data.parquet")  # adjust path as needed

# 3. Create feature group
fg = fs.create_feature_group(
    name=c.FEATURE_GROUP_NAME,  # e.g. "time_series_hourly_feature_group"
    version=1,
    description="Hourly aggregated Citi Bike rides per location",
    primary_key=["pickup_location_id", "pickup_hour"],
    event_time="pickup_hour",
)

# 4. Save the data to the feature group
fg.insert(df, write_options={"wait_for_job": True})


2025-05-03 17:02:49,966 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-03 17:02:49,971 INFO: Initializing external client
2025-05-03 17:02:49,971 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-03 17:02:50,650 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214683


FileNotFoundError: [Errno 2] No such file or directory: 'data/hourly_data.parquet'