In [16]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import sys
import os

# Make the parent of the current working directory importable so that the
# `src` package resolves. The membership check keeps this cell idempotent:
# re-running it no longer appends the same entry to sys.path again.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
import src.config as config

In [18]:
from src.inference import get_feature_store, fetch_predictions
import pandas as pd
from datetime import datetime, timedelta, timezone


def fetch_days_data(days, offset_days=365, version=1):
    """Return feature-group rows for a `days`-long window ending `offset_days` ago.

    The window is ``[now - (offset_days + days), now - offset_days]`` with
    "now" taken in UTC. Both ends are inclusive and are compared against the
    ``pickup_hour`` column. The two bounds are printed before the read.

    Args:
        days: Length of the window in days. Must be non-negative.
        offset_days: How many days before "now" the window ends. Defaults to
            365, the value that used to be hard-coded.
        version: Feature group version to read. Defaults to 1, the value that
            used to be hard-coded.

    Returns:
        The rows of the feature group whose ``pickup_hour`` lies inside the
        window.

    Raises:
        ValueError: If ``days`` is negative; the window would be inverted and
            the result would otherwise be silently empty.
    """
    if days < 0:
        raise ValueError(f"days must be non-negative, got {days}")

    current_date = pd.to_datetime(datetime.now(timezone.utc))
    fetch_data_to = current_date - timedelta(days=offset_days)
    fetch_data_from = fetch_data_to - timedelta(days=days)
    print(fetch_data_from, fetch_data_to)

    fs = get_feature_store()
    fg = fs.get_feature_group(
        name=config.FEATURE_GROUP_NAME,
        version=version,
    )

    # The whole feature group is read and the window is applied in pandas.
    # NOTE(review): a server-side `query.filter(...)` on pickup_hour had been
    # tried here and left commented out; revisit if the full read gets slow.
    df = fg.select_all().read()
    in_window = df["pickup_hour"].between(fetch_data_from, fetch_data_to)
    return df[in_window]

In [19]:
# Pull the 180-day window of feature-group rows; the cells below derive the
# training features and targets from it.
ts_data = fetch_days_data(180)

2023-09-08 07:06:17.538669+00:00 2024-03-06 07:06:17.538669+00:00
2025-03-06 02:06:17,540 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-06 02:06:17,543 INFO: Initializing external client
2025-03-06 02:06:17,544 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-06 02:06:18,210 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215680
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (5.88s) 


2025-03-04 10:17:13,963 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214690
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (5.75s) 


In [20]:
from src.data_utils import transform_ts_data_info_features_and_target

# Convert the time-series rows into a features frame and a targets vector
# using the project helper.
# window_size=24*28 is 672 rows per window (28 days if rows are hourly — TODO confirm).
# NOTE(review): step_size=23 is not explained in this notebook — confirm against src.data_utils.
features, targets = transform_ts_data_info_features_and_target(ts_data, window_size=24*28, step_size=23)

In [21]:
from src.pipeline_utils import get_pipeline
# Build the project pipeline and fit it on every row of features/targets;
# no rows are held out in this notebook.
pipeline = get_pipeline()
pipeline.fit(features, targets)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 171646
[LightGBM] [Info] Number of data points in the train set: 23660, number of used features: 675
[LightGBM] [Info] Start training from score 16.623838


In [22]:
from sklearn.metrics import mean_absolute_error
# Predict on the same `features` frame that the pipeline was fitted on.
predictions = pipeline.predict(features)

In [23]:
# Mean absolute error between the training targets and the predictions made
# on the training features.
# NOTE(review): despite its name, `test_mae` is therefore an in-sample error
# (the pipeline was fitted on these same rows). Later cells store it in the
# model registry under the "test_mae" key, so it is not comparable with a
# metric computed on held-out data — consider a time-based holdout split.
test_mae = mean_absolute_error(targets, predictions)
print(f"{test_mae:.4f}")

2.9019


In [24]:
from src.inference import load_metrics_from_registry 

# Fetch the metrics stored with a model in the registry (presumably the
# latest registered version — TODO confirm in src.inference).
metric = load_metrics_from_registry()

2025-03-06 02:06:29,839 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-06 02:06:29,841 INFO: Initializing external client
2025-03-06 02:06:29,841 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-06 02:06:30,440 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215680


In [25]:
# Show the registry metrics dict loaded in the previous cell.
metric

{'test_mae': 1.935489834251996}

In [26]:
from src.inference import load_model_from_registry
# Download the registered model artifact.
# NOTE(review): `model` is not used before a later cell rebinds the name to
# the object returned by `create_model(...)`.
model = load_model_from_registry()

2025-03-06 02:06:33,647 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-06 02:06:33,649 INFO: Initializing external client
2025-03-06 02:06:33,649 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-06 02:06:34,292 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215680
Downloading model artifact (0 dirs, 1 files)... DONE

In [27]:
import joblib
import src.config as config

# Persist the fitted pipeline locally. The import above now binds the `config`
# alias that this cell actually uses, and the target directory is created
# first so the dump does not fail when the models folder is missing (the
# later registration cell does the same).
model_path = config.MODELS_DIR / "lgb_model.pkl"
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipeline, model_path)

['/Users/ramprakashyallavula/Downloads/sp25_taxi-main 2/models/lgb_model.pkl']

In [28]:
from hsml.schema import Schema
from hsml.model_schema import ModelSchema

# Build the schema objects that are passed to `create_model(...)` when the
# model is registered: one from the features frame, one from the targets.
input_schema = Schema(features)
output_schema = Schema(targets)
model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)

In [29]:
from src.inference import get_feature_store, fetch_predictions
import pandas as pd
from datetime import datetime, timedelta, timezone
import src.config as config
import joblib
from hsml.schema import Schema
from hsml.model_schema import ModelSchema
import hopsworks

try:
    # Step 1: Initialize Hopsworks project
    project = hopsworks.login(
        api_key_value=config.HOPSWORKS_API_KEY
    )

    # Step 2: Fetch historical data through the shared helper instead of
    # repeating its window arithmetic and filtering inline.
    ts_data = fetch_days_data(180)

    # NOTE(review): steps 3-5 (feature building, fitting, scoring, local save)
    # are not repeated in this cell. It registers the `features`, `test_mae`,
    # `model_schema` and pickle file produced by the earlier cells.

    # Step 6: Update model registry using initialized project
    model_registry = project.get_model_registry()
    model = model_registry.sklearn.create_model(
        name=config.MODEL_NAME,
        metrics={"test_mae": test_mae},
        description="LightGBM regressor - Retrained",
        input_example=features.sample(),
        model_schema=model_schema
    )

    model.save(str(config.MODELS_DIR / "lgb_model.pkl"))
    print("Model successfully saved to registry")

except Exception as e:
    print(f"Error during model retraining: {str(e)}")
    print("\nDebug Info:")
    if 'features' in locals():
        print(f"Features shape: {features.shape}")
    if 'ts_data' in locals():
        print(f"Data time range: {ts_data['pickup_hour'].min()} to {ts_data['pickup_hour'].max()}")
    # Re-raise after printing the debug info so a failed registration stops a
    # Run-All with a full traceback instead of letting later cells continue.
    raise

2025-03-06 02:06:38,154 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-06 02:06:38,155 INFO: Initializing external client
2025-03-06 02:06:38,156 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-06 02:06:38,773 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215680
2025-03-06 02:06:39,254 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-06 02:06:39,260 INFO: Initializing external client
2025-03-06 02:06:39,261 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-06 02:06:39,945 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215680
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (5.80s) 


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading: 0.000%|          | 0/316708 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/2062 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/48705 elapsed<00:00 remaining<?

Model created, explore it at https://c.app.hopsworks.ai:443/p/1215680/models/taxi_demand_predictor_next_hour/17
Model successfully saved to registry


In [30]:
# Create a second model entry under the configured model name, taken from
# `config.MODEL_NAME` like the other registration cells rather than a
# hard-coded copy of the string.
# NOTE(review): `modelv2` is only created here; no `.save(...)` call on it is
# visible in this notebook, so nothing is uploaded by this cell.
modelv2 = model_registry.sklearn.create_model(
    name=config.MODEL_NAME,
    metrics={"test_mae": test_mae},
    description="LightGBM regressor V2",
    input_example=features.sample(),
    model_schema=model_schema,
)

In [31]:
from src.inference import get_feature_store
import pandas as pd
from datetime import datetime, timedelta, timezone
import src.config as config
import joblib
from hsml.schema import Schema
from hsml.model_schema import ModelSchema
import hopsworks
from pathlib import Path

try:
    # Step 1: Initialize Hopsworks project
    project = hopsworks.login(
        api_key_value=config.HOPSWORKS_API_KEY
    )

    # NOTE(review): steps 2-4 (data fetch, feature building, fitting, scoring)
    # are not repeated in this cell. It relies on `pipeline`, `features`,
    # `targets` and `test_mae` already existing in the kernel.

    # Step 5: Save model locally first
    model_path = Path(config.MODELS_DIR) / "lgb_model.pkl"
    model_path.parent.mkdir(parents=True, exist_ok=True)  # Create directory if it doesn't exist
    joblib.dump(pipeline, model_path)

    # Step 6: Update model registry
    input_schema = Schema(features)
    output_schema = Schema(targets)
    model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)

    model_registry = project.get_model_registry()
    model = model_registry.sklearn.create_model(
        name=config.MODEL_NAME,
        metrics={"test_mae": test_mae},
        description="LightGBM regressor - Retrained",
        input_example=features.sample(),
        model_schema=model_schema
    )

    # Upload the local pickle as the artifact of the new model version
    model.save(str(model_path))
    print(f"Model successfully saved to: {model_path}")

except Exception as e:
    print(f"Error during model retraining: {str(e)}")
    print("\nDebug Info:")
    if 'model_path' in locals():
        print(f"Attempted to save model to: {model_path}")
        print(f"Directory exists: {model_path.parent.exists()}")
    # Re-raise after printing the debug info so a failed save or registration
    # stops a Run-All with a full traceback instead of being swallowed.
    raise

2025-03-06 02:07:03,231 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-06 02:07:03,234 INFO: Initializing external client
2025-03-06 02:07:03,234 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-06 02:07:03,819 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215680


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading: 0.000%|          | 0/316708 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/2053 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/48705 elapsed<00:00 remaining<?

Model created, explore it at https://c.app.hopsworks.ai:443/p/1215680/models/taxi_demand_predictor_next_hour/18
Model successfully saved to: /Users/ramprakashyallavula/Downloads/sp25_taxi-main 2/models/lgb_model.pkl


In [32]:
from src.inference import load_model_from_registry
# Download a model artifact from the registry again.
# NOTE(review): the meaning of the -1 argument is defined in src.inference
# (presumably "latest version") — confirm there.
model = load_model_from_registry(-1)

2025-03-06 02:07:18,462 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-06 02:07:18,466 INFO: Initializing external client
2025-03-06 02:07:18,466 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-06 02:07:19,166 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215680
Downloading model artifact (0 dirs, 1 files)... DONE

In [33]:
# List the registered entries under the configured model name, using the
# `model_registry` handle created in the registration cell.
models = model_registry.get_models(name=config.MODEL_NAME)

In [34]:
# Show the returned list; the entries are not ordered by version number.
models

[Model(name: 'taxi_demand_predictor_next_hour', version: 4),
 Model(name: 'taxi_demand_predictor_next_hour', version: 17),
 Model(name: 'taxi_demand_predictor_next_hour', version: 8),
 Model(name: 'taxi_demand_predictor_next_hour', version: 10),
 Model(name: 'taxi_demand_predictor_next_hour', version: 16),
 Model(name: 'taxi_demand_predictor_next_hour', version: 7),
 Model(name: 'taxi_demand_predictor_next_hour', version: 9),
 Model(name: 'taxi_demand_predictor_next_hour', version: 14),
 Model(name: 'taxi_demand_predictor_next_hour', version: 3),
 Model(name: 'taxi_demand_predictor_next_hour', version: 6),
 Model(name: 'taxi_demand_predictor_next_hour', version: 18),
 Model(name: 'taxi_demand_predictor_next_hour', version: 11),
 Model(name: 'taxi_demand_predictor_next_hour', version: 12),
 Model(name: 'taxi_demand_predictor_next_hour', version: 13),
 Model(name: 'taxi_demand_predictor_next_hour', version: 1),
 Model(name: 'taxi_demand_predictor_next_hour', version: 2),
 Model(name: 'ta

In [35]:
# Pick the entry with the highest version number from the registry listing.
max(models, key=lambda registered: registered.version)


Model(name: 'taxi_demand_predictor_next_hour', version: 18)

In [36]:
# Re-read the registry metrics after registration; the displayed value now
# matches the MAE computed earlier in this notebook.
load_metrics_from_registry()

2025-03-06 02:07:27,222 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-06 02:07:27,223 INFO: Initializing external client
2025-03-06 02:07:27,224 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-06 02:07:27,891 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215680


{'test_mae': 2.901920208005656}