In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to project .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pytz
from datetime import datetime, timedelta, date
import pandas as pd
from sklearn.preprocessing import LabelEncoder


# Buenos Aires local time (GMT-3) is the reference clock for this notebook
argentina_tz = pytz.timezone('America/Argentina/Buenos_Aires')

# Take "now" in Argentina and floor it to the start of the current hour
# by zeroing minutes, seconds and microseconds
now_in_argentina = datetime.now(argentina_tz)
hour_start = now_in_argentina.replace(minute=0, second=0, microsecond=0)

# The format string carries no UTC offset, so the pandas Timestamp parsed
# from it is timezone-naive (Argentina wall-clock time)
current_time_in_argentina = hour_start.strftime('%Y-%m-%d %H:%M:%S')
current_date = pd.to_datetime(current_time_in_argentina)
current_date

Timestamp('2024-10-01 23:00:00')

In [7]:
from src.inferencesm import load_batch_of_features_from_store

# Load the batch of features for the current (hour-floored) timestamp
features = load_batch_of_features_from_store(current_date)

# Encode the categorical 'line' and 'station' columns as integer codes.
# Each column gets its OWN encoder: re-fitting a single shared LabelEncoder on
# 'station' would overwrite the classes learned for 'line', making it
# impossible to decode the line codes later (e.g. via inverse_transform).
# NOTE(review): the encoders are fitted on the inference batch itself; this
# presumably has to yield the same codes the model saw during training —
# confirm against the training pipeline.
label_encoders = {}
for column in ('line', 'station'):
    label_encoders[column] = LabelEncoder()
    features[column] = label_encoders[column].fit_transform(features[column])

# Backward-compatible name: previously `label_encoder` ended up fitted on 'station'
label_encoder = label_encoders['station']

Connected. Call `.close()` to terminate connection gracefully.
Fetching data from 2024-09-17 23:00:00+00:00 to 2024-10-01 22:00:00+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (3.09s) 


In [5]:
# Display the `features` frame for a visual check of the loaded batch
features

Unnamed: 0,total_pax_previous_336_hour,total_pax_previous_335_hour,total_pax_previous_334_hour,total_pax_previous_333_hour,total_pax_previous_332_hour,total_pax_previous_331_hour,total_pax_previous_330_hour,total_pax_previous_329_hour,total_pax_previous_328_hour,total_pax_previous_327_hour,...,total_pax_previous_7_hour,total_pax_previous_6_hour,total_pax_previous_5_hour,total_pax_previous_4_hour,total_pax_previous_3_hour,total_pax_previous_2_hour,total_pax_previous_1_hour,hour_of_entry,station,line
32,0.0,0.0,0.0,0.0,0.0,0.0,2.0,167.0,492.0,1326.0,...,377.0,278.0,472.0,292.0,412.0,118.0,82.0,2024-10-01 23:00:00+00:00,0,0
8,0.0,0.0,0.0,0.0,0.0,0.0,3.0,24.0,93.0,238.0,...,136.0,68.0,103.0,72.0,86.0,54.0,31.0,2024-10-01 23:00:00+00:00,1,0
35,0.0,0.0,0.0,0.0,0.0,0.0,5.0,177.0,498.0,1376.0,...,501.0,326.0,516.0,465.0,535.0,109.0,97.0,2024-10-01 23:00:00+00:00,2,1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.0,76.0,156.0,...,149.0,91.0,157.0,111.0,116.0,46.0,62.0,2024-10-01 23:00:00+00:00,3,2
6,0.0,0.0,0.0,0.0,0.0,0.0,6.0,121.0,201.0,575.0,...,327.0,294.0,405.0,222.0,417.0,145.0,179.0,2024-10-01 23:00:00+00:00,4,1
37,0.0,0.0,0.0,0.0,0.0,0.0,6.0,166.0,530.0,1630.0,...,308.0,141.0,213.0,151.0,179.0,56.0,40.0,2024-10-01 23:00:00+00:00,5,0
33,0.0,0.0,0.0,0.0,0.0,0.0,7.0,101.0,224.0,557.0,...,885.0,656.0,1201.0,873.0,1291.0,596.0,621.0,2024-10-01 23:00:00+00:00,6,1
23,0.0,0.0,0.0,0.0,0.0,0.0,26.0,235.0,354.0,707.0,...,865.0,626.0,978.0,585.0,907.0,418.0,318.0,2024-10-01 23:00:00+00:00,7,1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89.0,271.0,798.0,...,250.0,142.0,210.0,150.0,205.0,79.0,100.0,2024-10-01 23:00:00+00:00,8,0
1,0.0,0.0,0.0,0.0,0.0,0.0,4.0,92.0,218.0,587.0,...,275.0,210.0,304.0,170.0,272.0,155.0,121.0,2024-10-01 23:00:00+00:00,9,0


In [None]:
from src.model_registry import get_latest_model_from_registry
from src.inferencesm import get_model_predictions

# Fetch the registry model currently tagged 'Production' ...
model = get_latest_model_from_registry(
    model_name='subwayBA_passenger_flow_updt',
    status='Production',
)

# ... and run it on the feature batch prepared above
predictions = get_model_predictions(model, features)

In [None]:
# Stamp every prediction row with the hour-floored timestamp computed earlier
# (adds the 'hour_of_entry' column, or overwrites it if already present),
# then display the resulting frame.
predictions['hour_of_entry'] = current_date
predictions

In [None]:
from src.feature_store_api import get_feature_store
import src.config as config

# Obtain a handle to the feature store, then open the feature group that
# holds model predictions (it is created if it does not exist yet).
# Rows are keyed by (station, line, hour_of_entry); hour_of_entry is also
# the event-time column.
feature_store = get_feature_store()
feature_group = feature_store.get_or_create_feature_group(
    name=config.FEATURE_GROUP_MODEL_PREDICTIONS,
    version=1,
    description="Predictions generated for the next 3 hours by our production model",
    primary_key=['station', 'line', 'hour_of_entry'],
    event_time='hour_of_entry',
)

In [None]:
# Cast both key columns to int32 before writing to the feature group
for key_column in ('station', 'line'):
    predictions[key_column] = predictions[key_column].astype('int32')

# Show the final column dtypes that are about to be written
print(predictions.dtypes)

# Write the predictions; "wait_for_job": False asks the client not to block
# until the ingestion job has finished
feature_group.insert(predictions, write_options={"wait_for_job": False})