**Use this notebook to manually try out the services and modules in the project: run each one, check it, and view the results.**

**This purchase prediction project uses the YAML configuration file `pp_ml.yaml`.**

## Modelling

### Feature Processing and Pipeline

**Note**: no Docker images are needed; you can run the feature pipeline without MLflow and Prefect.

In [7]:
%load_ext autoreload
%autoreload 2

from datetime import timedelta

from typing import Dict, List, Tuple
import pandas as pd
from feast import (
    Entity,
    Feature,
    FeatureView,
    Field,
    FileSource,
    ValueType,
    FeatureService,
)
from feast.types import Float32, Int64, String
import numpy as np
from pathlib import Path

from mlkit.config import Config
from mlkit.data import DataLoader
from mlkit.log import logger
from mlkit.data import FeatureStore
from mlkit.pipeline.util import initialize_pipeline,check_output_columns

from projects.purchase_prediction.data_process import data_process_lib
from projects.purchase_prediction.pipeline.feature import load_config,load_data,init_pipeline,process_pipeline,save_features

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Disabled alternative: call the `feature_pipeline` entry point directly with
# the same config path and pipeline type used by the step-by-step cells below.
# NOTE(review): presumably this runs the same steps in a single call — confirm
# in projects/purchase_prediction/pipeline/feature.py. Uncomment to use.
# from projects.purchase_prediction.pipeline.feature import feature_pipeline
# config_path = "config/pp_ml.yaml"
# pipeline_type = "feature"

# feature_pipeline(config_path, pipeline_type)

In [None]:
# Step-by-step run of the feature pipeline, part 1: configuration and raw data.
# `config_path` and `pipeline_type` select the YAML file and the pipeline
# section handed to `load_config`.
config_path = "config/pp_ml.yaml"
pipeline_type = "feature"


# 1. Load config
# Returns the full config, the data engine and the config of the selected pipeline.
config, data_engine, pipeline_config = load_config(config_path=config_path, pipeline_type=pipeline_type)


# 2. Load data
# In the recorded run below this read data/raw/customer_purchases.csv with the pandas engine.
data = load_data(config=config, data_engine=data_engine)

2024-11-18 12:35:10,875 - mlkit - INFO - Loading config from config/pp_ml.yaml


2024-11-18 12:35:11,250 - mlkit - INFO - Loading data
2024-11-18 12:35:11,252 - mlkit - INFO - Initialized DataLoader with DataEngine.PANDAS adapter
2024-11-18 12:35:11,254 - mlkit - INFO - Loading data from data/raw/customer_purchases.csv using DataEngine.PANDAS
2024-11-18 12:35:11,314 - mlkit - INFO - Successfully loaded data from data/raw/customer_purchases.csv


In [None]:
# Full step-by-step run of the feature pipeline (steps 1-5).
# Steps 1 and 2 repeat the previous cell, so this cell can be run on its own
# after the import cell.
config_path = "config/pp_ml.yaml"
pipeline_type = "feature"


# 1. Load config
# Returns the full config, the data engine and the config of the selected pipeline.
config, data_engine, pipeline_config = load_config(config_path=config_path, pipeline_type=pipeline_type)


# 2. Load data
data = load_data(config=config, data_engine=data_engine)

# 3. Initialize pipeline
# Builds two dicts from the config and the project's `data_process_lib`:
# the feature cards and the pipeline objects consumed by step 4.
feature_card_dict, pipeline_object_dict = init_pipeline(
    config=config, pipeline_type=pipeline_type, data_process_lib=data_process_lib
)


# 4. Process data
# Runs the loaded data through the pipeline objects created in step 3.
processed_data = process_pipeline(
    data=data,
    pipeline_config=pipeline_config,
    pipeline_object_dict=pipeline_object_dict,
    pipeline_feature_card_dict=feature_card_dict,
)

# 5. Save features
# NOTE(review): `fs` is presumably a feature-store handle — confirm against save_features.
fs = save_features(processed_data, pipeline_config=pipeline_config)

### Update online features (materialize)

In [1]:
from feast import FeatureStore
from datetime import datetime, timedelta

In [2]:
# Open the Feast feature repository located in the project directory.
# `FeatureStore` here is the Feast class imported in the cell above; `s` is
# used by the materialize cell that follows.
s = FeatureStore("projects/purchase_prediction/")

In [3]:
# Load the last 720 days of feature values into the online store.
# "Now" is captured once so that both ends of the window are derived from the
# same instant instead of two separate clock reads.
# NOTE: the recorded output below failed with a name-resolution error for
# `api_redis:6379`, i.e. the online store (Redis) host configured for this
# repository must be reachable from wherever the notebook runs.
materialize_end = datetime.now()
s.materialize(
    start_date=materialize_end - timedelta(days=720),  # Adjust time range as needed
    end_date=materialize_end,
)

Materializing [1m[32m1[0m feature views from [1m[32m2022-11-30 09:48:22+03:00[0m to [1m[32m2024-11-19 09:48:22+03:00[0m into the [1m[32mredis[0m online store.

[1m[32mcustomer_id_feature_view[0m:


  0%|                                                                      | 0/1914 [00:00<?, ?it/s]


ConnectionError: Error -3 connecting to api_redis:6379. Temporary failure in name resolution.

### Model Training and Pipeline

In [None]:
%load_ext autoreload
%autoreload 2

from mlkit.config import Config

from mlkit.model.model import MLAdapter # LightGBMAdapter,XGBoostAdapter,RandomForestAdapter
from mlkit.track import ExperimentTracker
from mlkit.train.trainer import ModelTrainer
from mlkit.data import DataLoader
from mlkit.pipeline.util import initialize_pipeline
from projects.purchase_prediction.data_process import data_process_lib
from mlkit.log import logger


from projects.purchase_prediction.pipeline.train import load_config,initialize_components
from projects.purchase_prediction.pipeline.train import read_data,process_pipeline_steps,start_tracker,train_model,prepare_training_data
from projects.purchase_prediction.pipeline.train import perform_cross_validation,log_results
#from mlkit.data 

In [None]:
# Training pipeline, part 1: configuration, components, tracking run and data.
# 1. Load config
config_path = "config/pp_ml.yaml"
pipeline_type = "train"

config, pipeline_config = load_config(config_path, pipeline_type)
# Point experiment tracking at a server on localhost for this notebook session.
# NOTE(review): the original remark read "note use in docker" — presumably this
# override is only for running outside Docker; confirm.
config.tracking.tracking_uri = "http://localhost:5000"

# 2. Build the components used by the rest of the training cells.
(
    data_loader,
    ml_adapter,
    trainer,
    tracker,
    pipeline_step_card_dict,
    pipeline_object_dict,
) = initialize_components(config, pipeline_type, data_process_lib)


# 3. Start tracking; the matching `tracker.end_run()` is in the next cell.
start_tracker(config, tracker, pipeline_config)

# 4. Read the input data.
df = read_data(data_loader)

# 5. Apply the pipeline steps; `data_process_params` is passed to `log_results` in the next cell.
df, data_process_params = process_pipeline_steps(df, pipeline_object_dict, pipeline_step_card_dict)

In [None]:
# Training pipeline, part 2. Requires the previous cell: it uses `df`,
# `pipeline_config`, `ml_adapter`, `trainer`, `tracker`, `config` and
# `data_process_params` defined there.

# Split the processed frame into model inputs and target.
X, y = prepare_training_data(df, pipeline_config, ml_adapter)

# Fit the model and collect validation results.
trained_model, val_results = train_model(ml_adapter, trainer, X, y)

# Cross-validate on the same inputs.
cv_metrics = perform_cross_validation(ml_adapter, trainer, X, y)

# Send results, metrics and processing parameters to the tracker.
log_results(tracker, ml_adapter, config, val_results, cv_metrics, data_process_params, X)


# Close the run opened by `start_tracker` in the previous cell.
# NOTE(review): if any step above raises, this line is skipped and the run
# presumably stays open — confirm how the tracker handles that.
tracker.end_run()

### Deployment Pipeline

In [None]:
%load_ext autoreload
%autoreload 2

import json
from datetime import datetime


from mlkit.config import Config
from mlkit.deploy.deploy import DeploymentManager
from mlkit.track import ExperimentTracker
from mlkit.pipeline.util import get_model_features_dict
from mlkit.deploy.registry import ModelMetadata
from mlkit.log import logger

from projects.purchase_prediction.config import feature_lib

from projects.purchase_prediction.pipeline.deploy import deploy_pipeline,load_config,initialize_components,select_best_model,convert_model
from projects.purchase_prediction.pipeline.deploy import get_model_metadata,prepare_registration_metadata,register_model,save_deployment_config

In [None]:
# 1. Load config
# Parameters for the deployment cells below. The two URIs override the values
# from the YAML config when they are not None (see the next cell).
config_path = "config/pp_ml.yaml"
pipeline_type = "deploy"
tracking_uri = "http://localhost:5000"
registry_uri = "http://localhost:8000"

# Disabled alternative: call the `deploy_pipeline` entry point with the same arguments.
# NOTE(review): presumably equivalent to the step-by-step cell below — confirm.
# deploy_pipeline(config_path=config_path,pipeline_type=pipeline_type,tracking_uri="http://localhost:5000",registry_uri="http://localhost:8000")

In [None]:
# Step-by-step deployment run. Uses `config_path`, `pipeline_type`,
# `tracking_uri` and `registry_uri` from the parameter cell above.
config, pipeline_config = load_config(config_path, pipeline_type)

# Apply the endpoint overrides; a value of None keeps what the YAML config provides.
if tracking_uri is not None:
    config.tracking.tracking_uri = tracking_uri
if registry_uri is not None:
    config.deploy.register.custom.registry_uri = registry_uri


# Deployment manager and experiment tracker
deployer, tracker = initialize_components(config)


# Pick the best model and the runs it came from
model, best_runs = select_best_model(deployer, tracker)

# Convert the model; yields the converted model buffer and conversion metadata
buffer_converted_model, conversion_metadata = convert_model(deployer, model, config)

# Collect metadata for the selected runs
metadata_info = get_model_metadata(tracker, best_runs, feature_lib, pipeline_config)

# Merge model and conversion metadata into the registration payload
register_metadata = prepare_registration_metadata(metadata_info, conversion_metadata, config)

# Register the converted model
model_info = register_model(deployer, buffer_converted_model, register_metadata)

# Persist the deployment configuration for the registered model
save_deployment_config(model_info, config, pipeline_config)

### Predict

In [None]:
%load_ext autoreload
%autoreload 2

import json
from typing import Any,Dict,List,Tuple
import pandas as pd
from feast import FeatureStore
from datetime import datetime,timedelta
import onnxruntime as ort

from mlkit.config.main import Config
from mlkit.deploy.deploy import DeploymentManager
#from mlkit.data.feature_store import FeatureStore
from mlkit.deploy.deploy import ModelInfo
from mlkit.data.feature_store import FeatureStore
from mlkit.config import Config

from projects.purchase_prediction.data_process import data_process_lib
from projects.purchase_prediction.config import feature_lib


from mlkit.config.data_process import ProcessStepCard
from mlkit.log import logger
from mlkit.predict import DataProcessor,InferenceService
from mlkit.config.deploy import DeployConfig

import numpy as np

In [None]:
# 1. Load config
# Read the model registration metadata from disk. The path is defined once and
# reused, so changing the model file only requires editing `model_register_json`.
# NOTE(review): this file is presumably written by the deployment pipeline's
# save step — confirm.
model_register_json = "lightgbm_regression_0.1_metadata.json"
with open(model_register_json, "r", encoding="utf-8") as f:
    model_register_info = json.load(f)

# config_path = "config/pp_ml.yaml"
# config = Config.load(config_path)

# Rebuild the deploy config from the metadata's "deploy_config" entry and point
# the registry at localhost for this notebook session.
config_deploy = DeployConfig.from_dict(model_register_info["deploy_config"])
config_deploy.register.custom.registry_uri = "http://localhost:8000"
deployment_manager = DeploymentManager(config_deploy)

In [None]:
# Build the inference service from the registration metadata and the
# deployment manager created in the previous cell.
inferencer = InferenceService(
    model_register_info=model_register_info,
    deployment_manager=deployment_manager,
)

In [None]:
%%timeit
input_data = {"purchase_date": "2024-01-01", "age": 30, "gender": "Male", "annual_income": 50000, "customer_id": 15}
df,X = inferencer.prepare_data(input_data)
inferencer.predict(X)

## API

In [None]:
%load_ext autoreload
%autoreload 2


from api.services.pred import PredService,MetricsCollector
from api.schemas.pred import PredData,PredResponse
from api.core.config import settings

from mlkit.config.main import Config

In [None]:
# Override the API settings with localhost endpoints for this notebook session.
# NOTE(review): the MongoDB URL embeds a username and password in the notebook;
# consider reading it from an environment variable instead of committing it.
settings.MINIO_ENDPOINT = "localhost:9005"
settings.MONGODB_URL = "mongodb://root:root@localhost:27018"
# Model metadata file and registry address passed to PredService in the next cell.
model_register_path = "lightgbm_regression_0.1_metadata.json"
DEPLOY_CONFIG_URI = "http://localhost:8000"


# Load the project config and keep only its deploy section.
config_path = "config/pp_ml.yaml"
config = Config.load(config_path)
deploy_config = config.deploy

# Point the model registry at the localhost address defined above.
deploy_config.register.custom.registry_uri = DEPLOY_CONFIG_URI

In [None]:
# Create the prediction service from the deploy config, the metadata file path
# and the overridden settings prepared in the previous cell.
pred_service = PredService(deploy_config=deploy_config, model_metadata_path=model_register_path, settings=settings)

In [None]:
# Explicitly call the service's underscore-prefixed initializer.
# NOTE(review): presumably PredService does not build its inference service in
# the constructor, so it must be initialized before predicting — confirm.
pred_service._initialize_inference_service()

In [None]:
# Build the request object for a direct (in-process) call to the prediction
# service; the field values are passed straight to the PredData schema.
input_data = PredData(
    customer_id=12,
    age=120,
    gender="Male",
    annual_income=340,
    purchase_date="2024-12-01",
)

In [None]:
# Call the service in-process (no HTTP) with the request built above; the
# return value is shown as the cell output.
pred_service.predict_purchase(input_data)

In [None]:
import requests
import random
import numpy as np
import time

In [None]:
# Send a stream of randomized prediction requests to the running API,
# one per second, and print each response.
url = "http://localhost:8888/api/v1/pred/pred"
headers = {"accept": "application/json", "Content-Type": "application/json"}

# Upper bound in seconds for a single request. Without a timeout, `requests`
# waits indefinitely and an unresponsive API would block the kernel.
request_timeout_s = 10

# One session for all requests so the HTTP connection is reused.
with requests.Session() as session:
    for i in range(1000):
        data = {
            "age": random.randint(18, 80),
            "annual_income": 50000,
            "customer_id": random.randint(1, 1000),
            "gender": np.random.choice(["Male", "Female"]),
            "purchase_date": "2025-12-01",
        }

        response = session.post(url, headers=headers, json=data, timeout=request_timeout_s)
        if response.ok:
            print(response.json())
        else:
            # Error bodies are not guaranteed to be JSON; show the status and
            # raw text instead of failing on decode and ending the loop.
            print(f"HTTP {response.status_code}: {response.text}")

        time.sleep(1)