In [21]:
import json
from kafka import KafkaConsumer
from river import (
    compose, 
    linear_model, 
    preprocessing, 
    metrics, 
    anomaly
)
import datetime
import pickle
import os
import pandas as pd

In [22]:
#Data processing functions
def extract_device_info(x):
    x_ = x['device_info']
    return {
        'os': x_['os'],
        'browser': x_['browser'],
    }

In [23]:
def create_pipeline():
    """Load existing model or create a new one"""
    # Create a new model pipeline
    pipe1 = compose.Select(
        "amount",
        "account_age_days",
        "cvv_provided",
        "billing_address_match"
    )
    pipe2 = compose.Select(
        "currency",
        "merchant_id",
        "payment_method",
        "product_category",
        "transaction_type",
        "user_agent"
    )
    pipe2 |= preprocessing.OrdinalEncoder()
    pipe3 = compose.Select(
        "device_info"
    )
    pipe3 |= compose.FuncTransformer(
        extract_device_info,
    )
    pipe3 |= preprocessing.OrdinalEncoder()
    pipe = pipe1 + pipe2 + pipe3
    #model = pipe | linear_model.LogisticRegression()
    #Save the model to future use
    #with open(MODEL_PATH, 'wb') as f:
    #    pickle.dump(model, f)
    return pipe

In [24]:
model = create_pipeline() | linear_model.LogisticRegression()

In [25]:
model[-1].weights

{}

In [26]:
model[0]

In [None]:
from pprint import pprint

x = {
    "transaction_id": "ffd3d366-06e4-4ddb-894e-03876e893079", 
    "user_id": "61fa227e-d309-4ed0-b513-3cffa5526463", 
    "timestamp": "2025-04-17T19:52:06.994066+00:00", 
    "amount": 302.69, 
    "currency": "BRL", 
    "merchant_id": "merchant_65", 
    "product_category": "luxury_items", 
    "transaction_type": "deposit", 
    "payment_method": "debit_card", 
    "location": {"lat": -68.4965105, "lon": -153.515477}, 
    "ip_address": "169.235.63.28", 
    "device_info": {"os": "Windows", "browser": "Opera"}, 
    "user_agent": "Mozilla/5.0 (Windows; U; Windows NT 11.0) AppleWebKit/532.16.3 (KHTML, like Gecko) Version/4.1 Safari/532.16.3", 
    "account_age_days": 370, 
    "cvv_provided": True, 
    "billing_address_match": True, 
    "is_fraud": 0}

pprint(x)

{'account_age_days': 370,
 'amount': 302.69,
 'billing_address_match': True,
 'currency': 'BRL',
 'cvv_provided': True,
 'device_info': {'browser': 'Opera', 'os': 'Windows'},
 'ip_address': '169.235.63.28',
 'is_fraud': 0,
 'location': {'lat': -68.4965105, 'lon': -153.515477},
 'merchant_id': 'merchant_65',
 'payment_method': 'debit_card',
 'product_category': 'luxury_items',
 'timestamp': '2025-04-17T19:52:06.994066+00:00',
 'transaction_id': 'ffd3d366-06e4-4ddb-894e-03876e893079',
 'transaction_type': 'deposit',
 'user_agent': 'Mozilla/5.0 (Windows; U; Windows NT 11.0) AppleWebKit/532.16.3 '
               '(KHTML, like Gecko) Version/4.1 Safari/532.16.3',
 'user_id': '61fa227e-d309-4ed0-b513-3cffa5526463'}


In [28]:
y_pred = model.predict_one(x)
model.learn_one(x, x["is_fraud"])

In [29]:
model[-1].weights

{'os': -0.005,
 'browser': -0.005,
 'merchant_id': -0.005,
 'currency': -0.005,
 'payment_method': -0.005,
 'user_agent': -0.005,
 'transaction_type': -0.005,
 'product_category': -0.005,
 'amount': -1.51345,
 'account_age_days': -1.85,
 'cvv_provided': -0.005,
 'billing_address_match': -0.005}

In [30]:
model_to_save = model.clone()

In [34]:
dir(model)

['_LEARN_UNSUPERVISED_DURING_PREDICT',
 '__abstractmethods__',
 '__add__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__or__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__ror__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_add_step',
 '_get_params',
 '_is_stochastic',
 '_last_step',
 '_memory_usage',
 '_more_tags',
 '_multiclass',
 '_mutable_attributes',
 '_raw_memory_usage',
 '_repr_html_',
 '_supervised',
 '_tags',
 '_transform_many',
 '_transform_one',
 '_unit_test_params',
 '_unit_test_skips',
 'clone',
 'debug_one',
 'forecast',
 'learn_many',
 'learn_one',
 'mutate',
 'predict_many',
 'predict_one',
 'predict_proba_many',


In [33]:
model_to_save[-1].weights

{}

In [38]:
with open('model.pkl', 'wb') as f:
    pickle.dump(model[-1], f)

In [47]:
with open('pipe.pkl', 'wb') as f:
    pickle.dump(model[0], f)

TypeError: cannot pickle 'generator' object

In [39]:
with open('model.pkl', 'rb') as f:
    retrieved_model = pickle.load(f)

In [40]:
retrieved_model.weights

{'os': -0.005,
 'browser': -0.005,
 'merchant_id': -0.005,
 'currency': -0.005,
 'payment_method': -0.005,
 'user_agent': -0.005,
 'transaction_type': -0.005,
 'product_category': -0.005,
 'amount': -1.51345,
 'account_age_days': -1.85,
 'cvv_provided': -0.005,
 'billing_address_match': -0.005}

In [43]:
retrieved_model.predict_one(x)

TypeError: can't multiply sequence by non-int of type 'float'

In [42]:
dir(model[0])

['__abstractmethods__',
 '__add__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__or__',
 '__radd__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__ror__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_add_step',
 '_get_params',
 '_is_stochastic',
 '_memory_usage',
 '_more_tags',
 '_mutable_attributes',
 '_raw_memory_usage',
 '_repr_html_',
 '_supervised',
 '_tags',
 '_unit_test_params',
 '_unit_test_skips',
 'clone',
 'learn_many',
 'learn_one',
 'mutate',
 'transform_many',
 'transform_one',
 'transformers']

In [11]:
from river import linear_model, optim

model2 = create_pipeline() | linear_model.LogisticRegression(
            loss = optim.losses.CrossEntropy(
                class_weight = {0: 1, 1: 10}),
            optimizer = optim.SGD(0.01)
        )

In [12]:
model2[-1].weights

{}

In [13]:
dir(model[-1]) == dir(model2[-1])

True

In [14]:
import pickle
with open('model_weights.pkl', 'wb') as f:
    pickle.dump(model2[-1].weights, f)

In [15]:
with open('model_weights.pkl', 'rb') as f:
    weights = pickle.load(f)
model[-1].weights = weights

AttributeError: property 'weights' of 'LogisticRegression' object has no setter

In [16]:
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

TypeError: cannot pickle 'generator' object

In [17]:
model[-1].weights = {}

AttributeError: property 'weights' of 'LogisticRegression' object has no setter

In [18]:
dir(model[-1])

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__or__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__ror__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_eval_gradient_many',
 '_eval_gradient_one',
 '_fit',
 '_get_intercept_update',
 '_get_params',
 '_is_stochastic',
 '_learn_mode',
 '_memory_usage',
 '_more_tags',
 '_multiclass',
 '_mutable_attributes',
 '_raw_dot_many',
 '_raw_dot_one',
 '_raw_memory_usage',
 '_repr_html_',
 '_supervised',
 '_tags',
 '_unit_test_params',
 '_unit_test_skips',
 '_update_weights',
 '_weights',
 '_y_name',
 'clip_gradient',
 'clone',
 'initializer',
 'intercept',
 'intercept_init',
 'intercept_lr',
 'l1',
 'l2',
 'learn_many',
 'learn_one',
 'loss',

In [19]:
model[-1].__setattr__('weights', weights)

AttributeError: property 'weights' of 'LogisticRegression' object has no setter

In [20]:
setattr

<function setattr(obj, name, value, /)>

In [48]:
import pickle

with open("../fastapi_app/predictor.pkl", "rb") as f:
    predictor = pickle.load(f)

In [51]:
dir(predictor)

['_FEATURES_LOG2',
 '_FEATURES_SQRT',
 '_UserList__cast',
 '__abstractmethods__',
 '__add__',
 '__class__',
 '__class_getitem__',
 '__contains__',
 '__copy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__or__',
 '__radd__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rmul__',
 '__ror__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_background',
 '_drift_detection_disabled',
 '_drift_detector_input',
 '_drift_detectors',
 '_drift_tracker',
 '_get_params',
 '_init_ensemble',
 '_is_stochastic',
 '_memory_usage',
 '_metrics',
 '_min_number_of_models',
 '_more_tags',
 '_multiclass',
 '_mutab

In [57]:
predictor.metric

ROCAUC: -0.00%