In [1]:
#1.Prepare Data Set
#2.feast init
#3.Update feature store
#4.Define feature definition
#5.feast apply
#6.Generate Trainning data set
#7.Model Training
#8.Prepare online feature store
import pandas as pd
from feast.types import Int64

In [2]:
from pathlib import Path
# Definir a raiz do projeto
BASE_DIR = Path("ifood").resolve().parent
print(f'Diretorio encontrado: {BASE_DIR}')

Diretorio encontrado: C:\Users\dmist\Documents\pythonProject\ifood


In [3]:
df = pd.read_csv('feature_repo/data/restaurant.csv')

In [4]:
df.head()

Unnamed: 0,id,created_at,enabled,price_range,average_ticket,takeout_time,delivery_time,minimum_order_value,merchant_zip_code,merchant_city,merchant_state,merchant_country
0,d19ff6fca6288939bff073ad0a119d25c0365c407e9e5d...,2017-01-23T12:52:30.910Z,False,3,60.0,0,50.0,30.0,14025,RIBEIRAO PRETO,SP,BR
1,631df0985fdbbaf27b9b031a8f381924e3483833385748...,2017-01-20T13:14:48.286Z,True,3,60.0,0,0.0,30.0,50180,SAO PAULO,SP,BR
2,135c5c4ae4c1ec1fdc23e8c649f313e39be8db913d8bc5...,2017-01-23T12:46:33.457Z,True,5,100.0,0,45.0,10.0,23090,RIO DE JANEIRO,RJ,BR
3,d26f84c470451f752bef036c55517b6d9950d41806f10e...,2017-01-20T13:15:04.806Z,True,3,80.0,0,0.0,18.9,40255,SALVADOR,BA,BR
4,97b9884600ea7192314580d9115f8882b8634f5aa201ff...,2017-01-20T13:14:27.701Z,True,3,60.0,0,0.0,25.0,64600,BARUERI,SP,BR


In [5]:
# Convertendo a coluna de string para datetime
df['datetime_col'] = pd.to_datetime(df['created_at'], format='%Y-%m-%dT%H:%M:%S.%fZ')

# Convertendo a coluna de string para timestamp
# Dividindo por 10^9 para obter o timestamp em segundos
df['timestamp_col'] = df['datetime_col'].astype('int64') // 10**9  

In [6]:
#Change dataset to Parquet
df.to_parquet(path='feature_repo/data/restaurant.parquet')

In [7]:
df.head()

Unnamed: 0,id,created_at,enabled,price_range,average_ticket,takeout_time,delivery_time,minimum_order_value,merchant_zip_code,merchant_city,merchant_state,merchant_country,datetime_col,timestamp_col
0,d19ff6fca6288939bff073ad0a119d25c0365c407e9e5d...,2017-01-23T12:52:30.910Z,False,3,60.0,0,50.0,30.0,14025,RIBEIRAO PRETO,SP,BR,2017-01-23 12:52:30.910,1485175950
1,631df0985fdbbaf27b9b031a8f381924e3483833385748...,2017-01-20T13:14:48.286Z,True,3,60.0,0,0.0,30.0,50180,SAO PAULO,SP,BR,2017-01-20 13:14:48.286,1484918088
2,135c5c4ae4c1ec1fdc23e8c649f313e39be8db913d8bc5...,2017-01-23T12:46:33.457Z,True,5,100.0,0,45.0,10.0,23090,RIO DE JANEIRO,RJ,BR,2017-01-23 12:46:33.457,1485175593
3,d26f84c470451f752bef036c55517b6d9950d41806f10e...,2017-01-20T13:15:04.806Z,True,3,80.0,0,0.0,18.9,40255,SALVADOR,BA,BR,2017-01-20 13:15:04.806,1484918104
4,97b9884600ea7192314580d9115f8882b8634f5aa201ff...,2017-01-20T13:14:27.701Z,True,3,60.0,0,0.0,25.0,64600,BARUERI,SP,BR,2017-01-20 13:14:27.701,1484918067


In [8]:
#!feast apply

In [9]:
#Trainning dataset
from feast import FeatureStore
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

store = FeatureStore(repo_path=f'{BASE_DIR}/feature_repo')

entity_df = pd.read_parquet(path = 'feature_repo/data/restaurant.parquet')

training_data = store.get_historical_features(
entity_df = entity_df,
    features = [
        "restaurant_fview:enabled",
        "restaurant_fview:price_range",
        "restaurant_fview:average_ticket",
        "restaurant_fview:takeout_time",
        "restaurant_fview:delivery_time",
        "restaurant_fview:minimum_order_value",
        "restaurant_fview:merchant_zip_code",
        "restaurant_fview:merchant_city",
        "restaurant_fview:merchant_state",
        "restaurant_fview:merchant_country"
    ]
)

dataset = store.create_saved_dataset(
from_=training_data,
    name = "restaurant_dataset",
    storage = SavedDatasetFileStorage('feature_repo/data/restaurant_dataset1.parquet')
)



Using datetime_col as the event timestamp. To specify a column explicitly, please name it event_timestamp.


SavedDatasetLocationAlreadyExists: Saved dataset location feature_repo/data/restaurant_dataset1.parquet already exists.

In [10]:
#Training Show
training_data.to_df()

Unnamed: 0,id,created_at,enabled,price_range,average_ticket,takeout_time,delivery_time,minimum_order_value,merchant_zip_code,merchant_city,...,enabled__,price_range__,average_ticket__,takeout_time__,delivery_time__,minimum_order_value__,merchant_zip_code__,merchant_city__,merchant_state__,merchant_country__
0,2458597b6740ab52e6834cb92b07072feba38ef303f467...,2017-01-20T13:12:43.554Z,False,1,30.0,30,15.0,0.0,14510,SAO PAULO,...,False,1,30.0,30,15.0,0.0,14510,SAO PAULO,SP,BR
1,645727eda2a5c327f54a7625316f29d52cefd9fb54bd92...,2017-01-20T13:12:44.459Z,False,4,80.0,30,15.0,10.0,14260,SAO PAULO,...,False,4,80.0,30,15.0,10.0,14260,SAO PAULO,SP,BR
2,71319a1000b14c73cb2e364bd9a6f7b807c1cc5f9cef1d...,2017-01-20T13:12:44.503Z,False,5,81.0,30,15.0,0.0,14140,SAO PAULO,...,False,5,81.0,30,15.0,0.0,14140,SAO PAULO,SP,BR
3,c2c2ee7d9350cfcef02c6d333209a1a3ab9c9af932be05...,2017-01-20T13:12:45.246Z,False,5,81.0,30,15.0,10.0,40030,SAO PAULO,...,False,5,81.0,30,15.0,10.0,40030,SAO PAULO,SP,BR
4,1d90c0ccab1b77970dd302dbaf6adefda495bd58ab44bc...,2017-01-20T13:12:45.540Z,False,5,81.0,30,15.0,10.0,40760,SAO PAULO,...,False,5,81.0,30,15.0,10.0,40760,SAO PAULO,SP,BR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7287,bf9d6811d3bd93f5fb1a9166658660b13c122595ab38a8...,2017-01-23T12:54:23.246Z,False,4,80.0,0,45.0,20.0,88015,FLORIANOPOLIS,...,False,4,80.0,0,45.0,20.0,88015,FLORIANOPOLIS,SC,BR
7288,fc061ae616f83a94c53ebb548188cd7c09fc6482182a98...,2017-01-23T12:54:32.760Z,False,1,30.0,0,30.0,13.0,22741,SAO PAULO,...,False,1,30.0,0,30.0,13.0,22741,SAO PAULO,SP,BR
7289,cc5514812859f90b6d116417ba3732edc89482b176bcfa...,2017-01-23T12:54:39.558Z,False,1,30.0,0,50.0,10.0,39620,SAO PAULO,...,False,1,30.0,0,50.0,10.0,39620,SAO PAULO,SP,BR
7290,3c3126fab54a9d1a15af43fcda1fde2c49eb18d9ac0423...,2017-01-23T12:54:49.183Z,False,1,30.0,0,60.0,0.0,51020,RECIFE,...,False,1,30.0,0,60.0,0.0,51020,RECIFE,PE,BR


In [11]:
#Model training
# Importing dependencies
from feast import FeatureStore
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from joblib import dump

# Getting our FeatureStore
store = FeatureStore(repo_path=f'{BASE_DIR}/feature_repo')

# Retrieving the saved dataset and converting it to a DataFrame
training_df = training_data.to_df() #store.get_saved_dataset(name="restaurant_dataset").to_df()
#training_df = training_df.dropna(subset=['price_range'])

# Separating the features and labels
y = training_df['price_range']
X = training_df.drop(
    labels=['price_range', 'datetime_col', "id"], 
    axis=1)

# Apply One-Hot Encoding to all categorical columns
# ValueError: could not convert string to float: '2017-01-20T13:14:42.202Z'
X = pd.get_dummies(X, drop_first=True)

# Splitting the dataset into train and test sets
#X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# Remover valores ausentes
X_train = X_train.dropna()
y_train = y_train[X_train.index] 

# Creating and training LogisticRegression
reg = LogisticRegression(max_iter = 200)
reg.fit(X=X_train, y=y_train)

# Saving the model
dump(value=reg, filename="model/model.joblib")


['model/model.joblib']

In [12]:
# Ver tipo de cada coluna
#print(X.dtypes)

# Contar vazios
print(X_train.isnull().sum())

enabled                0
average_ticket         0
takeout_time           0
delivery_time          0
minimum_order_value    0
                      ..
merchant_state___RN    0
merchant_state___RS    0
merchant_state___SC    0
merchant_state___SE    0
merchant_state___SP    0
Length: 7661, dtype: int64


In [16]:
#Materialized
#Prepare Online Feature Store
# Importing dependencies
from feast import FeatureStore
from datetime import datetime, timedelta

# Getting our FeatureStore
store = FeatureStore(repo_path=f'{BASE_DIR}/feature_repo')

store.materialize_incremental(end_date = datetime.now())

#store.materialize(start_date=datetime.utcnow() - timedelta(days=530), end_date=datetime.utcnow() - timedelta(days=10))




Materializing [1m[32m2[0m feature views to [1m[32m2024-10-24 10:43:57-03:00[0m into the [1m[32msqlite[0m online store.

[1m[32mrestaurant_feature_view[0m from [1m[32m2024-10-24 09:43:57-03:00[0m to [1m[32m2024-10-24 10:43:57-03:00[0m:


AttributeError: 'str' object has no attribute 'tzinfo'

In [18]:
#Get online features
# Importing dependencies
from feast import FeatureStore
import pandas as pd
from joblib import load

# Getting our FeatureStore
store = FeatureStore(repo_path=f'{BASE_DIR}/feature_repo')

# Defining our features names
feast_features = [
        "restaurant_fview:enabled",
        "restaurant_fview:price_range",
        "restaurant_fview:average_ticket",
        "restaurant_fview:takeout_time",
        "restaurant_fview:delivery_time",
        "restaurant_fview:minimum_order_value",
        "restaurant_fview:merchant_zip_code",
        "restaurant_fview:merchant_city",
        "restaurant_fview:merchant_state",
        "restaurant_fview:merchant_country",
    ]

# Getting the latest features
features = store.get_online_features(
    features=feast_features,    
    entity_rows=[{"id": '2458597b6740ab52e6834cb92b07072feba38ef303f4676575ab963513275b3b'}, {"id": 'ac47c7dde8af939606999bd912979082f0c2667a0bc0183712375b5f9ca40fc7'}]
).to_dict()

# Converting the features to a DataFrame
features_df = pd.DataFrame.from_dict(data=features)



In [19]:
features_df.head()

Unnamed: 0,id,price_range,merchant_country,takeout_time,delivery_time,minimum_order_value,average_ticket,enabled,merchant_zip_code,merchant_state,merchant_city
0,2458597b6740ab52e6834cb92b07072feba38ef303f467...,,,,,,,,,,
1,ac47c7dde8af939606999bd912979082f0c2667a0bc018...,,,,,,,,,,


In [20]:
# Loading our model and doing inference
#reg = load("model.joblib")
#predictions = reg.predict(features_df[sorted(features_df.drop("id", axis=1))])
#print(predictions)

def load_model(model_path):
    """Carrega o modelo treinado."""
    try:
        model = load(model_path)
        print(f"Modelo carregado com sucesso de {model_path}")
        return model
    except Exception as e:
        print(f"Erro ao carregar o modelo: {e}")
        raise
    
def preprocess_features(features_df):
    """Pré-processa o DataFrame de features para garantir que as colunas estejam corretas."""
    try:
        # Remover a coluna 'id' e ordenar as colunas para garantir compatibilidade com o modelo
        preprocessed_features = features_df.drop("id", axis=1)
        preprocessed_features = preprocessed_features[sorted(preprocessed_features.columns)]
        
        # Verificar se há valores nulos
        if preprocessed_features.isnull().sum().sum() > 0:
            raise ValueError("Há valores nulos nas features fornecidas.")
        
        return preprocessed_features
    except Exception as e:
        print(f"Erro durante o pré-processamento das features: {e}")
        raise
    
def make_predictions(model, features_df):
    """Realiza previsões usando o modelo treinado e as features fornecidas."""
    try:
        predictions = model.predict(features_df)
        return predictions
    except Exception as e:
        print(f"Erro ao fazer previsões: {e}")
        raise

# Caminho do modelo treinado
model_path = "model/model.joblib"

# Carregar o modelo treinado
reg = load_model(model_path)

# Pré-processar as features
features_df_preprocessed = preprocess_features(features_df)

# Realizar as previsões
predictions = make_predictions(reg, features_df_preprocessed)

# Exibir as previsões
print(predictions)

Modelo carregado com sucesso de model/model.joblib
Erro durante o pré-processamento das features: Há valores nulos nas features fornecidas.


ValueError: Há valores nulos nas features fornecidas.

In [21]:
#API FasAPI
#Definir Caminho para Chamar API
# Definir a raiz do projeto da API
BASE_DIR_API = f'{BASE_DIR}' 

print(f'Diretorio api: {BASE_DIR_API}')


Diretorio api: C:\Users\dmist\Documents\pythonProject\ifood


In [23]:
#Iniciar a API via linha de comando (cmd/terminal)
#acessar o diretorio api onde está o arquivo main.py
#executar o comando abaixo. Trocar a porta para 9000 em caso de conflito
!uvicorn main:app --reload --port 8080


INFO:     Will watch for changes in these directories: ['C:\\Users\\dmist\\Documents\\pythonProject\\ifood']
INFO:     Uvicorn running on http://127.0.0.1:8080 (Press CTRL+C to quit)
INFO:     Started reloader process [21984] using WatchFiles
INFO:     Stopping reloader process [21984]


In [None]:
#Após executar API, acessar a documentação em: http://localhost:8080/docs