# Desenvolvimento do modelo para predição de preços de laptops

Exploração inicial de diferentes tipos de modelos

In [1]:
#%pip install dagshub
#%pip install catboost
#%pip install xgboost
#%pip install lightgbm

In [1]:
import pandas as pd
import mlflow
import dagshub
from dagshub.data_engine import datasources
from sklearn.model_selection import train_test_split
import mlflow.sklearn
import mlflow.catboost
from catboost import CatBoostRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer,mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
import lightgbm as lgb
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from mlflow.models import infer_signature

# Obtendo dados do dataset

In [2]:
# Look up the 'processed' datasource of the DagsHub repository; it is queried below
# to locate the prepared CSV.
ds = datasources.get('rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing', 'processed')

In [3]:
# Show every datapoint of the datasource as a DataFrame (sanity check of what is available).
ds.all().dataframe

Unnamed: 0,path,datapoint_id,dagshub_download_url,media type,size
0,laptop-price-brl-processed.csv,103594502,https://dagshub.com/api/v1/repos/rrmoreira/fia...,text/plain,52552


In [4]:
# Query the first datapoints of the datasource.
res = ds.head()

# Materialize the result so that an empty datasource is reported here, explicitly,
# instead of surfacing later as a NameError on `dataset_url`.
datapoints = list(res)
if not datapoints:
    raise ValueError("The 'processed' datasource returned no datapoints; cannot resolve the dataset URL.")

# Keep the last datapoint, which is what iterating over the whole result would leave bound.
dp = datapoints[-1]
dataset_url = dp.download_url

In [5]:
# Display the resolved download URL of the processed CSV.
dataset_url

'https://dagshub.com/api/v1/repos/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing/raw/main/data/processed/laptop-price-brl-processed.csv'

In [6]:
# Read the processed CSV directly from its download URL and preview the first rows.
df = pd.read_csv(dataset_url)
df.head()

Unnamed: 0,ram_gb,ssd,hdd,graphic_card_gb,warranty,price,brand_asus,brand_dell,brand_hp,brand_lenovo,...,os_windows,os_bit_32-bit,os_bit_64-bit,ram_type_ddr4,ram_type_other,weight_casual,weight_gaming,weight_thinnlight,touchscreen_0,touchscreen_1
0,4,0,1024,0,0,2321,1,0,0,0,...,1,0,1,1,0,1,0,0,1,0
1,4,0,1024,0,0,2613,0,0,0,1,...,1,0,1,1,0,1,0,0,1,0
2,4,0,1024,0,0,2680,0,0,0,1,...,1,0,1,1,0,1,0,0,1,0
3,8,512,0,2,0,4689,1,0,0,0,...,1,1,0,1,0,1,0,0,1,0
4,4,0,512,0,0,1808,1,0,0,0,...,1,0,1,1,0,1,0,0,1,0


In [7]:
# Initialize the DagsHub integration for this repository with MLflow enabled; the runs
# started below are reported under the repository's `.mlflow` URL (see the cell outputs).
dagshub.init(repo_owner="rrmoreira", repo_name="fiap-ds-mlops-9dtsr-laptop-pricing", mlflow=True)

In [8]:
# Turn on MLflow autologging for the installed ML libraries. The grid searches below are
# therefore logged automatically, in addition to what evaluate_and_log_model logs explicitly.
mlflow.autolog()

2025/07/30 22:09:14 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/07/30 22:09:14 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
2025/07/30 22:09:14 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.


In [9]:
# Model inputs are all the columns of the dataset except the target.
features = df.columns.tolist()
features.remove('price')  # 'price' is the target variable

In [10]:
# Display the feature names that will feed the models.
features

['ram_gb',
 'ssd',
 'hdd',
 'graphic_card_gb',
 'warranty',
 'brand_asus',
 'brand_dell',
 'brand_hp',
 'brand_lenovo',
 'brand_other',
 'processor_brand_amd',
 'processor_brand_intel',
 'processor_brand_m1',
 'processor_name_core i3',
 'processor_name_core i5',
 'processor_name_core i7',
 'processor_name_other',
 'processor_name_ryzen 5',
 'os_other',
 'os_windows',
 'os_bit_32-bit',
 'os_bit_64-bit',
 'ram_type_ddr4',
 'ram_type_other',
 'weight_casual',
 'weight_gaming',
 'weight_thinnlight',
 'touchscreen_0',
 'touchscreen_1']

In [11]:
# Feature matrix: every non-target column of the dataset.
X = df[features]

In [12]:
# Number of input features.
len(features)

29

In [13]:
# Target vector: the laptop price (presumably in BRL, judging by the dataset name — confirm).
y = df['price']
y

0       2321
1       2613
2       2680
3       4689
4       1808
       ...  
775     9111
776     9714
777    10049
778     9580
779     3852
Name: price, Length: 780, dtype: int64

In [14]:
# Hold out 30% of the rows for testing, with a fixed seed so the split is reproducible.
# `.values` hands plain NumPy arrays to the models, so column names are not carried along.
X_train, X_test, y_train, y_test = train_test_split(
    X.values,
    y.values,
    test_size=0.3,
    random_state=42,
)

In [16]:
def evaluate_and_log_model(kind, model_name, model, X_test, y_test):
    """Evaluate a fitted regressor on held-out data and log it to the active MLflow run.

    Computes MSE, MAE, R2 and MAPE from the model's predictions, logs them as run
    metrics, logs the model itself (with an inferred signature and a five-row input
    example) and prints a one-line summary.

    Args:
        kind: MLflow flavor used to persist the model. "catboost", "xgboost" and
            "lightgbm" select the matching ``mlflow.<kind>`` module; any other value
            falls back to ``mlflow.sklearn``.
        model_name: Artifact path under which the model is logged in the run.
        model: Fitted estimator exposing ``predict``.
        X_test: Held-out feature matrix; ``X_test[:5]`` is stored as the input example.
        y_test: True target values aligned with ``X_test``.

    Returns:
        dict: The logged metrics, keyed by "MSE", "MAE", "R2" and "MAPE".
    """
    predictions = model.predict(X_test)

    mse = float(mean_squared_error(y_test, predictions))
    mae = float(mean_absolute_error(y_test, predictions))
    r2 = float(r2_score(y_test, predictions))
    mape = float(mean_absolute_percentage_error(y_test, predictions))
    metrics = {"MSE": mse, "MAE": mae, "R2": r2, "MAPE": mape}

    # A single batched call instead of four separate requests to the tracking server.
    mlflow.log_metrics(metrics)

    # Infer the model signature automatically from the inputs and the predictions.
    signature = infer_signature(X_test, predictions)

    # Resolve only the requested flavor module; unknown kinds use the scikit-learn flavor.
    if kind in ("catboost", "xgboost", "lightgbm"):
        flavor = getattr(mlflow, kind)
    else:
        flavor = mlflow.sklearn
    flavor.log_model(model, model_name, signature=signature, input_example=X_test[:5])

    print(f"Model {model_name} logged with MSE: {mse}, MAE: {mae}, R2: {r2}, MAPE: {mape}")
    return metrics

### Experimento com Ridge Regression

In [24]:
with mlflow.start_run(run_name="Ridge Regression"):
    # Candidate regularization strengths for the grid search.
    param_grid = {'alpha': [0.1, 1.0, 10.0, 100.0]}
    # greater_is_better=False negates the MAE, so the score-maximizing search minimizes the error.
    mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

    ridge = Ridge()
    grid_search = GridSearchCV(ridge, param_grid, scoring=mae_scorer, cv=5)
    grid_search.fit(X_train, y_train)

    # Estimator refit with the best alpha found by the 5-fold search.
    best_model = grid_search.best_estimator_
    mlflow.log_params({"best_alpha": best_model.alpha})

    evaluate_and_log_model("sklearn", "ridge_regression", best_model, X_test, y_test)

2025/07/30 21:18:54 INFO mlflow.sklearn.utils: Logging the 5 best runs, no runs will be omitted.


🏃 View run stately-ape-173 at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/d4b25e019d9f4469b52e081dd7d5add1
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0
🏃 View run able-horse-772 at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/4f1b2f373fd1492480390bb1a16337d1
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0
🏃 View run stylish-pug-890 at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/b730d1ac5118469f9b2db7e30b87823e
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0
🏃 View run ambitious-vole-478 at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/8db9108fc425452e92d4cccdd1d47379
🧪 View experiment at: https://dagshub.co

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 229.56it/s]


Model ridge_regression logged with MSE: 3062335.917236299, MAE: 1141.6800302647246, R2: 0.637577032507391, MAPE: 0.2191235407782589
🏃 View run Ridge Regression at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/4ecb76c1847d403ca19dcbf307bc7793
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0


🏃 View run intrigued-snipe-895 at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/afc52f08f83c4137a7d457b7c3a1320a
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0
🏃 View run funny-bird-900 at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/419c11157e264e818d46684bc20010ac
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0
🏃 View run exultant-finch-801 at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/bfe59bac1ca440a0b4ce6aa306e1723b
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0


### Experimento com Decision Tree Regression

In [26]:
with mlflow.start_run(run_name="DecisionTree_Regressor"):
    # Search space: tree depth (None = unbounded) and the minimum samples required to split.
    param_grid = {
        'max_depth': [3, 5, 10, None],
        'min_samples_split': [2, 5, 10],
    }
    # greater_is_better=False negates the MAPE, so the score-maximizing search minimizes it.
    # NOTE(review): this search is scored on MAPE while the Ridge and MLP searches use MAE —
    # confirm that the different selection criteria are intentional.
    mape_scorer = make_scorer(mean_absolute_percentage_error, greater_is_better=False)

    tree = DecisionTreeRegressor(random_state=42)
    grid_search = GridSearchCV(tree, param_grid, cv=5, scoring=mape_scorer)
    grid_search.fit(X_train, y_train)

    # Estimator refit with the best hyperparameters found by the 5-fold search.
    best_model = grid_search.best_estimator_
    mlflow.log_params({
        "best_max_depth": best_model.max_depth,
        "best_min_samples_split": best_model.min_samples_split,
    })

    evaluate_and_log_model("sklearn", "Decision Tree Regressor", best_model, X_test, y_test)


2025/07/30 21:40:25 INFO mlflow.sklearn.utils: Logging the 5 best runs, 7 runs will be omitted.


🏃 View run honorable-squid-63 at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/3961c59bf640432a8e25a0c0bb3a1e67
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 280.92it/s]


Model Decision Tree Regressor logged with MSE: 3986435.8604392665, MAE: 1088.429145200087, R2: 0.5282111586363019, MAPE: 0.17674529589898663
🏃 View run DecisionTree_Regressor at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/4628ab04865a45db91cb8dac7cadfe3e
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0


### Experimento com MLP Regression

In [18]:
with mlflow.start_run(run_name="MLP Regressor"):
    # Search space: network topology, activation function and L2 penalty.
    param_grid = {
        'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
        'activation': ['relu', 'tanh'],
        'alpha': [0.0001, 0.001, 0.01],
    }
    # greater_is_better=False negates the MAE, so the score-maximizing search minimizes the error.
    mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

    mlp = MLPRegressor(max_iter=500, random_state=42)
    grid_search = GridSearchCV(mlp, param_grid, scoring=mae_scorer, cv=5)
    grid_search.fit(X_train, y_train)

    # Estimator refit with the best hyperparameters found by the 5-fold search.
    best_model = grid_search.best_estimator_
    mlflow.log_params({
        "best_hidden_layer_sizes": best_model.hidden_layer_sizes,
        "best_activation": best_model.activation,
        "best_alpha": best_model.alpha,
    })

    evaluate_and_log_model("sklearn", "MLP Regressor", best_model, X_test, y_test)

2025/07/30 22:18:37 INFO mlflow.sklearn.utils: Logging the 5 best runs, 19 runs will be omitted.
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 183.82it/s]


Model MLP Regressor logged with MSE: 3552567.5395040526, MAE: 1248.7731592400173, R2: 0.5795588385199266, MAPE: 0.23322553845509145
🏃 View run MLP Regressor at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/38e8156bb07a44b790fbb6fe0ce07bdc
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0


### Experimento com XGBoost Regressor

In [20]:
with mlflow.start_run(run_name="XGBoost Regressor"):
    # Search space: number of boosting rounds, tree depth and learning rate.
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 5, 7, 9],
        'learning_rate': [0.01, 0.1, 0.2, 0.3],
    }
    # greater_is_better=False negates the MAPE, so the score-maximizing search minimizes it.
    mape_scorer = make_scorer(mean_absolute_percentage_error, greater_is_better=False)

    xgb = XGBRegressor(random_state=42, verbosity=0)
    grid_search = GridSearchCV(xgb, param_grid, cv=5, scoring=mape_scorer)
    grid_search.fit(X_train, y_train)

    # Estimator refit with the best hyperparameters found by the 5-fold search.
    best_model = grid_search.best_estimator_
    mlflow.log_params({
        "best_n_estimators": best_model.n_estimators,
        "best_max_depth": best_model.max_depth,
        "best_learning_rate": best_model.learning_rate,
    })

    evaluate_and_log_model("xgboost", "XGBoost Regressor", best_model, X_test, y_test)

2025/07/30 22:28:30 INFO mlflow.sklearn.utils: Logging the 5 best runs, 43 runs will be omitted.
  self.get_booster().save_model(fname)
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 214.63it/s]


Model XGBoost Regressor logged with MSE: 2642192.25, MAE: 899.0316772460938, R2: 0.687300443649292, MAPE: 0.14656654000282288
🏃 View run XGBoost Regressor at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/3039d0e0e9374c9b8e5b21c1d4a93839
🧪 View experiment at: https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0


## Model Registry

In [21]:
# Run "XGBoost Regressor" — the best model:
# https://dagshub.com/rrmoreira/fiap-ds-mlops-9dtsr-laptop-pricing.mlflow/#/experiments/0/runs/3039d0e0e9374c9b8e5b21c1d4a93839
run_id = '3039d0e0e9374c9b8e5b21c1d4a93839'

# NOTE(review): the "model" artifact path is not the one evaluate_and_log_model writes
# (it logs under the model name, "XGBoost Regressor"); presumably "model" is the artifact
# produced by mlflow.autolog() for the grid search — confirm this is the model to register.
model_uri = f"runs:/{run_id}/model"

mlflow.register_model(model_uri=model_uri, name="laptop-pricing-model-brl")


Successfully registered model 'laptop-pricing-model-brl'.
2025/07/30 23:00:27 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: laptop-pricing-model-brl, version 1
Created version '1' of model 'laptop-pricing-model-brl'.


<ModelVersion: aliases=[], creation_timestamp=1753927227474, current_stage='None', description='', last_updated_timestamp=1753927227474, name='laptop-pricing-model-brl', run_id='3039d0e0e9374c9b8e5b21c1d4a93839', run_link='', source='mlflow-artifacts:/a0cd437434354e3a90f60b591ac0abf7/3039d0e0e9374c9b8e5b21c1d4a93839/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>