In [9]:

import pandas as pd
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, f1_score
from pycaret.classification import setup, create_model, predict_model, save_model


def Treinamento(
    target: str = "shot_made_flag",
    modelo_saida: str = "modelo_final"
):
    """Treina dois modelos com PyCaret, registra métricas no MLflow e retorna o melhor."""
         
    df_train = catalog.load("base_train")
    df_test = catalog.load("base_test")
    
    # Inicia a run no MLflow
    with mlflow.start_run(run_name="Treinamento"):
        # 1. Setup PyCaret
        exp = setup(
            data=df_train,
            target=target,
            session_id=42,
            log_experiment=False,  # gerencia logs via mlflow diretamente
        )
  
        # 2. Treinamento dos modelos
        lr = create_model("lr")      # Regressão Logística
        dt = create_model("dt")      # Decision Tree
        
        # 3. Avaliação na base de teste
        # Previsões
        pred_lr = predict_model(lr, data=df_test)
        pred_dt = predict_model(dt, data=df_test)

        print(pred_dt)
        # PyCaret adiciona coluna "Label" e "Score"
        y_true = df_test[target].values
        y_proba_lr = pred_lr["prediction_score"].values
        y_proba_dt = pred_dt["prediction_score"].values
        y_pred_dt = pred_dt["prediction_label"].values
        
        # Métricas
        ll_lr = log_loss(y_true, y_proba_lr)
        ll_dt = log_loss(y_true, y_proba_dt)
        f1_dt = f1_score(y_true, y_pred_dt)
 
        # 4. Log das métricas
        mlflow.log_metric("logloss_lr", ll_lr)
        mlflow.log_metric("logloss_dt", ll_dt)
        mlflow.log_metric("f1_dt", f1_dt)
        
        # 5. Seleção do modelo final
        # Critério: menor log loss; em caso de empate, escolha aquele com maior F1
        if ll_lr < ll_dt:
            best_model, best_name = lr, "lr"
        elif ll_dt < ll_lr:
            best_model, best_name = dt, "dt"
        else:
            # empataram em logloss → escolher dt se F1 maior, senão lr
            best_model, best_name = (dt, "dt") if f1_dt > 0 else (lr, "lr")
        
        mlflow.log_param("modelo_escolhido", best_name)
        
        return best_model

Treinamento()

Unnamed: 0,Description,Value
0,Session id,42
1,Target,shot_made_flag
2,Target type,Binary
3,Original data shape,"(16228, 7)"
4,Transformed data shape,"(16228, 7)"
5,Transformed train set shape,"(11359, 7)"
6,Transformed test set shape,"(4869, 7)"
7,Numeric features,6
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.588,0.6183,0.5166,0.5761,0.5447,0.1706,0.1714
1,0.5907,0.6024,0.5092,0.5811,0.5428,0.1752,0.1764
2,0.5854,0.6107,0.4908,0.577,0.5304,0.1636,0.1653
3,0.5836,0.6029,0.5037,0.5723,0.5358,0.1611,0.1622
4,0.6012,0.623,0.5111,0.5957,0.5501,0.1958,0.1976
5,0.5607,0.5815,0.4871,0.5443,0.5141,0.1156,0.1162
6,0.5511,0.5791,0.4446,0.5356,0.4859,0.0935,0.0948
7,0.5801,0.6145,0.4659,0.575,0.5148,0.1518,0.1544
8,0.581,0.5988,0.4954,0.5711,0.5306,0.1556,0.1569
9,0.5498,0.5685,0.4539,0.5336,0.4905,0.0919,0.0929


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5282,0.5106,0.5756,0.5049,0.5379,0.0601,0.0607
1,0.5255,0.5076,0.5461,0.5025,0.5234,0.0527,0.0528
2,0.5335,0.5056,0.6107,0.5092,0.5554,0.073,0.0744
3,0.5176,0.5033,0.5554,0.4951,0.5235,0.0383,0.0386
4,0.529,0.5151,0.5738,0.5057,0.5376,0.0617,0.0622
5,0.5229,0.5113,0.5683,0.5,0.532,0.0495,0.0499
6,0.5687,0.5643,0.6144,0.5423,0.5761,0.1405,0.1416
7,0.5352,0.5156,0.5709,0.5124,0.5401,0.0731,0.0735
8,0.5273,0.5118,0.5727,0.5049,0.5367,0.0581,0.0586
9,0.5269,0.5162,0.5572,0.5042,0.5294,0.0561,0.0564


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.5728,0.5941,0.4729,0.5627,0.5139,0.138,0.1397


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Decision Tree Classifier,0.5263,0.5026,0.5823,0.5033,0.54,0.057,0.0576


             lat         lon  minutes_remaining  period  playoffs  \
24157  33.888302 -118.382797                 11       3         1   
441    34.021301 -118.159798                  2       3         0   
20143  33.983299 -118.120796                 10       2         0   
12366  34.044300 -118.269798                  0       4         0   
7527   34.044300 -118.269798                  0       1         0   
...          ...         ...                ...     ...       ...   
275    34.031300 -118.298798                 11       2         0   
882    34.042301 -118.184799                  6       1         0   
18167  34.044300 -118.269798                  3       4         0   
11830  33.934299 -118.377800                  5       1         0   
18785  33.850300 -118.281799                  0       1         0   

       shot_distance  shot_made_flag  prediction_label  prediction_score  
24157             19             0.0                 1            1.0000  
441               11 

In [1]:
print("teste")

teste


In [3]:
!conda activate kobe_pycaret

In [5]:
!pip install scikit-learn==1.0.2

Collecting scikit-learn==1.0.2
  Downloading scikit-learn-1.0.2.tar.gz (6.7 MB)
     ---------------------------------------- 0.0/6.7 MB ? eta -:--:--
     --- ------------------------------------ 0.5/6.7 MB 4.2 MB/s eta 0:00:02
     --------- ------------------------------ 1.6/6.7 MB 5.2 MB/s eta 0:00:01
     ------------------ --------------------- 3.1/6.7 MB 5.9 MB/s eta 0:00:01
     ------------------------ --------------- 4.2/6.7 MB 6.0 MB/s eta 0:00:01
     ------------------------ --------------- 4.2/6.7 MB 6.0 MB/s eta 0:00:01
     ------------------------------- -------- 5.2/6.7 MB 4.6 MB/s eta 0:00:01
     ----------------------------------- ---- 6.0/6.7 MB 4.4 MB/s eta 0:00:01
     ---------------------------------------- 6.7/6.7 MB 4.4 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Prepari

  error: subprocess-exited-with-error
  
  Preparing metadata (pyproject.toml) did not run successfully.
  exit code: 1
  
  [66 lines of output]
  Partial import of sklearn during the build process.
  
    `numpy.distutils` is deprecated since NumPy 1.23.0, as a result
    of the deprecation of `distutils` itself. It will be removed for
    Python >= 3.12. For older Python versions it will remain present.
    It is recommended to use `setuptools < 60.0` for those Python versions.
    For more details, see:
      https://numpy.org/devdocs/reference/distutils_status_migration.html
  
  
    from numpy.distutils.command.build_ext import build_ext  # noqa
  INFO: No module named 'numpy.distutils._msvccompiler' in numpy.distutils; trying from distutils
  Traceback (most recent call last):
    File "C:\Users\paulo.rogerio\AppData\Local\anaconda3\envs\kobe_pycaret\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 389, in <module>
      main()
    File "C:\Users\

In [17]:
!pip install scikit-learn==1.1.3

Collecting scikit-learn==1.1.3
  Downloading scikit_learn-1.1.3-cp311-cp311-win_amd64.whl.metadata (10 kB)
Downloading scikit_learn-1.1.3-cp311-cp311-win_amd64.whl (7.5 MB)
   ---------------------------------------- 0.0/7.5 MB ? eta -:--:--
   -- ------------------------------------- 0.5/7.5 MB 5.6 MB/s eta 0:00:02
   ----------- ---------------------------- 2.1/7.5 MB 6.5 MB/s eta 0:00:01
   ----------- ---------------------------- 2.1/7.5 MB 6.5 MB/s eta 0:00:01
   ------------- -------------------------- 2.6/7.5 MB 3.4 MB/s eta 0:00:02
   ---------------- ----------------------- 3.1/7.5 MB 3.7 MB/s eta 0:00:02
   ------------------ --------------------- 3.4/7.5 MB 2.8 MB/s eta 0:00:02
   ----------------------- ---------------- 4.5/7.5 MB 3.2 MB/s eta 0:00:01
   ------------------------------ --------- 5.8/7.5 MB 3.5 MB/s eta 0:00:01
   ------------------------------------ --- 6.8/7.5 MB 3.7 MB/s eta 0:00:01
   ---------------------------------------  7.3/7.5 MB 3.8 MB/s eta 0:00:0

  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
imbalanced-learn 0.13.0 requires scikit-learn<2,>=1.3.2, but you have scikit-learn 1.1.3 which is incompatible.
mlxtend 0.23.4 requires scikit-learn>=1.3.1, but you have scikit-learn 1.1.3 which is incompatible.
pycaret 3.3.0 requires scikit-learn>1.4.0, but you have scikit-learn 1.1.3 which is incompatible.
sklearn-compat 0.1.3 requires scikit-learn<1.7,>=1.2, but you have scikit-learn 1.1.3 which is incompatible.


In [19]:
!pip install --upgrade pycaret

