### Config

In [4]:
import pandas as pd
import sqlite3
import requests

import datetime
import logging
import json
import time

In [5]:
import mlflow
import dagshub
from mlflow.client import MlflowClient

In [6]:
import os
from dotenv import load_dotenv

In [7]:
%load_ext autoreload
%autoreload 2

In [8]:
load_dotenv()

True

In [9]:
import os

In [10]:
os.environ["MLFLOW_TRACKING_USERNAME"]

'9b8ef5e7ef0f8aea4180e2be787184cded5bce07'

In [11]:
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG, format="%(message)s")

In [12]:
mlflow.set_tracking_uri("https://dagshub.com/josmunpen/laliga-oracle-dags.mlflow")

In [13]:
dagshub.init(repo_owner="josmunpen", repo_name="laliga-oracle-dags", mlflow=True)

HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"


Accessing as josmunpen
HTTP Request: GET https://dagshub.com/api/v1/repos/josmunpen/laliga-oracle-dags "HTTP/1.1 200 OK"
HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"


Initialized MLflow to track repo "josmunpen/laliga-oracle-dags"


Repository josmunpen/laliga-oracle-dags initialized!


In [14]:
# mlflow.login()

In [15]:
date_version = datetime.datetime.now().strftime("%Y-%m-%d")

In [16]:
# Create a new MLflow Experiment
mlflow.set_experiment(f"LaLigaOracle_{date_version}")

Starting new HTTPS connection (1): dagshub.com:443
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/experiments/get-by-name?experiment_name=LaLigaOracle_2024-11-24 HTTP/11" 404 None
2024/11/24 12:58:00 INFO mlflow.tracking.fluent: Experiment with name 'LaLigaOracle_2024-11-24' does not exist. Creating a new experiment.
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/experiments/create HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/experiments/get?experiment_id=6 HTTP/11" 200 None


<Experiment: artifact_location='mlflow-artifacts:/9b46a3ddf778450bbcb10264d60e58af', creation_time=1732449460355, experiment_id='6', last_update_time=1732449460355, lifecycle_stage='active', name='LaLigaOracle_2024-11-24', tags={}>

In [17]:
mlflow_client = MlflowClient(mlflow.get_tracking_uri())

### Read and join data

In [30]:
from sqlalchemy import create_engine, URL
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import os
from dotenv import load_dotenv

In [31]:
SQLALCHEMY_DATABASE_URL = URL.create(
    "postgresql",
    username=os.getenv("PGUSER"),
    password=os.getenv("PGPASSWORD"),
    host=os.getenv("PGHOST"),
    database=os.getenv("PGDATABASE"),
    port=os.getenv("PGPORT"),
)

engine = create_engine(SQLALCHEMY_DATABASE_URL)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

In [32]:
seasons_to_train = [2022, 2023]

In [33]:
def get_year_fixtures(season):
    with SessionLocal() as session:
        available_fixtures = pd.read_sql(
            f"""
                                SELECT DISTINCT(fixture)
                                FROM matches
                                WHERE season={season}
                                """,
            con=session.bind,
        )

    res = [value[0] for value in available_fixtures.values]
    return res

In [34]:
a =get_year_fixtures(2023)

In [35]:
dfs = []
for season in seasons_to_train:
    available_fixtures = get_year_fixtures(season)
    for fixture in available_fixtures:
        with SessionLocal() as session:
            # Get fixture matches data
            df_matches = pd.read_sql(
                f"""
                                        SELECT *
                                        FROM matches
                                        WHERE fixture={fixture} 
                                            AND season={season}
                                        """,
                con=session.bind,
            )

            # Get teams data (filter by last data available before match date)
            df_teams = pd.read_sql(
                f"""
                                        SELECT *
                                        FROM teams
                                        WHERE teams.query_date = (
                                            SELECT MAX(teams.query_date)
                                            FROM teams
                                            WHERE teams.query_date <= (
                                                SELECT MAX(matches.match_date)
                                                FROM matches
                                                WHERE fixture = {fixture}
                                            )
                                            AND teams.season = {season}
                                        )
                                    """,
                con=session.bind,
            )

        df_aux = df_matches.merge(
            df_teams,
            left_on="team_home",
            right_on="team_id",
            how="left",
            suffixes=(None, "_index_home"),
        )
        df_aux.drop(columns=["team_id", "id_index_home"], axis=1, inplace=True)
        df_aux.columns = [
            "home_" + col if ((col in df_teams.columns) and (col != "id")) else col
            for col in df_aux.columns
        ]

        df_aux = df_aux.merge(
            df_teams,
            left_on="team_away",
            right_on="team_id",
            how="left",
            suffixes=(None, "_index_away"),
        )
        df_aux.drop(columns=["team_id", "id_index_away"], axis=1, inplace=True)
        df_aux.columns = [
            "away_" + col if ((col in df_teams.columns) and (col != "id")) else col
            for col in df_aux.columns
        ]

        dfs.append(df_aux)

df = pd.concat(dfs)

In [36]:
df = df[
    df["home_name"].notnull()
    & df["away_name"].notnull()
    & df["home_history"].notnull()
    & df["away_history"].notnull()
]

In [37]:
df["match_date"] = pd.to_datetime(df["match_date"])

### Feature engineering


Features no necesarias para el modelo: id, fixture, result_predict, result_real, query_date_, name_, 

OHE: team_home?, team_away?, result_real

Nada: total_played_, wins_home_, wins_away_away_, draws_home_, draws_away_, loses_home_, goals_for_home_, goals_for_away_, goals_against_home_, goals_against_away_

Otros: history_away_

Siguientes fases: fixture, match_date (día de la semana, mes, etc)

!!! team_id_away

In [38]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import pickle

In [39]:
# Drop unnecesary features for model
df.drop(
    columns=[
        "id",
        "fixture",
        "result_predict",
        "home_query_date",
        "away_query_date",
        "home_name",
        "away_name",
        "match_date",
        "away_season",
        "season_index_home"
    ],
    axis=1,
    inplace=True,
)

In [40]:
df.rename({"home_season":"season"}, axis=1, inplace=True)

In [41]:
df.head()

Unnamed: 0,team_home,team_away,result_real,season,home_history,home_total_played,home_wins_home,home_wins_away,home_draws_home,home_draws_away,...,away_wins_home,away_wins_away,away_draws_home,away_draws_away,away_loses_home,away_loses_away,away_goals_for_home,away_goals_for_away,away_goals_against_home,away_goals_against_away
0,531,548,home_win,2022,DWWLWWWDLDLWLWDDLLLWWLLDLWDWWWLDLLWLLD,38.0,8.0,6.0,3.0,6.0,...,11.0,10.0,5.0,3.0,3.0,6.0,26.0,25.0,16.0,19.0
1,543,540,home_win,2022,WWWLWWLDWDLWDLDWLWLLWWWDDWLLWLDLWWDLWD,38.0,9.0,8.0,5.0,4.0,...,4.0,4.0,7.0,6.0,8.0,9.0,27.0,25.0,35.0,34.0
2,538,798,away_win,2022,DLWWLLWLLLDLLDDWDLWWLDWDWWDDLLWLLLLDLW,38.0,7.0,4.0,6.0,4.0,...,10.0,4.0,5.0,3.0,4.0,12.0,22.0,15.0,12.0,31.0
3,724,541,away_win,2022,LLLLLWDDDDLWDLDWDLWLWLWDDDLWLDLWLLWLWD,38.0,7.0,3.0,6.0,6.0,...,13.0,11.0,5.0,1.0,1.0,7.0,44.0,31.0,16.0,20.0
4,533,720,away_win,2022,WWDWLDDLWLWLLWWWDWLLLLWWDWWWLLWWDWWWLD,38.0,12.0,7.0,3.0,4.0,...,8.0,3.0,4.0,3.0,7.0,13.0,21.0,12.0,25.0,38.0


In [42]:
df["season"].value_counts()

season
2022    380
2023    306
Name: count, dtype: int64

In [43]:
# One hot encoding
# ohe_cols = ["team_home", "team_away", "result_real"]
ohe_cols = ["team_home", "team_away", "season"]
ohe_encoder = OneHotEncoder(sparse_output=False).set_output(transform="pandas")

In [44]:
ohe_encoder = ohe_encoder.fit(df[ohe_cols])

In [101]:
with open("ohe_encoder.sav", "wb") as f:
    pickle.dump(ohe_encoder, f)

In [None]:
mlflow.sklearn.log_model(
    sk_model=ohe_encoder,
    artifact_path="ohe_encoder",
    registered_model_name="ohe_encoder",
)

https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=59a5e761a21347909e93ff6743db8dd9&run_id=59a5e761a21347909e93ff6743db8dd9 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/59a5e761a21347909e93ff6743db8dd9/artifacts/ohe_encoder/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/59a5e761a21347909e93ff6743db8dd9/artifacts/ohe_encoder/MLmodel HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/59a5e761a21347909e93ff6743db8dd9/artifacts/ohe_encoder/model.pkl HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-o

In [64]:
ohe_info = mlflow_client.get_latest_versions("ohe_encoder")[0]

  ohe_info = mlflow_client.get_latest_versions("ohe_encoder")[0]
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/registered-models/get-latest-versions HTTP/11" 200 None


In [65]:
mlflow_client.set_model_version_tag(
    name="ohe_encoder",
    version=ohe_info.version,
    key="date_version",
    value=date_version
)

https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/model-versions/set-tag HTTP/11" 200 None


In [102]:
ohe_encoded = ohe_encoder.transform(df[ohe_cols])

In [103]:
df = pd.concat([df, ohe_encoded], axis=1).drop(columns=ohe_cols)

In [104]:
df["home_history"] = df["home_history"].apply(lambda x: list(x)[:6])
df["away_history"] = df["away_history"].apply(lambda x: list(x)[:6])

In [105]:
df[
    [
        "home_last_1",
        "home_last_2",
        "home_last_3",
        "home_last_4",
        "home_last_5",
        "home_last_6",
    ]
] = df["home_history"].apply(pd.Series)
df[
    [
        "away_last_1",
        "away_last_2",
        "away_last_3",
        "away_last_4",
        "away_last_5",
        "away_last_6",
    ]
] = df["away_history"].apply(pd.Series)

In [106]:
df.drop(["home_history", "away_history"], axis=1, inplace=True)

In [107]:
label_cols = [
    "home_last_1",
    "home_last_2",
    "home_last_3",
    "home_last_4",
    "home_last_5",
    "home_last_6",
    "away_last_1",
    "away_last_2",
    "away_last_3",
    "away_last_4",
    "away_last_5",
    "away_last_6",
]

In [108]:
df.replace({"L": 0, "D": 1, "W": 2}, inplace=True)

  df.replace({"L": 0, "D": 1, "W": 2}, inplace=True)


In [109]:
from sklearn.preprocessing import LabelEncoder

# TODO: label encoder
le = LabelEncoder()
le.fit(df["result_real"])

In [110]:
df["result_real"] = le.transform(df["result_real"])

In [111]:
# History feature
cols_home_last = [
    "home_last_1",
    "home_last_2",
    "home_last_3",
    "home_last_4",
    "home_last_5",
    "home_last_6",
]
cols_away_last = [
    "away_last_1",
    "away_last_2",
    "away_last_3",
    "away_last_4",
    "away_last_5",
    "away_last_6",
]

df["home_last_avg"] = df[cols_home_last].mean(axis=1, skipna=True)
df["away_last_avg"] = df[cols_away_last].mean(axis=1, skipna=True)

df.drop(columns=cols_home_last, axis=1, inplace=True)
df.drop(columns=cols_away_last, axis=1, inplace=True)

In [112]:
history_feature = "Avg last 6 matches"

In [113]:
df.head()

Unnamed: 0,result_real,home_total_played,home_wins_home,home_wins_away,home_draws_home,home_draws_away,home_loses_home,home_loses_away,home_goals_for_home,home_goals_for_away,...,team_away_723,team_away_724,team_away_727,team_away_728,team_away_797,team_away_798,season_2022,season_2023,home_last_avg,away_last_avg
0,2,38.0,8.0,6.0,3.0,6.0,8.0,7.0,22.0,25.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.5,1.166667
1,2,38.0,9.0,8.0,5.0,4.0,5.0,7.0,26.0,20.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.666667,0.5
2,0,38.0,7.0,4.0,6.0,4.0,6.0,11.0,26.0,17.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.833333,1.0
3,0,38.0,7.0,3.0,6.0,6.0,6.0,10.0,19.0,11.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.333333,2.0
4,0,38.0,12.0,7.0,3.0,4.0,4.0,8.0,36.0,23.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.333333,0.5


### Train and predict

In [239]:
from sklearn import metrics

# from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import (
    train_test_split,
    cross_val_predict,
    cross_val_score,
    learning_curve,
)

from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    ExtraTreesClassifier,
    GradientBoostingClassifier,
)
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
#import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV


from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from mlflow.models import infer_signature

import secrets

In [None]:
# Split by target variable
target_variable = "result_real"

X, y = df.loc[:, df.columns != target_variable], df[target_variable]

In [241]:
# Scale
scaled = False
if scaled == True:
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [242]:
num_samples = len(X)

In [243]:
classifiers = {
    "logistic_regression": LogisticRegression(
        max_iter=10000,
    ),
    "knn_1": KNeighborsClassifier(1),

    "knn_3": KNeighborsClassifier(3),

    "knn_5": KNeighborsClassifier(5),

    "knn_15": KNeighborsClassifier(15),

    "knn_25": KNeighborsClassifier(25),

    "knn_50": KNeighborsClassifier(50),
    "knn_50": KNeighborsClassifier(100),

    "decision_tree": DecisionTreeClassifier(),

    "rfc_10": RandomForestClassifier(n_estimators=10),

    "rfc_100": RandomForestClassifier(n_estimators=100),

    "rfc_1000": RandomForestClassifier(n_estimators=1000),

    "gbr": GradientBoostingClassifier(),

    # (xgb.XGBClassifier(), "xgb"),

    # TODO: EXTREME GRADIENT BOOSTING

}

In [244]:
import matplotlib.pyplot as plt


def evaluate_classifier(classifier, X, y):
    start = time.time()
    y_pred = cross_val_predict(classifier, X, y, cv=10)
    y_pred_proba = cross_val_predict(classifier, X, y, cv=10, method="predict_proba")
    acc = metrics.accuracy_score(y, y_pred)
    precision = metrics.precision_score(y, y_pred, average="macro")
    recall = metrics.recall_score(y, y_pred, average="macro")
    f1 = metrics.f1_score(y, y_pred, average="macro")
    # roc = metrics.roc_auc_score(y, y_pred_proba[:,1], average="macro", multi_class="ovo")
    cm = metrics.confusion_matrix(y, y_pred)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2024)
    trained_model = classifier.fit(X_train, y_train)
    
    end = time.time()

    model_metrics = {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "confusion_matrix": cm,
        "elapsed_time": end - start,
    }

    return trained_model, model_metrics

In [308]:
def track_run(classifier, classifier_name, mlflow_client, log_model=True,  **kwargs):
    model_metrics = kwargs.get("model_metrics")
    model_metrics.pop("confusion_matrix")
    params = kwargs.get("params")
    tags = kwargs.get("tags")
    model_data = kwargs.get("model_data")
    figures = kwargs.get("figures")

    # Start an MLflow run
    with mlflow.start_run():
        mlflow.set_tag("mlflow.runName", f"{classifier_name}_{secrets.token_hex(16)}")

        # Log metrics
        if model_metrics:
            mlflow.log_metrics(model_metrics)

        # Log params
        if params:
            mlflow.log_params(params)
        # size, scale, history

        if figures:
            for name, fig in figures.items():
                mlflow.log_figure(fig, "confusion_matrix.png")

        # Infer the model signature
        # signature = infer_signature(X, classifier.predict(X))

        # Log the model
        if log_model:
            model_info = mlflow.sklearn.log_model(
                sk_model=classifier,
                artifact_path=classifier_name,
                # signature=signature,
                input_example=X,
                registered_model_name=classifier_name,
            )
            model_info = mlflow_client.get_latest_versions(classifier_name)[0]
            for key, value in tags.items():
                mlflow_client.set_model_version_tag(
                    name=classifier_name,
                    version=model_info.version,
                    key=key,
                    value=value
                )

        return mlflow.active_run().info.run_id

In [None]:

runs = pd.DataFrame(columns=["run_id", "accuracy", "precision", "recall", "f1", "confusion_matrix", "elapsed_time"])

In [None]:
for name, classifier in classifiers.items():
    print(f"Training {name}")
    trained_model, model_metrics = evaluate_classifier(classifier, X, y)

    # cm = metrics.ConfusionMatrixDisplay(
    #     confusion_matrix=model_metrics["confusion_matrix"]
    # ).plot()
    run_id = track_run(
        trained_model,
        name,
        log_model=True,
        model_metrics=model_metrics,
        mlflow_client=mlflow_client,
        params={
            "dataset_size": num_samples,
            "scaled": scaled,
            "history_feature": history_feature,
        },
        tags = {
            "date_version": datetime.datetime.now().strftime("%Y-%m-%d")
        }
        # figures={"confusion_matrix": cm.figure_},
    )
    row_dict = model_metrics.copy()
    row_dict["run_id"] = run_id
    runs.loc[name] = row_dict
    

Training logistic_regression


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=72df9d81703646058461d53081662405&run_id=72df9d81703646058461d53081662405 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/72df9d81703646058461d53081662405/artifacts/logistic_regression/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6

Training knn_1


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=47a59f95dbcc46ffb3f5d20123d2ed90&run_id=47a59f95dbcc46ffb3f5d20123d2ed90 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/47a59f95dbcc46ffb3f5d20123d2ed90/artifacts/knn_1/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/47a59f95d

Training knn_3


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=54f36ebb86a649629de1a8e859e50f6d&run_id=54f36ebb86a649629de1a8e859e50f6d HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/54f36ebb86a649629de1a8e859e50f6d/artifacts/knn_3/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/54f36ebb8

Training knn_5


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=a9b674fbd5494733b5f5cd8a8d5ce2c5&run_id=a9b674fbd5494733b5f5cd8a8d5ce2c5 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/a9b674fbd5494733b5f5cd8a8d5ce2c5/artifacts/knn_5/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/a9b674fbd

Training knn_15


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=6faeb32195034e3695802cb254537352&run_id=6faeb32195034e3695802cb254537352 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/6faeb32195034e3695802cb254537352/artifacts/knn_15/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/6faeb321

Training knn_25


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=5050f778b80b4ac6ada464355ba4b031&run_id=5050f778b80b4ac6ada464355ba4b031 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/5050f778b80b4ac6ada464355ba4b031/artifacts/knn_25/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/5050f778

Training knn_50


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=87fda7ee184f4cd492b32a5c187dc877&run_id=87fda7ee184f4cd492b32a5c187dc877 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/87fda7ee184f4cd492b32a5c187dc877/artifacts/knn_50/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/87fda7ee

Training decision_tree


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=71760d2f368441bb927e2c13e787bb99&run_id=71760d2f368441bb927e2c13e787bb99 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/71760d2f368441bb927e2c13e787bb99/artifacts/decision_tree/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/7

Training rfc_10


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=9b3e3cd788664f06955681455f203c5c&run_id=9b3e3cd788664f06955681455f203c5c HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/9b3e3cd788664f06955681455f203c5c/artifacts/rfc_10/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/9b3e3cd7

Training rfc_100


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=36fe7036cfc74793b043a0073646615b&run_id=36fe7036cfc74793b043a0073646615b HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/36fe7036cfc74793b043a0073646615b/artifacts/rfc_100/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/36fe703

Training rfc_1000


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=30998ffe63e14db9a42e69b6d6095381&run_id=30998ffe63e14db9a42e69b6d6095381 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/30998ffe63e14db9a42e69b6d6095381/artifacts/rfc_1000/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/30998f

Training gbr


https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/set-tag HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/log-batch HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=851c479a82a74237bb34e9215232de3d&run_id=851c479a82a74237bb34e9215232de3d HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/851c479a82a74237bb34e9215232de3d/artifacts/gbr/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/851c479a82a

In [311]:
runs

Unnamed: 0,run_id,accuracy,precision,recall,f1,confusion_matrix,elapsed_time
logistic_regression,72df9d81703646058461d53081662405,0.507289,0.440315,0.44763,0.438651,,4.315142
knn_1,47a59f95dbcc46ffb3f5d20123d2ed90,0.44898,0.406475,0.406684,0.406366,,0.244204
knn_3,54f36ebb86a649629de1a8e859e50f6d,0.450437,0.377473,0.40361,0.384709,,0.152957
knn_5,a9b674fbd5494733b5f5cd8a8d5ce2c5,0.469388,0.421772,0.43047,0.423358,,0.153561
knn_15,6faeb32195034e3695802cb254537352,0.524781,0.451065,0.465454,0.446386,,0.16726
knn_25,5050f778b80b4ac6ada464355ba4b031,0.556851,0.482427,0.491513,0.471496,,0.30039
knn_50,87fda7ee184f4cd492b32a5c187dc877,0.556851,0.452035,0.470179,0.430264,,0.187964
decision_tree,71760d2f368441bb927e2c13e787bb99,0.413994,0.3896,0.390486,0.389926,,0.213328
rfc_10,9b3e3cd788664f06955681455f203c5c,0.469388,0.416445,0.426578,0.418934,,0.2511
rfc_100,36fe7036cfc74793b043a0073646615b,0.497085,0.427177,0.437854,0.426289,,3.9582


In [312]:
best_model_name = runs["f1"].idxmax()

In [313]:
best_model_run_id = runs.loc[runs["f1"].idxmax(), "run_id"]

In [None]:
best_model = mlflow_client.search_model_versions(f"run_id='{best_model_run_id}'")[0]

In [13]:
best_model

<ModelVersion: aliases=[], creation_timestamp=1731258626219, current_stage='None', description='', last_updated_timestamp=1731258626219, name='knn_25', run_id='5050f778b80b4ac6ada464355ba4b031', run_link='', source='mlflow-artifacts:/6d5735cbe89241f69cbbd4214ee6925e/5050f778b80b4ac6ada464355ba4b031/artifacts/knn_25', status='READY', status_message='', tags={'date_version': '2024-11-10'}, user_id='', version='4'>

In [14]:
mlflow_client.set_model_version_tag(
    name=best_model.name,
    version=best_model.version,
    key="weekly_best",
    value=True
)

In [15]:
mlflow_client.copy_model_version(
    src_model_uri=f"models:/{best_model.name}/{best_model.version}",
    dst_name="oracle-model-production",
)

Successfully registered model 'oracle-model-production'.
Copied version '4' of model 'knn_25' to version '1' of model 'oracle-model-production'.


<ModelVersion: aliases=[], creation_timestamp=1731262502288, current_stage='None', description='', last_updated_timestamp=1731262502288, name='oracle-model-production', run_id='5050f778b80b4ac6ada464355ba4b031', run_link='', source='models:/knn_25/4', status='READY', status_message='', tags={'date_version': '2024-11-10', 'weekly_best': 'True'}, user_id='', version='1'>

In [72]:
mlflow_client.copy_model_version(
    src_model_uri=f"models:/ohe_encoder/{ohe_info.version}",
    dst_name="oracle-ohe-production",
)

https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/model-versions/get?name=ohe_encoder&version=1 HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/registered-models/create HTTP/11" 200 None
Successfully registered model 'oracle-ohe-production'.
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/model-versions/create HTTP/11" 200 None
Copied version '1' of model 'ohe_encoder' to version '1' of model 'oracle-ohe-production'.


<ModelVersion: aliases=[], creation_timestamp=1731264802140, current_stage='None', description='', last_updated_timestamp=1731264802140, name='oracle-ohe-production', run_id='59a5e761a21347909e93ff6743db8dd9', run_link='', source='models:/ohe_encoder/1', status='READY', status_message='', tags={'date_version': '2024-11-10'}, user_id='', version='1'>

In [70]:
ohe_info.name

'49093_ohe_encoder'

### Get last PRO model

In [18]:
import mlflow.sklearn
import dagshub

In [None]:
dagshub.auth.add_app_token(token: str, host: str | None = None, **kwargs)

In [19]:
mlflow.set_tracking_uri("https://dagshub.com/josmunpen/laliga-oracle-dags.mlflow")
dagshub.init(repo_owner="josmunpen", repo_name="laliga-oracle-dags", mlflow=True)

HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"
HTTP Request: GET https://dagshub.com/api/v1/repos/josmunpen/laliga-oracle-dags "HTTP/1.1 200 OK"
HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"


Initialized MLflow to track repo "josmunpen/laliga-oracle-dags"


Repository josmunpen/laliga-oracle-dags initialized!


In [8]:
back_model = mlflow.sklearn.load_model("models:/oracle-model-production/latest")
# Alternative: latest_mv = mlflow_client.get_latest_versions("oracle-model-production", stages=None)[0] 

  latest = client.get_latest_versions(name, None if stage is None else [stage])


In [2]:
import mlflow.sklearn

In [6]:
# ohe_model = mlflow.sklearn.load_model("models:/ohe_encoder/latest")
ohe_model = mlflow.sklearn.load_model("models:/oracle-ohe-production/latest")
# Alternative: ohe_model = mlflow_client.get_latest_versions("ohe_encoder")[0]

In [9]:
ohe_model.categories_

[array([529, 530, 531, 532, 533, 534, 536, 538, 540, 541, 542, 543, 546,
        547, 548, 715, 720, 723, 724, 727, 728, 797, 798]),
 array([529, 530, 531, 532, 533, 534, 536, 538, 540, 541, 542, 543, 546,
        547, 548, 715, 720, 723, 724, 727, 728, 797, 798]),
 array([2022, 2023])]

In [17]:
import pandas as pd

df = pd.read_csv("before_ohe.csv")

In [21]:
ohe_model.transform(df[["team_home", "team_away", "season"]])

ValueError: Found unknown categories [np.int64(2024)] in column 2 during transform

In [10]:
ohe_model_old = mlflow.sklearn.load_model("models:/ohe_encoder/latest")

  latest = client.get_latest_versions(name, None if stage is None else [stage])


In [11]:
ohe_model_old.categories_

[array([529, 530, 531, 532, 533, 534, 536, 538, 540, 541, 542, 543, 546,
        547, 548, 715, 720, 723, 724, 727, 728, 797, 798]),
 array([529, 530, 531, 532, 533, 534, 536, 538, 540, 541, 542, 543, 546,
        547, 548, 715, 720, 723, 724, 727, 728, 797, 798]),
 array([2022, 2023])]

In [11]:
print(datetime.datetime.now())

2024-11-23 00:15:38.127352


### Get best model

In [None]:
def get_last_model(mlflow_client):
    for mv in mlflow_client.search_model_versions("tag.weekly_best='True'"):
        if mv.last_updated_timestamp > last_ts:
            last_model = mv
    
    print(f"Last model found: {mv.name} (date version {mv.tags.get('date_version')}, version {mv.version})")

    return last_model

In [None]:
from pprint import pprint

for rm in mlflow_client.search_registered_models():
    pprint(dict(rm), indent=4)

Incremented Retry for (url='/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/registered-models/search?max_results=100'): Retry(total=4, connect=5, read=4, redirect=5, status=5)
Retrying (Retry(total=4, connect=5, read=4, redirect=5, status=5)) after connection broken by 'RemoteDisconnected('Remote end closed connection without response')': /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/registered-models/search?max_results=100
Starting new HTTPS connection (9): dagshub.com:443
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/registered-models/search?max_results=100 HTTP/11" 200 None


{   'aliases': {},
    'creation_timestamp': 1731239168501,
    'description': '',
    'last_updated_timestamp': 1731239170503,
    'latest_versions': [   <ModelVersion: aliases=[], creation_timestamp=1731239170503, current_stage='None', description='', last_updated_timestamp=1731239170503, name='decision_tree', run_id='d7044785730e46399232a39f6d417ce7', run_link='', source='mlflow-artifacts:/70aadf01ff15477898d0ebd15cd32289/d7044785730e46399232a39f6d417ce7/artifacts/decision_tree', status='READY', status_message='', tags={'date_version': '2024-11-10'}, user_id='', version='1'>],
    'name': 'decision_tree',
    'tags': {}}
{   'aliases': {},
    'creation_timestamp': 1722788340294,
    'description': '',
    'last_updated_timestamp': 1722893390838,
    'latest_versions': [   <ModelVersion: aliases=[], creation_timestamp=1722893390838, current_stage='None', description='', last_updated_timestamp=1722893390838, name='Decision tree', run_id='889d32a0ed984ccca90d22b79f32bcc0', run_link=''

In [None]:
# def get_last_model(mlflow_client):
#     last_model = None
#     last_ts = 0
#     for mv in mlflow_client.search_model_versions("tag.weekly_best='True'"):
#         if mv.last_updated_timestamp > last_ts:
#             last_model = mv
    
#     print(f"Last model found: {mv.name} (date version {mv.tags.get('date_version')}, version {mv.version})")

#     return last_model

In [276]:
best_last_model = get_last_model(mlflow_client=mlflow_client)

Resetting dropped connection: dagshub.com
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/model-versions/search?filter=tag.weekly_best%3D%27True%27&max_results=10000 HTTP/11" 200 None


Last model found: knn_25 (date version 2024-11-10, version 3)


In [277]:
best_last_model.source

'mlflow-artifacts:/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model'

In [278]:
best_last_model.current_stage

'None'

In [285]:
best_last_model.run_id

'aa97c6011e7e4f9ca86c99b0848ef27f'

In [318]:
best_last_model.version

'3'

In [295]:
model_uri = f"runs:/{best_last_model.run_id}/model"
model = mlflow.sklearn.load_model(model_uri=model_uri)

https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=aa97c6011e7e4f9ca86c99b0848ef27f&run_id=aa97c6011e7e4f9ca86c99b0848ef27f HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts?path=6d5735cbe89241f69cbbd4214ee6925e%2Faa97c6011e7e4f9ca86c99b0848ef27f%2Fartifacts%2Fmodel HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/model HTTP/11" 500 None
Incremented Retry for (url='/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/model'): Retry(total=4, connect=5, read=5, redirect=5, status=4)
Retry: /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/m

MlflowException: The following failures occurred while downloading one or more artifacts from https://dagshub.com/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts:
##### File model #####
API request to https://dagshub.com/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/model failed with exception HTTPSConnectionPool(host='dagshub.com', port=443): Max retries exceeded with url: /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/model (Caused by ResponseError('too many 500 error responses'))

In [296]:
f"models:/{best_last_model.name}/{best_last_model.version}"

'models:/knn_25/3'

In [300]:
test = mlflow.sklearn.load_model("models:/knn_25/1")

https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/model-versions/get-download-uri?name=knn_25&version=3 HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts?path=6d5735cbe89241f69cbbd4214ee6925e%2Faa97c6011e7e4f9ca86c99b0848ef27f%2Fartifacts%2Fsklearn-model HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model/ HTTP/11" 500 None
Incremented Retry for (url='/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model/'): Retry(total=4, connect=5, read=5, redirect=5, status=4)
Retry: /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-mode

MlflowException: The following failures occurred while downloading one or more artifacts from https://dagshub.com/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model:
##### File  #####
API request to https://dagshub.com/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model/ failed with exception HTTPSConnectionPool(host='dagshub.com', port=443): Max retries exceeded with url: /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model/ (Caused by ResponseError('too many 500 error responses'))

In [299]:
f"models:/{best_last_model.name}/{best_last_model.version}"

'models:/knn_25/3'

In [317]:
best_last_model.version

'3'

In [297]:
test = mlflow.sklearn.load_model(f"models:/{best_last_model.name}/{best_last_model.version}")

https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/model-versions/get-download-uri?name=knn_25&version=3 HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts?path=6d5735cbe89241f69cbbd4214ee6925e%2Faa97c6011e7e4f9ca86c99b0848ef27f%2Fartifacts%2Fsklearn-model HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model/ HTTP/11" 500 None
Incremented Retry for (url='/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model/'): Retry(total=4, connect=5, read=5, redirect=5, status=4)
Retry: /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-mode

MlflowException: The following failures occurred while downloading one or more artifacts from https://dagshub.com/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model:
##### File  #####
API request to https://dagshub.com/josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model/ failed with exception HTTPSConnectionPool(host='dagshub.com', port=443): Max retries exceeded with url: /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/6d5735cbe89241f69cbbd4214ee6925e/aa97c6011e7e4f9ca86c99b0848ef27f/artifacts/sklearn-model/ (Caused by ResponseError('too many 500 error responses'))

### Train again and load best model to MLFlow



In [None]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2024)
# trained_model = model.fit(X_train, y_train)

In [None]:
# y_pred = trained_model.predict(X_test)

In [None]:
# trained_model.score(X_test, y_test)

0.5232558139534884

In [None]:
# # Log the model
# model_info = mlflow.sklearn.log_model(
#     sk_model=trained_model,
#     artifact_path=best_model_name + "_artifact",
#     # signature=signature,
#     input_example=X,
#     registered_model_name=best_model_name,
#     metadata={
#         "date_version": datetime.datetime.now().strftime("%Y-%m-%d")
#     }
# )

https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=15dd2540cfea4a2cbc515260d34e1250&run_id=15dd2540cfea4a2cbc515260d34e1250 HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/a2875dc10ef146ba857f1c12be552dda/15dd2540cfea4a2cbc515260d34e1250/artifacts/knn_25_artifact/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/a2875dc10ef146ba857f1c12be552dda/15dd2540cfea4a2cbc515260d34e1250/artifacts/knn_25_artifact/input_example.json HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/a2875dc10ef146ba857f1c12be552dda/15dd2540cfea4a2cbc515260d34e1250/artifacts/knn_25_artifact/MLmodel HTTP/11" 200 None
https://dagshub.com:443 "PU

In [164]:
model_info = mlflow_client.get_latest_versions(best_model_name)[0]
mlflow_client.set_model_version_tag(
    name=best_model_name,
    version=model_info.version,
    key="best_weekly",
    value=True
)

  model_info = mlflow_client.get_latest_versions(best_model_name)[0]
Resetting dropped connection: dagshub.com
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/registered-models/get-latest-versions HTTP/11" 200 None
https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/model-versions/set-tag HTTP/11" 200 None


In [138]:
test = mlflow.sklearn.load_model("models:/knn_25/1")

https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/model-versions/get-download-uri?name=knn_25&version=1 HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts?path=a2875dc10ef146ba857f1c12be552dda%2F15dd2540cfea4a2cbc515260d34e1250%2Fartifacts%2Fknn_25_artifact HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts?path=a2875dc10ef146ba857f1c12be552dda%2F15dd2540cfea4a2cbc515260d34e1250%2Fartifacts%2Fknn_25_artifact HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/a2875dc10ef146ba857f1c12be552dda/15dd2540cfea4a2cbc515260d34e1250/artifacts/knn_25_artifact/serving_input_example.json HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/a2875dc10ef146ba857f1c12be552dda/15dd2540cfea4a2cbc515260d34e125

In [140]:
test.predict_proba(X_test)

array([[0.16, 0.24, 0.6 ],
       [0.08, 0.24, 0.68],
       [0.24, 0.28, 0.48],
       [0.12, 0.2 , 0.68],
       [0.4 , 0.36, 0.24],
       [0.04, 0.16, 0.8 ],
       [0.16, 0.36, 0.48],
       [0.64, 0.28, 0.08],
       [0.  , 0.08, 0.92],
       [0.28, 0.4 , 0.32],
       [0.4 , 0.2 , 0.4 ],
       [0.08, 0.16, 0.76],
       [0.24, 0.32, 0.44],
       [0.28, 0.16, 0.56],
       [0.04, 0.16, 0.8 ],
       [0.32, 0.32, 0.36],
       [0.16, 0.24, 0.6 ],
       [0.44, 0.16, 0.4 ],
       [0.28, 0.24, 0.48],
       [0.36, 0.28, 0.36],
       [0.36, 0.16, 0.48],
       [0.52, 0.2 , 0.28],
       [0.08, 0.12, 0.8 ],
       [0.28, 0.36, 0.36],
       [0.08, 0.08, 0.84],
       [0.64, 0.2 , 0.16],
       [0.16, 0.36, 0.48],
       [0.24, 0.28, 0.48],
       [0.2 , 0.4 , 0.4 ],
       [0.48, 0.24, 0.28],
       [0.28, 0.44, 0.28],
       [0.24, 0.24, 0.52],
       [0.04, 0.08, 0.88],
       [0.4 , 0.36, 0.24],
       [0.12, 0.12, 0.76],
       [0.24, 0.16, 0.6 ],
       [0.2 , 0.2 , 0.6 ],
 

_______

### Inference

In [117]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2024)

In [53]:
model = LogisticRegression(max_iter=10000, random_state=2024)

In [54]:
trained_model = model.fit(X_train, y_train)

In [55]:
classifier_name = "log_reg_def"


# Log the model
model_info = mlflow.sklearn.log_model(
    sk_model=trained_model,
    artifact_path=classifier_name + "_artifact",
    # signature=signature,
    input_example=X,
    registered_model_name=classifier_name,
)

https://dagshub.com:443 "POST /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/create HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/runs/get?run_uuid=1f35822d627547c0abb1225f9b7426da&run_id=1f35822d627547c0abb1225f9b7426da HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/620ba81b100844bd829f3fedcdac3aab/1f35822d627547c0abb1225f9b7426da/artifacts/log_reg_def_artifact/conda.yaml HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/620ba81b100844bd829f3fedcdac3aab/1f35822d627547c0abb1225f9b7426da/artifacts/log_reg_def_artifact/input_example.json HTTP/11" 200 None
https://dagshub.com:443 "PUT /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/620ba81b100844bd829f3fedcdac3aab/1f35822d627547c0abb1225f9b7426da/artifacts/log_reg_def_artifact/MLmodel HTTP/11" 200 None
https://dags

In [83]:
y_pred = trained_model.predict(X_test)

In [84]:
trained_model.score(X_test, y_test)

0.5333333333333333

In [86]:
import pickle

filename = "log_reg_v1.sav"

In [87]:
pickle.dump(trained_model, open(filename, "wb"))

In [10]:
import pickle

# some time later...

# load the model from disk
loaded_model = pickle.load(open(f"./backend/models/{filename}", "rb"))
# result = loaded_model.score(X_test, Y_test)
# print(result)

In [11]:
loaded_model.classes_

array([0, 1, 2])

In [60]:
X_test["team_home_531"].iloc[0]

0.0

In [56]:
# Load MLflow model
import mlflow.sklearn

In [57]:
test = mlflow.sklearn.load_model("models:/log_reg_def/1")

https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow/model-versions/get-download-uri?name=log_reg_def&version=1 HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts?path=620ba81b100844bd829f3fedcdac3aab%2F1f35822d627547c0abb1225f9b7426da%2Fartifacts%2Flog_reg_def_artifact HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts?path=620ba81b100844bd829f3fedcdac3aab%2F1f35822d627547c0abb1225f9b7426da%2Fartifacts%2Flog_reg_def_artifact HTTP/11" 200 None
Starting new HTTPS connection (2): dagshub.com:443
Starting new HTTPS connection (3): dagshub.com:443
Starting new HTTPS connection (4): dagshub.com:443
Starting new HTTPS connection (5): dagshub.com:443
Starting new HTTPS connection (6): dagshub.com:443
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/620ba81b100844bd829f3fedcdac3aab

In [70]:
sk_model = mlflow.sklearn.load_model(
    "mlflow-artifacts:/620ba81b100844bd829f3fedcdac3aab/a2f711cd2e5a468f83945402e745506b/artifacts/Logistic Regression artifact"
)

https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts?path=620ba81b100844bd829f3fedcdac3aab%2Fa2f711cd2e5a468f83945402e745506b%2Fartifacts%2FLogistic+Regression+artifact HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts?path=620ba81b100844bd829f3fedcdac3aab%2Fa2f711cd2e5a468f83945402e745506b%2Fartifacts%2FLogistic+Regression+artifact HTTP/11" 200 None
Resetting dropped connection: dagshub.com
Resetting dropped connection: dagshub.com
Resetting dropped connection: dagshub.com
Resetting dropped connection: dagshub.com
Resetting dropped connection: dagshub.com
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/mlflow-artifacts/artifacts/620ba81b100844bd829f3fedcdac3aab/a2f711cd2e5a468f83945402e745506b/artifacts/Logistic%20Regression%20artifact/requirements.txt HTTP/11" 200 None
https://dagshub.com:443 "GET /josmunpen/laliga-oracle-dags.mlflow/api/2.0/m

In [71]:
sk_model

In [74]:
predictions = sk_model.predict(X_test.iloc[0].values.reshape(1, -1))



In [76]:
predictions = sk_model.predict_proba(X_test.iloc[0].values.reshape(1, -1))



In [77]:
predictions

array([[0.04450971, 0.73681219, 0.2186781 ]])

_______________

### WIP

In [101]:
from backend.utils.utils import get_team_data, get_match_data

In [123]:
import pickle

In [124]:
team_home = 531
team_away = 532

In [128]:
df_match = get_match_data(team_home, team_away)

In [164]:
df_match = pd.concat([df_match, ohe_encoded], axis=1).drop(columns=ohe_cols)

In [78]:
from sklearn.pipeline import make_pipeline

In [116]:
pipe = make_pipeline(OneHotEncoder())