# Import Packages

In [None]:
pip install mlflow xgboost dagshub

In [2]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
import mlflow
import mlflow.xgboost

# Load Data

In [3]:
# Read the sales CSV; the "date" column is parsed into datetimes while loading.
df = pd.read_csv(
    "Modified_Mock_Sales_Data__Fashion_.csv",
    parse_dates=["date"],
)

In [4]:
# Show the loaded frame as the cell output to eyeball columns and values.
df

Unnamed: 0,date,demand,sales,price,category,store,region,season,stock_level,promotions,weather
0,2024-04-05,51,1,47.44,Clothing,Store C,North,Autumn,40,No,Sunny
1,2024-05-01,873,849,20.45,Footwear,Store E,West,Autumn,139,No,Sunny
2,2024-11-01,152,69,47.75,Accessories,Store C,East,Autumn,183,Yes,Snowy
3,2024-02-09,998,945,43.57,Footwear,Store A,South,Summer,330,Yes,Rainy
4,2023-09-05,584,488,9.67,Footwear,Store C,West,Autumn,332,No,Rainy
...,...,...,...,...,...,...,...,...,...,...,...
495,2023-09-11,974,879,44.72,Accessories,Store C,East,Spring,143,No,Snowy
496,2023-09-27,173,86,38.37,Accessories,Store B,West,Winter,390,No,Sunny
497,2024-10-22,961,876,45.76,Footwear,Store D,South,Summer,260,Yes,Rainy
498,2023-09-26,782,697,23.90,Jewelry,Store C,East,Winter,376,No,Cloudy


# Data Preprocessing

In [5]:
# Integer-encode the categorical columns.
# The encoding is applied to a copy so the raw `df` shown earlier stays intact and
# re-running this cell always fits the encoders on the original string labels
# (fitting on already-encoded integers would lose the label <-> code mapping).
categorical_cols = ["category", "store", "region", "season", "promotions", "weather"]
df_encoded = df.copy()

label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df_encoded[col] = le.fit_transform(df_encoded[col])
    label_encoders[col] = le  # kept so codes can be mapped back via inverse_transform

# Features are every column except the raw date and the target.
# NOTE(review): `sales` remains a feature; confirm it is known at prediction time.
X = df_encoded.drop(columns=["date", "demand"])
y = df_encoded["demand"]

# 80/20 hold-out split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Set Up MLflow

In [None]:
# Optional: to share the MLflow UI through DagsHub, uncomment the lines below.
# import dagshub
# dagshub.init(repo_owner='your_git_name', repo_name='your_repo_name', mlflow=True)

In [None]:
# Optional: to share the MLflow UI, point MLflow at the remote tracking server:
# mlflow.set_tracking_uri("https://dagshub.com/your_git_name/your_repo_name.mlflow")
# NOTE(review): if enabled, this presumably has to run before set_experiment so the
# experiment is looked up / created on that server — confirm.

# Select the experiment that subsequent runs are recorded under.
mlflow.set_experiment("Demand_Forecasting_1")

In [None]:
# Hyperparameters passed to xgb.XGBRegressor and logged to MLflow.
# NOTE(review): the non-round values presumably come from a tuning run — confirm.
params = dict(
    colsample_bytree=0.7621345817034147,
    learning_rate=0.1456729184558633,
    max_depth=7,
    min_child_weight=4,
    n_estimators=187,
    subsample=0.8215682095844519,
    random_state=42,
)

In [None]:
# Enable autologging before the run starts. Automatic model logging is turned off
# because the model is logged explicitly below under a known artifact path, which
# is the path the registry entry must point at.
mlflow.xgboost.autolog(log_models=False)

with mlflow.start_run(run_name="xgboost_forecast_1") as run:
    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train)

    # Log only the explicitly chosen hyperparameters
    mlflow.log_params(params)

    # Evaluate on the hold-out split
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = root_mean_squared_error(y_test, y_pred)

    mlflow.log_metric("mae", mae)
    mlflow.log_metric("rmse", rmse)

    # Store the dataset and this notebook as run artifacts
    mlflow.log_artifact("Modified_Mock_Sales_Data__Fashion_.csv")
    mlflow.log_artifact("XGBoost_Forecast.ipynb")

    mlflow.log_param("num_samples", len(df))
    # Count only model inputs: df.shape[1] would also include the dropped
    # "date" column and the target "demand".
    mlflow.log_param("num_features", X_train.shape[1])

    # Model Registry
    # Log the model under "xgboost_model" and take the URI from the returned
    # model info, so the registered version points at an artifact that exists.
    run_id = run.info.run_id
    model_info = mlflow.xgboost.log_model(model, "xgboost_model")
    model_uri = model_info.model_uri

    # Register the model under a fixed name in the registry
    registered_model = mlflow.register_model(model_uri=model_uri, name="XGBoostForecastModel")
    print("Registered model:", registered_model)

print("Training Complete. Check MLflow UI for details.")

เปิดเว็บ MLflow UI บน localhost ของตัวเอง โดยรันคำสั่งนี้ใน Terminal: `mlflow ui --host localhost --port 5000`