# Importing Packages

In [None]:
pip install mlflow xgboost optuna dagshub

In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import mlflow
import optuna
import optuna.logging

# Load and Preview Data

In [3]:
# Read the mock fashion sales CSV from the working directory; the `date` column
# is parsed into datetimes at load time.
df = pd.read_csv("Modified_Mock_Sales_Data__Fashion_.csv", parse_dates=["date"])

In [4]:
# Preview the loaded frame (the rendered output elides the middle rows).
df

Unnamed: 0,date,demand,sales,price,category,store,region,season,stock_level,promotions,weather
0,2024-04-05,51,1,47.44,Clothing,Store C,North,Autumn,40,No,Sunny
1,2024-05-01,873,849,20.45,Footwear,Store E,West,Autumn,139,No,Sunny
2,2024-11-01,152,69,47.75,Accessories,Store C,East,Autumn,183,Yes,Snowy
3,2024-02-09,998,945,43.57,Footwear,Store A,South,Summer,330,Yes,Rainy
4,2023-09-05,584,488,9.67,Footwear,Store C,West,Autumn,332,No,Rainy
...,...,...,...,...,...,...,...,...,...,...,...
495,2023-09-11,974,879,44.72,Accessories,Store C,East,Spring,143,No,Snowy
496,2023-09-27,173,86,38.37,Accessories,Store B,West,Winter,390,No,Sunny
497,2024-10-22,961,876,45.76,Footwear,Store D,South,Summer,260,Yes,Rainy
498,2023-09-26,782,697,23.90,Jewelry,Store C,East,Winter,376,No,Cloudy


# Data Preprocessing

In [5]:
# Convert the categorical columns to integer codes. `df` is updated in place and
# every fitted encoder is kept in `label_encoders`, keyed by column name.
categorical_cols = ["category", "store", "region", "season", "promotions", "weather"]
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

# Target is `demand`; every other column except `date` is used as a feature.
# NOTE(review): `sales` stays in X and tracks `demand` closely in the preview
# rows — confirm it is actually known at prediction time.
y = df["demand"]
X = df.drop(columns=["date", "demand"])

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Define Objective Function

In [6]:
def objective(trial):
    """Fit one XGBoost regressor with trial-suggested hyperparameters and score it.

    The whole trial runs inside its own nested MLflow run, to which the
    hyperparameters and the MAE / RMSE are logged.

    Args:
        trial: the Optuna trial used to draw the hyperparameter values.

    Returns:
        The RMSE of the fitted model on ``X_test`` / ``y_test``.

    NOTE(review): the score comes from the held-out test split, which is
    therefore also used for model selection — confirm that is intended.
    """
    with mlflow.start_run(nested=True):  # child run
        # Keyword arguments are evaluated left to right, so the hyperparameters
        # are requested from the trial in the order listed here.
        hyperparams = dict(
            n_estimators=trial.suggest_int('n_estimators', 50, 300, step=10),
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            subsample=trial.suggest_float('subsample', 0.5, 1.0),
            colsample_bytree=trial.suggest_float('colsample_bytree', 0.5, 1.0),
            min_child_weight=trial.suggest_int('min_child_weight', 1, 10),
            random_state=42,
        )

        regressor = xgb.XGBRegressor(**hyperparams)
        regressor.fit(X_train, y_train)
        predictions = regressor.predict(X_test)

        scores = {
            "mae": mean_absolute_error(y_test, predictions),
            "rmse": root_mean_squared_error(y_test, predictions),
        }

        # Record what was tried and how it scored on this child run.
        mlflow.log_params(hyperparams)
        for metric_name, metric_value in scores.items():
            mlflow.log_metric(metric_name, metric_value)

        return scores["rmse"]

# Create Experiment & Define Optimization Trials

In [None]:
# To share the MLflow UI on the web via DagsHub, uncomment the two lines below.
# import dagshub
# dagshub.init(repo_owner='your_git_name', repo_name='your_repo_name', mlflow=True)

In [None]:
# To share the MLflow UI on the web, uncomment the tracking-URI line below.
# NOTE(review): the tracking URI presumably has to be set before set_experiment
# so the experiment is resolved on the remote server — confirm.
# mlflow.set_tracking_uri("https://dagshub.com/your_git_name/your_repo_name.mlflow")
# Select the MLflow experiment by name for the runs started below.
mlflow.set_experiment('Optuna_Hyperparameter_Optimization_1')

In [9]:
# Limit Optuna's own logging to ERROR level; per-trial progress is printed by
# ImprovementCallback instead.
optuna.logging.set_verbosity(optuna.logging.ERROR)
class ImprovementCallback:
    """Optuna callback that reports each trial's value against the best seen so far.

    The reported percentage is positive when a trial's value is lower than the
    running best, so the callback is meant for studies that minimize their
    objective. Every message is printed and also kept in ``self.logs`` so that
    ``save_logs`` can write them to ``log_file``.
    """

    def __init__(self, log_file="improvement_log.txt"):
        """Start with no best value and an empty message list.

        Args:
            log_file: path of the text file written by ``save_logs``.
        """
        self.best_value = float('inf')  # lowest trial value observed so far
        self.log_file = log_file
        self.logs = []  # every message printed by __call__, in order

    def __call__(self, study, trial):
        """Print and record one message for a finished trial.

        Args:
            study: the study being optimized (not used).
            trial: the finished trial; its ``number`` and ``value`` are read.
        """
        value = trial.value
        if value is None:
            # A trial that did not complete has no value; formatting None with
            # ':.4f' would raise TypeError, so report it and leave the best as is.
            log_text = f"Trial {trial.number} finished without a value"
        elif self.best_value == float('inf'):
            log_text = f"Initial trial {trial.number} achieved value: {value:.4f}"
        elif self.best_value == 0:
            # A relative improvement over a best of exactly 0 is undefined
            # (it would divide by zero), so the percentage is omitted.
            log_text = f"Trial {trial.number} achieved value: {value:.4f}"
        else:
            improvement = ((self.best_value - value) / self.best_value) * 100
            log_text = f"Trial {trial.number} achieved value: {value:.4f} with {improvement:.4f}% improvement"

        print(log_text)
        self.logs.append(log_text)  # keep the message for save_logs

        if value is not None:
            self.best_value = min(self.best_value, value)

    def save_logs(self):
        """Write the collected messages to ``self.log_file`` and log it as an MLflow artifact."""
        # Explicit encoding so the file content does not depend on the platform default.
        with open(self.log_file, "w", encoding="utf-8") as f:
            f.write("\n".join(self.logs))
        mlflow.log_artifact(self.log_file)

In [None]:
# Parent run: each trial opens its own nested child run inside objective().
with mlflow.start_run(run_name="XGBoost_Optuna_1") as parent_run:
    # Seed the sampler so the search is repeatable, in line with the fixed
    # random_state used for the data split and the model. TPESampler is Optuna's
    # default sampler, so only the seeding changes.
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=42),
    )
    callback = ImprovementCallback()
    study.optimize(objective, n_trials=10, callbacks=[callback])

    # Upload the per-trial log to MLflow once tuning has finished.
    callback.save_logs()

    # Record the winning hyperparameters on the parent run, both as params and as JSON.
    mlflow.log_params(study.best_params)
    mlflow.log_dict(study.best_params, "best_params.json")

print("Hyperparameter tuning complete.")

เปิดเว็บ MLflow UI บน localhost ของตัวเอง โดยรันคำสั่ง `mlflow ui --host localhost --port 5000` ใน Terminal แล้วเปิด http://localhost:5000 ในเบราว์เซอร์