In [11]:
import sys, subprocess, importlib
def pip_try(*pkgs):
    """Best-effort ``pip install -q`` of ``pkgs`` using the running interpreter.

    Args:
        *pkgs: Requirement strings passed verbatim to ``pip install``.

    Returns:
        None. A success or "skip" line is printed; failures are reported,
        never raised, so the notebook keeps running.
    """
    install_cmd = [sys.executable, "-m", "pip", "install", "-q"]
    install_cmd.extend(pkgs)
    try:
        subprocess.check_call(install_cmd)
        print("‚úÖ", " ".join(pkgs))
    except Exception as err:
        # Deliberately broad: any install problem only produces a notice.
        print("‚ö†Ô∏è  skip", " ".join(pkgs), "->", err)

# Install the notebook's third-party dependencies first; the imports on the next
# line rely on these packages being present in the running interpreter.
# NOTE(review): versions are not pinned, so a later re-run may install different releases.
pip_try("pandas", "numpy", "scikit-learn", "matplotlib", "joblib", "scipy")
import sklearn, numpy, pandas, scipy
# Record which interpreter and scikit-learn release this run used.
print("Python:", sys.executable)
print("sklearn:", sklearn.__version__)

‚úÖ pandas numpy scikit-learn matplotlib joblib scipy
Python: c:\Users\hamas\AppData\Local\Programs\Python\Python313\python.exe
sklearn: 1.7.1


In [12]:
import os, re, json, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from typing import Dict, Any, List

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from scipy.stats import mode


import scipy.sparse as sp
import joblib

try:
    from IPython.display import display
except Exception:
    pass


In [13]:
class RuleFitLite:
    """Small RuleFit-style classifier: random-forest rules fed to an L1 logistic model.

    A random forest is fitted on the raw features. Every non-root node of every
    tree becomes one binary "rule" feature (1 when a sample's decision path
    passes through that node). An L1-penalised logistic regression is then
    fitted on that sparse rule matrix.

    Args:
        n_estimators: Number of trees in the rule-generating forest.
        max_depth: Maximum depth of each tree.
        min_samples_leaf: Minimum samples per leaf for each tree.
        random_state: Seed used for the forest AND for the stochastic ``saga``
            solver, so repeated fits give the same model.
        alpha: L1 regularisation strength; the logistic model uses ``C = 1/alpha``
            (``alpha`` is floored at ``1e-8`` to avoid division by zero).

    Attributes:
        rf: The underlying ``RandomForestClassifier``.
        lr: The underlying ``LogisticRegression``.
        classes_: Class labels in the column order of ``predict_proba``;
            ``None`` until ``fit`` has been called.
    """

    def __init__(self, n_estimators=200, max_depth=5, min_samples_leaf=1,
                 random_state=42, alpha=1.0):
        self.rf = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            random_state=random_state,
            n_jobs=-1,
        )
        # C = 1/alpha
        self.alpha = float(alpha)
        # `multi_class` is intentionally not passed: "auto" is the default and the
        # parameter is deprecated in recent scikit-learn releases.
        self.lr = LogisticRegression(
            penalty="l1",
            solver="saga",
            C=1.0 / max(self.alpha, 1e-8),
            max_iter=3000,
            random_state=random_state,  # saga shuffles the data; seed it for reproducibility
        )
        self.classes_ = None

    def _rules_matrix(self, X):
        """Return the sparse CSR rule-indicator matrix for ``X``.

        One block of columns per fitted tree, taken from ``decision_path`` with
        the first (root) column dropped because every sample passes the root.
        """
        mats = []
        for est in self.rf.estimators_:
            M = est.decision_path(X)
            if M.shape[1] > 1:
                # Column 0 is the root node: constant, carries no information.
                M = M[:, 1:]
            mats.append(M)
        if len(mats) == 1:
            return mats[0].tocsr()
        return sp.hstack(mats, format="csr")

    def fit(self, X, y):
        """Fit the forest on ``X``/``y``, then the logistic model on the derived rules.

        Returns:
            self, to allow call chaining.
        """
        X = np.asarray(X)
        self.rf.fit(X, y)
        R = self._rules_matrix(X)
        self.lr.fit(R, y)
        # Expose the exact class order used by predict_proba/predict.
        self.classes_ = self.lr.classes_
        return self

    def predict_proba(self, X):
        """Return class probabilities for ``X``; columns follow ``self.lr.classes_``."""
        X = np.asarray(X)
        R = self._rules_matrix(X)
        return self.lr.predict_proba(R)

    def predict(self, X):
        """Return the most probable class label for each row of ``X``."""
        probs = self.predict_proba(X)
        idx = probs.argmax(axis=1)
        return np.array([self.lr.classes_[i] for i in idx])


In [14]:
# Path to the dataset file. Set to None to auto-detect a CSV in the working directory.
DATA_PATH = "./dataset_normalized_clean.csv"

# Name of the label column. Set to None to let a later cell guess it.
TARGET_COL = "Role"

if DATA_PATH is None:
    # Sorted so the auto-detected file does not depend on directory listing order.
    csvs = sorted(f for f in os.listdir('.') if f.lower().endswith('.csv'))
    if len(csvs) == 1:
        DATA_PATH = csvs[0]
        print(f"Auto-detected dataset: {DATA_PATH}")
    elif len(csvs) > 1:
        # Several CSVs: prefer one whose name hints at a career/skill dataset.
        picks = [f for f in csvs if re.search(r'career|map|skill|job|role', f, re.I)]
        if picks:
            DATA_PATH = picks[0]
            print(f"Auto-detected dataset (heuristic): {DATA_PATH}")
        else:
            print("Multiple CSV ditemukan. Set DATA_PATH manual.")
    else:
        print("Tidak ada file .csv. Set DATA_PATH manual.")

if DATA_PATH is None:
    raise FileNotFoundError("Set DATA_PATH ke file dataset kamu (CSV/XLSX).")

if DATA_PATH.lower().endswith('.csv'):
    df = pd.read_csv(DATA_PATH)
elif DATA_PATH.lower().endswith(('.xlsx', '.xls')):
    df = pd.read_excel(DATA_PATH)
else:
    # Fail here with a clear message instead of a NameError on `df` below.
    raise ValueError(
        f"Unsupported dataset extension: {DATA_PATH!r} (expected .csv, .xlsx or .xls)"
    )

print("Shape:", df.shape)
try:
    display(df.head())
except Exception:
    # `display` may be missing outside IPython; fall back to plain text without
    # swallowing KeyboardInterrupt/SystemExit.
    print(df.head())

Shape: (9162, 28)


Unnamed: 0,Database Fundamentals,Computer Architecture,Distributed Computing Systems,Cyber Security,Networking,Software Development,Programming Skills,Project Management,Computer Forensics Fundamentals,Technical Communication,...,Conscientousness,Extraversion,Agreeableness,Emotional_Range,Conversation,Openness to Change,Hedonism,Self-enhancement,Self-transcendence,Role
0,5,4,4,4,4,4,4,4,4,4,...,4,4,1,5,3,2,1,1,2,Database Administrator
1,5,4,4,4,4,4,4,4,4,4,...,3,3,2,4,2,2,1,1,2,Database Administrator
2,5,1,4,4,4,4,4,4,4,4,...,3,3,3,3,3,3,2,2,4,Database Administrator
3,5,1,4,4,4,4,4,4,4,4,...,4,3,3,5,4,2,3,3,4,Database Administrator
4,5,3,4,4,4,4,4,4,4,4,...,1,1,1,3,2,1,3,3,2,Database Administrator


In [15]:
if TARGET_COL is None:
    # Guess the label column: first name that looks like a label, else the last column.
    label_pattern = re.compile(r'(job|role|position|pekerjaan|label|target)', re.I)
    candidates = [name for name in df.columns if label_pattern.search(name)]
    if candidates:
        TARGET_COL = candidates[0]
    else:
        TARGET_COL = df.columns[-1]
print("Target column:", TARGET_COL)

# Features are every column except the target.
X = df.drop(columns=[TARGET_COL])
y = df[TARGET_COL]

# Partition feature names by dtype, keeping the original column order.
num_cols, cat_cols = [], []
for name in X.columns:
    if pd.api.types.is_numeric_dtype(X[name]):
        num_cols.append(name)
    else:
        cat_cols.append(name)

num_suffix = "..." if len(num_cols) > 10 else ""
cat_suffix = "..." if len(cat_cols) > 10 else ""
print("Numerik:", num_cols[:10], num_suffix)
print("Kategorikal:", cat_cols[:10], cat_suffix)

# Numeric columns: median imputation only.
numeric_steps = [("imputer", SimpleImputer(strategy="median"))]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical columns: most-frequent imputation followed by dense one-hot encoding.
categorical_steps = [
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
]
categorical_transformer = Pipeline(steps=categorical_steps)

preprocess = ColumnTransformer([
    ("num", numeric_transformer, num_cols),
    ("cat", categorical_transformer, cat_cols),
])

# Column order of the feature frame, saved with the artifacts later on.
feature_columns = X.columns.tolist()

Target column: Role
Numerik: ['Database Fundamentals', 'Computer Architecture', 'Distributed Computing Systems', 'Cyber Security', 'Networking', 'Software Development', 'Programming Skills', 'Project Management', 'Computer Forensics Fundamentals', 'Technical Communication'] ...
Kategorikal: [] 


In [16]:
def categorize_skills(columns):
    """Split column labels into hard-skill and soft-skill groups.

    A column is a soft skill when its lower-cased label contains any of the
    soft-skill keywords as a substring; every other column is a hard skill.

    Args:
        columns: Iterable of column labels. Non-string labels are matched on
            their ``str()`` form and returned unchanged.

    Returns:
        Tuple ``(hard_skills, soft_skills)`` of lists, each preserving the
        input order.
    """
    soft_skill_keywords = (
        'openness',
        'conscientousness',   # spelling used by the dataset's column name
        'conscientiousness',  # correct spelling, so a fixed column name still matches
        'extraversion', 'agreeableness',
        'neuroticism', 'emotional', 'communication', 'leadership',
        'teamwork', 'creativity', 'critical', 'problem', 'conversation',
        'hedonism', 'enhancement', 'transcendence', 'change',
    )

    hard_skills = []
    soft_skills = []

    for col in columns:
        # str() keeps integer or other non-string labels from raising on .lower().
        col_lower = str(col).lower()
        if any(keyword in col_lower for keyword in soft_skill_keywords):
            soft_skills.append(col)
        else:
            hard_skills.append(col)

    return hard_skills, soft_skills

# Classify the numeric feature columns into the two skill groups.
hard_skill_cols, soft_skill_cols = categorize_skills(num_cols)

banner = "=" * 60
print(banner)
print("KATEGORISASI SKILL")
print(banner)

# Print each group as a heading followed by one bullet per column.
skill_groups = (
    (f"\nüíª Hard Skills ({len(hard_skill_cols)}):", hard_skill_cols),
    (f"\nüß† Soft Skills ({len(soft_skill_cols)}):", soft_skill_cols),
)
for heading, group_cols in skill_groups:
    print(heading)
    for skill in group_cols:
        print(f"   - {skill}")

# Independent copies of each feature view.
X_hard = X[hard_skill_cols].copy()
X_soft = X[soft_skill_cols].copy()

print(f"\n‚úÖ Data split:")
print(f"   Hard Skills shape: {X_hard.shape}")
print(f"   Soft Skills shape: {X_soft.shape}")

KATEGORISASI SKILL

üíª Hard Skills (15):
   - Database Fundamentals
   - Computer Architecture
   - Distributed Computing Systems
   - Cyber Security
   - Networking
   - Software Development
   - Programming Skills
   - Project Management
   - Computer Forensics Fundamentals
   - AI ML
   - Software Engineering
   - Business Analysis
   - Data Science
   - Troubleshooting skills
   - Graphics Designing

üß† Soft Skills (12):
   - Technical Communication
   - Communication skills
   - Openness
   - Conscientousness
   - Extraversion
   - Agreeableness
   - Emotional_Range
   - Conversation
   - Openness to Change
   - Hedonism
   - Self-enhancement
   - Self-transcendence

‚úÖ Data split:
   Hard Skills shape: (9162, 15)
   Soft Skills shape: (9162, 12)


In [17]:
# Split both feature views and the labels in ONE call so the train/validation rows
# are guaranteed to be the same samples in each view. The ensemble combines per-row
# probabilities from the hard and soft models, so the rows must line up.
# Stratify only when there is more than one class.
stratify_labels = y if y.nunique() > 1 else None
(
    X_train_hard, X_val_hard,
    X_train_soft, X_val_soft,
    y_train_hard, y_val_hard,
) = train_test_split(
    X_hard, X_soft, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

# Both views share the same labels; the *_soft names are kept for later cells.
y_train_soft, y_val_soft = y_train_hard.copy(), y_val_hard.copy()

# Sanity check: the two views must describe exactly the same rows.
assert X_train_hard.index.equals(X_train_soft.index)
assert X_val_hard.index.equals(X_val_soft.index)

print("="*60)
print("TRAIN-TEST SPLIT")
print("="*60)
print(f"Hard Skills - Train: {X_train_hard.shape}, Val: {X_val_hard.shape}")
print(f"Soft Skills - Train: {X_train_soft.shape}, Val: {X_val_soft.shape}")

print("\nüìÑ Sample Hard Skills:")
display(X_train_hard.head())

print("\nüìÑ Sample Soft Skills:")
display(X_train_soft.head())

TRAIN-TEST SPLIT
Hard Skills - Train: (7329, 15), Val: (1833, 15)
Soft Skills - Train: (7329, 12), Val: (1833, 12)

üìÑ Sample Hard Skills:


Unnamed: 0,Database Fundamentals,Computer Architecture,Distributed Computing Systems,Cyber Security,Networking,Software Development,Programming Skills,Project Management,Computer Forensics Fundamentals,AI ML,Software Engineering,Business Analysis,Data Science,Troubleshooting skills,Graphics Designing
7842,4,4,4,4,4,4,4,4,4,4,4,4,5,4,4
2007,3,3,3,5,3,3,3,3,3,3,3,3,1,3,3
2748,1,1,1,1,1,5,1,4,1,1,1,1,1,1,1
8753,3,3,3,3,3,3,4,3,3,3,3,3,3,3,5
2689,3,3,3,4,5,3,3,3,3,3,3,3,3,3,3



üìÑ Sample Soft Skills:


Unnamed: 0,Technical Communication,Communication skills,Openness,Conscientousness,Extraversion,Agreeableness,Emotional_Range,Conversation,Openness to Change,Hedonism,Self-enhancement,Self-transcendence
7842,4,4,2,2,3,3,1,2,5,2,3,2
2007,3,3,4,4,3,2,4,2,3,3,4,2
2748,1,1,5,1,2,2,1,1,5,5,4,3
8753,3,3,1,1,1,4,1,5,5,5,3,5
2689,3,3,2,5,2,5,3,5,5,4,3,5


In [18]:
def _median_imputer():
    """Return a fresh one-step pipeline that fills missing values with the column median."""
    return Pipeline(steps=[("imputer", SimpleImputer(strategy="median"))])


# One independent imputer per feature view so each is fitted on its own columns.
numeric_transformer_hard = _median_imputer()
numeric_transformer_soft = _median_imputer()

preprocess_hard = ColumnTransformer(
    transformers=[("num", numeric_transformer_hard, hard_skill_cols)]
)
preprocess_soft = ColumnTransformer(
    transformers=[("num", numeric_transformer_soft, soft_skill_cols)]
)

print("‚úÖ Preprocessing pipelines created for Hard & Soft Skills")

‚úÖ Preprocessing pipelines created for Hard & Soft Skills


In [19]:
banner = "=" * 60
print(banner, "TRAINING MODEL - HARD SKILLS", banner, sep="\n")

# Fully grown, seeded decision tree on the hard-skill features.
dt_hard_params = dict(
    criterion="gini",
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
)
dt_pipe_hard = Pipeline(steps=[
    ("preprocess", preprocess_hard),
    ("clf", DecisionTreeClassifier(**dt_hard_params)),
])
y_pred_dt_hard = dt_pipe_hard.fit(X_train_hard, y_train_hard).predict(X_val_hard)

# Validation metrics: accuracy plus weighted and macro F1.
acc_dt_hard = accuracy_score(y_val_hard, y_pred_dt_hard)
f1w_dt_hard, f1m_dt_hard = (
    f1_score(y_val_hard, y_pred_dt_hard, average=avg) for avg in ("weighted", "macro")
)

print(f"DT (Hard) -> acc={acc_dt_hard:.4f}, f1_weighted={f1w_dt_hard:.4f}, f1_macro={f1m_dt_hard:.4f}")

TRAINING MODEL - HARD SKILLS
DT (Hard) -> acc=0.9182, f1_weighted=0.9132, f1_macro=0.9069


In [20]:
# RuleFitLite takes plain arrays, so run the hard-skill preprocessor explicitly.
pre_X_train_hard = preprocess_hard.fit_transform(X_train_hard)
pre_X_val_hard = preprocess_hard.transform(X_val_hard)

rulefit_hard_params = dict(
    n_estimators=300,
    max_depth=5,
    min_samples_leaf=1,
    random_state=42,
    alpha=1.0,
)
rf_lite_hard = RuleFitLite(**rulefit_hard_params)
y_pred_rf_hard = rf_lite_hard.fit(pre_X_train_hard, y_train_hard).predict(pre_X_val_hard)

# Validation metrics: accuracy plus weighted and macro F1.
acc_rf_hard = accuracy_score(y_val_hard, y_pred_rf_hard)
f1w_rf_hard, f1m_rf_hard = (
    f1_score(y_val_hard, y_pred_rf_hard, average=avg) for avg in ("weighted", "macro")
)

print(f"RF (Hard) -> acc={acc_rf_hard:.4f}, f1_weighted={f1w_rf_hard:.4f}, f1_macro={f1m_rf_hard:.4f}")

RF (Hard) -> acc=0.9345, f1_weighted=0.9345, f1_macro=0.9299


In [21]:
banner = "=" * 60
print(banner, "TRAINING MODEL - SOFT SKILLS", banner, sep="\n")

# Fully grown, seeded decision tree on the soft-skill features.
dt_soft_params = dict(
    criterion="gini",
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
)
dt_pipe_soft = Pipeline(steps=[
    ("preprocess", preprocess_soft),
    ("clf", DecisionTreeClassifier(**dt_soft_params)),
])
y_pred_dt_soft = dt_pipe_soft.fit(X_train_soft, y_train_soft).predict(X_val_soft)

# Validation metrics: accuracy plus weighted and macro F1.
acc_dt_soft = accuracy_score(y_val_soft, y_pred_dt_soft)
f1w_dt_soft, f1m_dt_soft = (
    f1_score(y_val_soft, y_pred_dt_soft, average=avg) for avg in ("weighted", "macro")
)

print(f"DT (Soft) -> acc={acc_dt_soft:.4f}, f1_weighted={f1w_dt_soft:.4f}, f1_macro={f1m_dt_soft:.4f}")

TRAINING MODEL - SOFT SKILLS
DT (Soft) -> acc=1.0000, f1_weighted=1.0000, f1_macro=1.0000


In [22]:
# RuleFitLite takes plain arrays, so run the soft-skill preprocessor explicitly.
pre_X_train_soft = preprocess_soft.fit_transform(X_train_soft)
pre_X_val_soft = preprocess_soft.transform(X_val_soft)

rulefit_soft_params = dict(
    n_estimators=300,
    max_depth=5,
    min_samples_leaf=1,
    random_state=42,
    alpha=1.0,
)
rf_lite_soft = RuleFitLite(**rulefit_soft_params)
y_pred_rf_soft = rf_lite_soft.fit(pre_X_train_soft, y_train_soft).predict(pre_X_val_soft)

# Validation metrics: accuracy plus weighted and macro F1.
acc_rf_soft = accuracy_score(y_val_soft, y_pred_rf_soft)
f1w_rf_soft, f1m_rf_soft = (
    f1_score(y_val_soft, y_pred_rf_soft, average=avg) for avg in ("weighted", "macro")
)

print(f"RF (Soft) -> acc={acc_rf_soft:.4f}, f1_weighted={f1w_rf_soft:.4f}, f1_macro={f1m_rf_soft:.4f}")

RF (Soft) -> acc=1.0000, f1_weighted=1.0000, f1_macro=1.0000


In [23]:
def ensemble_predict(X_hard, X_soft, model_type="rulefit", weight_hard=0.6, weight_soft=0.4):
    """Blend hard-skill and soft-skill model probabilities into one prediction.

    Args:
        X_hard: Hard-skill features; rows must correspond one-to-one to ``X_soft``.
        X_soft: Soft-skill features for the same samples.
        model_type: ``"dt"`` uses the decision-tree pipelines; any other value
            uses the RuleFitLite models with their fitted preprocessors.
        weight_hard: Weight applied to the hard-skill probabilities.
        weight_soft: Weight applied to the soft-skill probabilities.

    Returns:
        Tuple ``(pred_ensemble, proba_ensemble)``: predicted labels and the
        weighted probability matrix, with columns in the hard model's class order.

    Raises:
        ValueError: If the two models do not share the same class order, since
            adding their probability columns would then mix different classes.
    """
    # Only probabilities are needed; the per-model label predictions were unused
    # and, for RuleFitLite, repeated the whole rule-matrix computation.
    if model_type == "dt":
        proba_hard = dt_pipe_hard.predict_proba(X_hard)
        proba_soft = dt_pipe_soft.predict_proba(X_soft)
        classes = dt_pipe_hard.classes_
        classes_soft = dt_pipe_soft.classes_
    else:
        proba_hard = rf_lite_hard.predict_proba(preprocess_hard.transform(X_hard))
        proba_soft = rf_lite_soft.predict_proba(preprocess_soft.transform(X_soft))
        classes = rf_lite_hard.lr.classes_
        classes_soft = rf_lite_soft.lr.classes_

    classes = np.asarray(classes)
    if not np.array_equal(classes, np.asarray(classes_soft)):
        raise ValueError(
            "Hard-skill and soft-skill models have different class orderings; "
            "their probabilities cannot be combined column-wise."
        )

    proba_ensemble = weight_hard * proba_hard + weight_soft * proba_soft
    pred_ensemble = classes[proba_ensemble.argmax(axis=1)]

    return pred_ensemble, proba_ensemble

banner = "=" * 60

# Decision-tree ensemble on the validation rows.
print(banner, "ENSEMBLE PREDICTION - DECISION TREE", banner, sep="\n")
y_pred_ensemble_dt, proba_ensemble_dt = ensemble_predict(X_val_hard, X_val_soft, model_type="dt")

acc_ensemble_dt = accuracy_score(y_val_hard, y_pred_ensemble_dt)
f1w_ensemble_dt, f1m_ensemble_dt = (
    f1_score(y_val_hard, y_pred_ensemble_dt, average=avg) for avg in ("weighted", "macro")
)

print(f"Ensemble DT -> acc={acc_ensemble_dt:.4f}, f1_weighted={f1w_ensemble_dt:.4f}, f1_macro={f1m_ensemble_dt:.4f}")
report_ensemble_dt = classification_report(y_val_hard, y_pred_ensemble_dt)
print("\nClassification Report (Ensemble DT):\n", report_ensemble_dt)

# RuleFit ensemble on the same validation rows.
print(banner, "ENSEMBLE PREDICTION - RULEFIT", banner, sep="\n")
y_pred_ensemble_rf, proba_ensemble_rf = ensemble_predict(X_val_hard, X_val_soft, model_type="rulefit")

acc_ensemble_rf = accuracy_score(y_val_hard, y_pred_ensemble_rf)
f1w_ensemble_rf, f1m_ensemble_rf = (
    f1_score(y_val_hard, y_pred_ensemble_rf, average=avg) for avg in ("weighted", "macro")
)

print(f"Ensemble RF -> acc={acc_ensemble_rf:.4f}, f1_weighted={f1w_ensemble_rf:.4f}, f1_macro={f1m_ensemble_rf:.4f}")
report_ensemble_rf = classification_report(y_val_hard, y_pred_ensemble_rf)
print("\nClassification Report (Ensemble RF):\n", report_ensemble_rf)

ENSEMBLE PREDICTION - DECISION TREE
Ensemble DT -> acc=0.9847, f1_weighted=0.9847, f1_macro=0.9836

Classification Report (Ensemble DT):
                                  precision    recall  f1-score   support

               AI ML Specialist       1.00      1.00      1.00       216
                 API Specialist       1.00      1.00      1.00       108
   Application Support Engineer       1.00      1.00      1.00       108
               Business Analyst       1.00      1.00      1.00       108
     Customer Service Executive       0.88      0.85      0.87       108
      Cyber Security Specialist       1.00      1.00      1.00       108
         Database Administrator       1.00      1.00      1.00       108
              Graphics Designer       1.00      1.00      1.00       107
              Hardware Engineer       1.00      1.00      1.00       108
              Helpdesk Engineer       1.00      1.00      1.00       108
Information Security Specialist       1.00      1.00      

In [24]:
# One row per evaluated model: (label, accuracy, weighted F1, macro F1).
metrics_comparison = pd.DataFrame(
    [
        ("DT - Hard Only", acc_dt_hard, f1w_dt_hard, f1m_dt_hard),
        ("DT - Soft Only", acc_dt_soft, f1w_dt_soft, f1m_dt_soft),
        ("DT - Ensemble", acc_ensemble_dt, f1w_ensemble_dt, f1m_ensemble_dt),
        ("RF - Hard Only", acc_rf_hard, f1w_rf_hard, f1m_rf_hard),
        ("RF - Soft Only", acc_rf_soft, f1w_rf_soft, f1m_rf_soft),
        ("RF - Ensemble", acc_ensemble_rf, f1w_ensemble_rf, f1m_ensemble_rf),
    ],
    columns=["Model", "Accuracy", "F1 Weighted", "F1 Macro"],
)

print("="*60)
print("PERBANDINGAN SEMUA MODEL")
print("="*60)

# Sort once with a stable algorithm, so models tied on macro F1 keep their listing
# order above and the reported "best" model is deterministic across runs.
ranked_metrics = metrics_comparison.sort_values("F1 Macro", ascending=False, kind="stable")
display(ranked_metrics)

best_row = ranked_metrics.iloc[0]
best_model_name = best_row["Model"]
print(f"\nüèÜ Best Model: {best_model_name} (F1 Macro: {best_row['F1 Macro']:.4f})")

PERBANDINGAN SEMUA MODEL


Unnamed: 0,Model,Accuracy,F1 Weighted,F1 Macro
1,DT - Soft Only,1.0,1.0,1.0
5,RF - Ensemble,1.0,1.0,1.0
4,RF - Soft Only,1.0,1.0,1.0
2,DT - Ensemble,0.984724,0.984722,0.983643
3,RF - Hard Only,0.934534,0.934522,0.9299
0,DT - Hard Only,0.918167,0.913229,0.906885



üèÜ Best Model: DT - Soft Only (F1 Macro: 1.0000)


In [25]:
banner = "=" * 60
print("\n" + banner)
print("SAVING ARTIFACTS")
print(banner)

artifacts_dir = "artifacts"
os.makedirs(artifacts_dir, exist_ok=True)


def _artifact_path(filename):
    """Return ``filename`` joined onto the artifacts directory."""
    return os.path.join(artifacts_dir, filename)


def _metric_entry(acc, f1w, f1m):
    """Return one model's metrics as plain floats, ready for JSON."""
    return {
        "accuracy": float(acc),
        "f1_weighted": float(f1w),
        "f1_macro": float(f1m),
    }


def _write_json(path, payload):
    """Write ``payload`` to ``path`` as indented UTF-8 JSON and report it."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    print(f"‚úÖ Saved: {path}")


# NOTE(review): these files are joblib dumps even though they carry a `.pth` suffix.
dt_hard_path = _artifact_path("dt_hard_normalized.pth")
dt_soft_path = _artifact_path("dt_soft_normalized.pth")
rf_hard_path = _artifact_path("rf_hard_normalized.pth")
rf_soft_path = _artifact_path("rf_soft_normalized.pth")

# Dump all four models first, then report them, one line per file.
model_artifacts = [
    (dt_pipe_hard, dt_hard_path),
    (dt_pipe_soft, dt_soft_path),
    (rf_lite_hard, rf_hard_path),
    (rf_lite_soft, rf_soft_path),
]
for model_obj, model_path in model_artifacts:
    joblib.dump(model_obj, model_path)
for _, model_path in model_artifacts:
    print(f"‚úÖ Saved: {model_path}")

preprocess_hard_path = _artifact_path("preprocess_hard_normalized.pth")
preprocess_soft_path = _artifact_path("preprocess_soft_normalized.pth")

# Fitted preprocessors are stored separately; the RuleFit models take preprocessed arrays.
preprocess_artifacts = [
    (preprocess_hard, preprocess_hard_path),
    (preprocess_soft, preprocess_soft_path),
]
for preprocess_obj, preprocess_path in preprocess_artifacts:
    joblib.dump(preprocess_obj, preprocess_path)
for _, preprocess_path in preprocess_artifacts:
    print(f"‚úÖ Saved: {preprocess_path}")

featcol_path = _artifact_path("feature_columns.json")
_write_json(featcol_path, feature_columns)

# Summary of the run: feature groups, ensemble weights, metrics and split settings.
metadata = {
    "hard_skills": hard_skill_cols,
    "soft_skills": soft_skill_cols,
    "ensemble_weights": {
        "hard": 0.6,
        "soft": 0.4
    },
    "metrics": {
        "dt_hard": _metric_entry(acc_dt_hard, f1w_dt_hard, f1m_dt_hard),
        "dt_soft": _metric_entry(acc_dt_soft, f1w_dt_soft, f1m_dt_soft),
        "dt_ensemble": _metric_entry(acc_ensemble_dt, f1w_ensemble_dt, f1m_ensemble_dt),
        "rf_hard": _metric_entry(acc_rf_hard, f1w_rf_hard, f1m_rf_hard),
        "rf_soft": _metric_entry(acc_rf_soft, f1w_rf_soft, f1m_rf_soft),
        "rf_ensemble": _metric_entry(acc_ensemble_rf, f1w_ensemble_rf, f1m_ensemble_rf),
    },
    "best_model": best_model_name,
    "dataset_shape": df.shape,
    "num_classes": len(y.unique()),
    "train_test_split": {
        "test_size": 0.2,
        "random_state": 42
    }
}

metadata_path = _artifact_path("model_metadata.json")
_write_json(metadata_path, metadata)

best_model_path = _artifact_path("best_model_name.json")
_write_json(best_model_path, {"best_model": best_model_name})


SAVING ARTIFACTS
‚úÖ Saved: artifacts\dt_hard_normalized.pth
‚úÖ Saved: artifacts\dt_soft_normalized.pth
‚úÖ Saved: artifacts\rf_hard_normalized.pth
‚úÖ Saved: artifacts\rf_soft_normalized.pth
‚úÖ Saved: artifacts\preprocess_hard_normalized.pth
‚úÖ Saved: artifacts\preprocess_soft_normalized.pth
‚úÖ Saved: artifacts\feature_columns.json
‚úÖ Saved: artifacts\model_metadata.json
‚úÖ Saved: artifacts\best_model_name.json
