### Getting Started with an ML Project Using MLflow

- Installing MLflow.

- Starting a local MLflow Tracking Server.

- Logging and registering a model with MLflow.

- Loading a logged model for inference using MLflow’s pyfunc flavor.

- Viewing the experiment results in the MLflow UI.

### Install MLflow

In [None]:
pip install mlflow

### Start Local MLflow Tracking Server

1. Open Terminal.

2. Run the below command to start MLflow on localhost on port `5001`:
   ```bash
   python -m mlflow server --host 127.0.0.1 --port 5001
   ```

### Access MLflow Server

Open your browser and access the MLflow server at the following link:

   http://127.0.0.1:5001

### Import packages

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, r2_score
import mlflow
import mlflow.sklearn

### Load and preprocess the input

In [2]:
# Load the raw transactions. `file_path` stays a notebook-level variable because
# the MLflow tracking cell logs this CSV as a run artifact.
file_path = 'Walmart.csv'
dataset = pd.read_csv(file_path)

# Parse the timestamp (errors='coerce' turns unparseable values into NaT) and
# replace it with numeric calendar features; the raw column is dropped afterwards.
transaction_date = pd.to_datetime(dataset['transaction_date'], errors='coerce')
dataset = dataset.assign(
    transaction_day=transaction_date.dt.day,
    transaction_month=transaction_date.dt.month,
    transaction_weekday=transaction_date.dt.weekday,
    transaction_year=transaction_date.dt.year,
).drop(columns=['transaction_date'])

# Columns holding categories that must be turned into integer codes
categorical_columns = ['category', 'store_location', 'payment_method', 'promotion_applied',
                       'promotion_type', 'weather_conditions', 'holiday_indicator', 'weekday',
                       'customer_loyalty_level', 'customer_gender']

# Fit one LabelEncoder per column and keep every fitted encoder. Re-fitting a
# single shared encoder would overwrite the learned mapping on each iteration,
# leaving no way to decode the codes or re-apply the same mapping later.
label_encoders = {}
for col in categorical_columns:
    encoder = LabelEncoder()
    # astype(str) makes every value a string first, so missing values are
    # encoded as their own category instead of raising on mixed types.
    dataset[col] = encoder.fit_transform(dataset[col].astype(str))
    label_encoders[col] = encoder

# Features (X): everything except identifiers, the free-text product name and
# the target. Target (y): the observed demand.
X = dataset.drop(columns=['transaction_id', 'customer_id', 'product_id', 'product_name', 'actual_demand'])
y = dataset['actual_demand']

### Train the Model

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Random forest hyperparameters. They live in a dict because the tracking cell
# logs the same `params` object with mlflow.log_params.
params = dict(
    n_estimators=200,       # number of trees in the forest
    max_depth=None,         # trees may grow without a depth limit
    min_samples_split=2,    # minimum samples needed to split an internal node
    min_samples_leaf=1,     # minimum samples required in a leaf
    random_state=50,        # seed for reproducible forests
)

# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestRegressor(**params).fit(X_train, y_train)

# Predictions on the held-out rows; reused by the evaluation and tracking cells.
y_pred = model.predict(X_test)
print("Walmart Demand Forecasting model is trained")

Walmart Demand Forecasting model is trained


 ### Evaluate the Model

In [None]:
# Score the held-out predictions: MAE is the mean absolute error in the units
# of the target, R2 is the coefficient of determination.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics
print(f"Mean Absolute Error: {mae}")
print(f"R2 Score: {r2}")

### Track the experiment with MLflow

In [6]:
from mlflow.models.signature import infer_signature

# Point the client at the local tracking server and select the experiment that
# groups these runs (MLflow creates the experiment if it does not exist yet).
mlflow.set_tracking_uri(uri="http://127.0.0.1:5001")
mlflow.set_experiment("Walmart Demand Forecast Model New")

# Infer the model's input/output schema from sample features and predictions
signature = infer_signature(X_test, y_pred)

# This cell reads notebook-level state: `params`, `model` and `y_pred` come from
# whichever training cell ran last, so re-running it after a retrain logs the
# retrained model as a new run.
# The context manager ends the run when the block exits (also when it raises),
# so no explicit mlflow.end_run() is needed afterwards.
with mlflow.start_run():
    # Hyperparameters used to build `model`
    mlflow.log_params(params)

    # Attach the input CSV to the run. `file_path` is set in the data-loading
    # cell and must still point at the training CSV when this cell runs.
    mlflow.log_artifact(file_path)

    # Log the fitted estimator together with its signature
    logged_model = mlflow.sklearn.log_model(
        model, "random_forest_regressor_model", signature=signature
    )

    # Evaluate the logged model on the held-out split; the target column is
    # re-attached to the features because mlflow.evaluate takes a single frame.
    mlflow.evaluate(
        model=logged_model.model_uri,
        data=pd.concat([X_test, y_test], axis=1),
        targets="actual_demand",
        model_type="regressor"
    )

    



Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2025/05/26 10:38:02 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run useful-mouse-237 at: http://127.0.0.1:5001/#/experiments/202062736812934130/runs/f2182e8b47cc498798dcb2bd245fa57e
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/202062736812934130


### Retrain the model with different params

In [None]:
# Second configuration to compare against the first run: fewer trees and a
# capped tree depth.
params = {
    "n_estimators": 150,       # Number of trees
    "max_depth": 5,            # Limit each tree to a maximum depth of 5
    "min_samples_split": 2,    # Minimum samples required to split an internal node
    "min_samples_leaf": 1,     # Minimum samples required to be a leaf node
    "random_state": 42         # Random seed for reproducibility
}

# Initialize and train the Random Forest Regressor on the existing train split
model = RandomForestRegressor(**params)
model.fit(X_train, y_train)

# Refresh the predictions for the retrained model.
# NOTE: this cell only retrains. To record it as a new MLflow run, re-run the
# tracking cell, which reads `params`, `model` and `y_pred`.
y_pred = model.predict(X_test)
print("Walmart Demand Forecasting model is Re-trained")

### Model Inference

In [7]:
# Load the validation data under its own path variable, so `file_path` keeps
# pointing at the training CSV that the tracking cell logs as an artifact.
validation_file_path = 'Walmart_validation.csv'
dataset = pd.read_csv(validation_file_path)

# Same calendar features as in training: parse the timestamp (unparseable
# values become NaT), derive the numeric parts, then drop the raw column.
transaction_date = pd.to_datetime(dataset['transaction_date'], errors='coerce')
dataset = dataset.assign(
    transaction_day=transaction_date.dt.day,
    transaction_month=transaction_date.dt.month,
    transaction_weekday=transaction_date.dt.weekday,
    transaction_year=transaction_date.dt.year,
).drop(columns=['transaction_date'])

# Encode categorical variables
categorical_columns = ['category', 'store_location', 'payment_method', 'promotion_applied',
                       'promotion_type', 'weather_conditions', 'holiday_indicator', 'weekday',
                       'customer_loyalty_level', 'customer_gender']

# NOTE(review): the encoder is re-fit on the validation data here. LabelEncoder
# derives its codes from the values it sees during fit, so these codes match
# the training codes only if each column contains the same set of values in
# both files. Confirm this, or reuse the encoders fitted on the training data.
encoder = LabelEncoder()
for col in categorical_columns:
    dataset[col] = encoder.fit_transform(dataset[col].astype(str))

# Build the feature matrix with the same columns dropped as in training (no
# target is extracted here).
# NOTE: this rebinds `dataset` and `X`, replacing the training data; re-run the
# training data-loading cell before re-running the train/test split.
X = dataset.drop(columns=['transaction_id', 'customer_id', 'product_id', 'product_name', 'actual_demand'])

In [None]:
# URI of a model logged under a specific run. The run ID is hard-coded, so it
# only resolves on a tracking server that holds that run; replace it with the
# ID of your own run (shown in the MLflow UI) before executing this cell.
logged_model = 'runs:/c8ba87d4c0614c7999faebfc85c3de88/random_forest_regressor_model'

# Load it through the generic pyfunc flavor
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)

# Score the validation features prepared in the previous cell
predictions = loaded_model.predict(X)

# Show the predicted demand values
print(predictions)

### Access Registered Model

In [9]:
# Address a model through the Model Registry: models:/<name>/<version>.
# No code cell shown in this notebook registers the model, so a model with this
# name and version must already exist in the registry (for example, registered
# through the MLflow UI).
model_name = "Walmart_Sales"
model_version = "2"
model_uri = f'models:/{model_name}/{model_version}'

# Load the registered version through the generic pyfunc flavor
loaded_model = mlflow.pyfunc.load_model(model_uri=model_uri)

# Score the validation features prepared earlier
predictions = loaded_model.predict(X)

# Show the predicted demand values
print(predictions)

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

[227.43 405.39 372.31 378.35 388.77 282.41 267.1  275.21 267.35]
