In [6]:
# Importing the essential libraries
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

In [9]:
# Reading the cleaned and combined dataset from the CSV file
data = pd.read_csv("Cleaned and combined dataset.csv")

In [10]:
# Number of folds for K-fold cross-validation
K = 10

# Separate features (x) and target variable (y)
x = data.drop(columns=['estimated_stock_pct'])
y = data['estimated_stock_pct']

# Ratio for splitting data into training and test sets
split = 0.75

# List to store accuracy scores during cross-validation
accuracy = []

# Instantiate a StandardScaler to standardize feature values
scaler = StandardScaler()

# Create training and test samples using the specified split ratio and random seed
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=split, random_state=42)

# Fit the scaler on the training data and transform both training and test data
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Assigning variables to the models
model_1 = RandomForestRegressor()
model_2 = DecisionTreeRegressor()

In [11]:
def train_and_test_RandomForestRegressor():
    print("Training and testing using RandomForestRegressor Model: -")
    
    # Loop through each fold in K-fold cross-validation
    for fold in range(0, K):
    
        # Train the RandomForestRegressor model
        trained_model = model_1.fit(x_train, y_train)
    
        # Generate predictions on the test sample
        y_pred = trained_model.predict(x_test)
    
        # Compute accuracy using mean absolute error (MAE)
        mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
    
        # Append the MAE to the accuracy list
        accuracy.append(mae)
    
        # Print the MAE for the current fold
        print(f"Fold {fold + 1}: MAE = {mae:.3f}")
    
    # Calculate and print the average MAE across all folds
    print(f"Average MAE for RandomForestRegressor Model: {(sum(accuracy) / len(accuracy)):.2f}")

In [13]:
def evaluation_RandomForestRegressor():
    print("Top-5 features and their relative-importance for predicting the target variable(estimated_stock_pct) using RandomForestRegressor Model: -")
    """
    Evaluate and print the top 5 features and their importances from a RandomForestRegressor model.

    Parameters:
    - model: RandomForestRegressor model trained on the dataset.
    - x: DataFrame containing features.

    Returns:
    None
    """
    features = [i.split("__")[0] for i in x.columns]
    importances = model_1.feature_importances_
    sorted_features = sorted(zip(features, importances), key=lambda x: x[1], reverse=True)

    # Print the top 5 features and their importances
    top_features = sorted_features[:5]
    for feature, importance in top_features:
        print(f"{feature} : {round(importance, 3)}")

In [14]:
def train_and_test_DecisionTreeRegressor():
    print("Training and testing using DecisionTreeRegressor Model: -")
    
    # Loop through each fold in K-fold cross-validation
    for fold in range(0, K):
    
        # Train the DecisionTreeRegressor model
        trained_model_2 = model_2.fit(x_train, y_train)
    
        # Generate predictions on the test sample
        y_pred = trained_model_2.predict(x_test)
    
        # Compute accuracy using mean absolute error (MAE)
        mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
    
        # Append the MAE to the accuracy list
        accuracy.append(mae)
    
        # Print the MAE for the current fold
        print(f"Fold {fold + 1}: MAE = {mae:.3f}")
    
    # Calculate and print the average MAE across all folds
    print(f"Average MAE for DecisionTreeRegressor Model: {(sum(accuracy) / len(accuracy)):.2f}")

In [15]:
def evaluation_DecisionTreeRegressor():
    print("Top-5 features and their relative-importance for predicting the target variable(estimated_stock_pct) using DecisionTreeRegressor Model: -")
    """
    Evaluate and print the top 5 features and their importances from a DecisionTreeRegressor model.

    Parameters:
    - model: DecisionTreeRegressor model trained on the dataset.
    - x: DataFrame containing features.

    Returns:
    None
    """
    features_2 = [i.split("__")[0] for i in x.columns]
    importances_2 = model_2.feature_importances_
    sorted_features_2 = sorted(zip(features_2, importances_2), key=lambda x: x[1], reverse=True)

    # Print the top 5 features and their importances
    top_features_2 = sorted_features_2[:5]
    for feature, importance in top_features_2:
        print(f"{feature} : {round(importance, 3)}")

In [16]:
# Calling the function to train and test the RandomForestRegressor model
train_and_test_RandomForestRegressor()

Training and testing using RandomForestRegressor Model: -
Fold 1: MAE = 0.237
Fold 2: MAE = 0.236
Fold 3: MAE = 0.236
Fold 4: MAE = 0.236
Fold 5: MAE = 0.237
Fold 6: MAE = 0.237
Fold 7: MAE = 0.236
Fold 8: MAE = 0.236
Fold 9: MAE = 0.236
Fold 10: MAE = 0.236
Average MAE for RandomForestRegressor Model: 0.24


In [17]:
# Calling the function to evaluate and print the top 5 features and their relative-importance for RandomForestRegressor model
evaluation_RandomForestRegressor()

Top-5 features and their relative-importance for predicting the target variable(estimated_stock_pct) using RandomForestRegressor Model: -
unit_price : 0.278
temperature : 0.169
timestamp_hour : 0.11
quantity : 0.085
timestamp_day_of_week : 0.05


In [18]:
# Calling the function to train and test the DecisionTreeRegressor model
train_and_test_DecisionTreeRegressor()

Training and testing using DecisionTreeRegressor Model: -
Fold 1: MAE = 0.314
Fold 2: MAE = 0.313
Fold 3: MAE = 0.313
Fold 4: MAE = 0.313
Fold 5: MAE = 0.314
Fold 6: MAE = 0.313
Fold 7: MAE = 0.310
Fold 8: MAE = 0.313
Fold 9: MAE = 0.314
Fold 10: MAE = 0.312
Average MAE for DecisionTreeRegressor Model: 0.27


In [19]:
# Calling the function to evaluate and print the top 5 features of DecisionTreeRegressor model
evaluation_DecisionTreeRegressor()

Top-5 features and their relative-importance for predicting the target variable(estimated_stock_pct) using DecisionTreeRegressor Model: -
unit_price : 0.288
temperature : 0.163
timestamp_hour : 0.106
quantity : 0.097
timestamp_day_of_week : 0.058
