In [2]:
# Step 1: Import required libraries
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
import numpy as np
import pandas as pd

# Step 2: Load the dataset
file_path = 'data.csv'
df = pd.read_csv(file_path)

# Step 3: Name the target (Y) and feature (X) columns
Y_COLS = ['MMT', 'CNF', 'Gelatin', 'Glycerol']
X_COLS = ['TransVis', 'RR', 'TensileStrength']

# Positional 80/20 split: the leading 80% of rows (in file order) train the
# models, the remaining rows are held out for testing.
# NOTE(review): rows are not shuffled before splitting — confirm the CSV is not
# sorted in a way that makes the tail unrepresentative.
train_size = int(len(df) * 0.8)
df_train, df_test = df.iloc[:train_size], df.iloc[train_size:]

# Feature / target views of each partition
X_train, Y_train = df_train[X_COLS], df_train[Y_COLS]
X_test, Y_test = df_test[X_COLS], df_test[Y_COLS]

# Step 4: Define hyperparameter search space
# Candidate values sampled by the randomized search, one list per
# RandomForestRegressor constructor argument.
param_dist = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 20, None],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
    bootstrap=[True, False],
)

# Step 5: Perform hyperparameter tuning with RandomizedSearchCV
# One independent search (and therefore one model) per target column.
best_params = {}   # target name -> winning hyperparameter dict
rf_models = {}     # target name -> fitted RandomForestRegressor

for target in Y_COLS:
    rf = RandomForestRegressor(random_state=42)
    random_search = RandomizedSearchCV(
        estimator=rf,
        param_distributions=param_dist,
        n_iter=10,
        cv=3,
        n_jobs=-1,
        verbose=1,
        random_state=42,
    )
    random_search.fit(X_train, Y_train[target])
    best_params[target] = random_search.best_params_

    # The search refits the winning configuration on the full training data
    # (refit=True is the default), cloning `rf` and therefore keeping
    # random_state=42. Reusing that estimator avoids training the same
    # forest a second time.
    rf_best = random_search.best_estimator_
    rf_models[target] = rf_best

# Step 6: Make predictions on the test set
# Each per-target model predicts its own column from the shared test features.
predictions = {target: rf_models[target].predict(X_test) for target in Y_COLS}

# Step 7: Calculate the combined Mean Relative Error (MRE) and Accuracy

# Pool every target into one flat vector (targets laid end to end in Y_COLS
# order) so the metrics below are computed over all values at once.
Y_test_combined = np.concatenate([Y_test[target].values for target in Y_COLS])
predictions_combined = np.concatenate([predictions[target] for target in Y_COLS])

# Calculate Mean Relative Error (MRE)
def mean_relative_error(y_true, y_pred):
    """Return the mean of |y_true - y_pred| / |y_true|.

    Args:
        y_true: array-like of reference values.
        y_pred: array-like of predicted values, compared with ``y_true``
            position by position.

    Returns:
        The mean relative error as a float.

    Inputs are converted with ``np.asarray`` so plain lists work as well as
    arrays. The denominator is floored at machine epsilon so that a zero
    reference value no longer produces ``inf``/``nan``: an exact prediction
    of 0 contributes 0 error, while a wrong one contributes a very large
    but finite error.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Guard against division by zero for reference values equal to 0.
    denominator = np.maximum(np.abs(y_true), np.finfo(np.float64).eps)
    return np.mean(np.abs(y_true - y_pred) / denominator)

overall_mre = mean_relative_error(Y_test_combined, predictions_combined)

# Calculate Accuracy
# A prediction counts as correct when its relative error is below the
# tolerance; accuracy is the fraction of pooled values that qualify.
# NOTE(review): a zero in Y_test_combined makes the ratio inf/nan, which
# compares False against the threshold and is counted as a miss — confirm
# zero targets cannot occur.
threshold = 0.1  # 10% tolerance
relative_errors = np.abs((Y_test_combined - predictions_combined) / Y_test_combined)
overall_accuracy = np.mean(relative_errors < threshold)

# Step 8: Display the results
print("Best Parameters for each target variable:")
for target in Y_COLS:
    print(f"{target}: {best_params[target]}")

# Report both pooled metrics computed in Step 7. The MRE was previously
# calculated but never shown.
print(f"Overall Mean Relative Error (MRE): {overall_mre:.4f}")
print(f"Overall Accuracy: {overall_accuracy * 100:.2f}%")

Fitting 3 folds for each of 10 candidates, totalling 30 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Fitting 3 folds for each of 10 candidates, totalling 30 fits
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best Parameters for each target variable:
MMT: {'n_estimators': 100, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 20, 'bootstrap': True}
CNF: {'n_estimators': 100, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 20, 'bootstrap': True}
Gelatin: {'n_estimators': 100, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 20, 'bootstrap': True}
Glycerol: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': True}
Overall Accuracy: 92.73%


In [None]:
# ---------------------------------------
# New Step: Predict material ratios for given properties
# ---------------------------------------

# Function to predict using trained models
def predict_material_ratios(new_data):
    """Predict every target in Y_COLS for a single set of input properties.

    Args:
        new_data: one observation; it becomes a single-row DataFrame whose
            columns are X_COLS (e.g. a dict keyed by those names).

    Returns:
        dict mapping each name in Y_COLS to the scalar predicted by the
        corresponding model in ``rf_models``.
    """
    # Single-row frame with the column layout the models were fitted on.
    features = pd.DataFrame([new_data], columns=X_COLS)

    # predict() returns one value per row; take the only element.
    return {
        target: rf_models[target].predict(features)[0]
        for target in Y_COLS
    }

# Take input from the user
def get_input_from_user():
    """Prompt on stdin for the three input properties and return them.

    Each property is requested with its own prompt. An answer that cannot be
    parsed as a finite number is rejected with a short message and the same
    prompt is shown again, so a single typo no longer raises ValueError and
    discards the values already entered.

    Returns:
        dict with keys 'TransVis', 'RR' and 'TensileStrength', each mapped
        to the float entered by the user.
    """
    import math  # local import: only needed for the finiteness check

    print("Please enter the values for the following properties:")

    values = {}
    for name in ('TransVis', 'RR', 'TensileStrength'):
        while True:
            raw = input(f"{name}: ")
            try:
                number = float(raw)
            except ValueError:
                print(f"'{raw}' is not a valid number, please try again.")
                continue
            # float() accepts 'nan' and 'inf'; neither is a usable feature value.
            if not math.isfinite(number):
                print(f"'{raw}' is not a finite number, please try again.")
                continue
            values[name] = number
            break

    return values

# Get input data from user
new_input_data = get_input_from_user()

# Predict the material ratios
predicted_ratios = predict_material_ratios(new_input_data)

# Display the predictions, one line per material, to four decimal places
print("\nPredicted Material Ratios for the given properties:")
for _label in ('MMT', 'CNF', 'Gelatin', 'Glycerol'):
    print(f"{_label}: {predicted_ratios[_label]:.4f}")


Please enter the values for the following properties:
TransVis: 75
RR: 0.7
TensileStrength: 100

Predicted Material Ratios for the given properties:
MMT: 33.9276
CNF: 37.7657
Gelatin: 11.6184
Glycerol: 15.2128


In [None]:
# NOTE(review): this cell only evaluates the literal 0 and nothing above uses it;
# it looks like a leftover scratch cell — confirm and delete.
0