In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Load the data
data = pd.read_excel('model_data_selected_columns.xlsx')

# Extract input features and output labels
X = data.drop(columns=['> 35', '35 to 200', '< 200'])  # Drop the output columns
y = data[['> 35', '35 to 200', '< 200']]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the input features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the model
model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
model.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred, multioutput='raw_values')
r2 = r2_score(y_test, y_pred)

# Output the results
print("Mean Squared Error for each output variable:", mse)
print("R-squared Score:", r2)

Mean Squared Error for each output variable: [0.01530403 0.84704779 0.82254631]
R-squared Score: 0.9663669101772179


In [3]:
data.columns

Index(['T', 'A', 'S', 'M', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6',
       'Weigh_Feeder_rate_ A(TPH)', 'Weigh_feeder_rate_B(TPH)',
       'Ball_mill_amps', 'BALL_MILL_VENT_SYS_TEMP(Â°C)',
       'BALL_MILL_VENT_FAN_AMPS', 'BAG_FILTER_DP(mmWC)',
       'SCREW_CONV _ CURRENT(Amps)', 'BUCKET_ELEVATOR_CURRENT(Amps)', '> 35',
       '35 to 200', '< 200'],
      dtype='object')

In [4]:
import pandas as pd
import joblib
from sklearn.metrics import mean_squared_error

# Manually input the values for the single data record
single_record = {
    'T': 50,
    'A': 30,
    'S': 20,
    'M': 0,
    'X1': 25.12,
    'X2': 6.88,
    'X3': 22.05,
    'X4': 13.95,
    'X5': 9.48,
    'X6': 7.23,
    'Weigh_Feeder_rate_ A(TPH)': 26,
    'Weigh_feeder_rate_B(TPH)': 65,
    'Ball_mill_amps': 76,
    'BALL_MILL_VENT_SYS_TEMP(Â°C)': 45,
    'BALL_MILL_VENT_FAN_AMPS': 55,
    'BAG_FILTER_DP(mmWC)': 412,
    'SCREW_CONV _ CURRENT(Amps)': 31,
    'BUCKET_ELEVATOR_CURRENT(Amps)': 26
}

# Convert the dictionary to a DataFrame
single_record_df = pd.DataFrame([single_record])


# Make prediction
prediction = model.predict(single_record_df)

# Convert prediction to a DataFrame for better readability
prediction_df = pd.DataFrame(prediction, columns=['> 35', '35 to 200', '< 200'])

# Manually input the actual target values for comparison
actual_target = {
    '> 35': 2.3,
    '35 to 200': 48,
    '< 200': 49.7
}

# Convert the actual target values to a DataFrame
actual_target_df = pd.DataFrame([actual_target])

# Calculate Mean Squared Error
mse = mean_squared_error(actual_target_df, prediction_df)

print(f"Actual Target:\n{actual_target_df}")
print(f"Prediction:\n{prediction_df}")
print(f"Mean Squared Error: {mse}")


Actual Target:
   > 35  35 to 200  < 200
0   2.3         48   49.7
Prediction:
    > 35  35 to 200   < 200
0  2.576     41.934  49.194
Mean Squared Error: 12.376189333333492




In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load the data for training the model
train_data = pd.read_excel('model_data_selected_columns.xlsx')

# Extract input features and output labels
X_train = train_data.drop(columns=['> 35', '35 to 200', '< 200'])  # Drop the output columns
y_train = train_data[['> 35', '35 to 200', '< 200']]

# Standardize the input features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Initialize and train the model
model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
model.fit(X_train_scaled, y_train)

# Load the new data for testing the model
test_data = pd.read_excel('Test.xlsx')

# Extract input features from the new data
X_test = test_data.drop(columns=['> 35', '35 to 200', '< 200'])  # Assuming the new data has the same structure

# Standardize the input features of the new data
X_test_scaled = scaler.transform(X_test)

# Predict on the new data
y_pred_test = model.predict(X_test_scaled)

# Convert predictions to a DataFrame
predictions_df = pd.DataFrame(y_pred_test, columns=['Predicted > 35', 'Predicted 35 to 200', 'Predicted < 200'])

# Calculate Mean Squared Error for each row
mse_test = [mean_squared_error([y_true], [y_pred]) for y_true, y_pred in zip(test_data[['> 35', '35 to 200', '< 200']].values, y_pred_test)]
mse_df = pd.DataFrame(mse_test, columns=['Mean Squared Error'])

# Combine the predictions and the MSE with the new data
result_df = pd.concat([test_data, predictions_df, mse_df], axis=1)

# Save the result to a new Excel file
output_path = 'Test_with_predictions_and_mse.xlsx'
result_df.to_excel(output_path, index=False)

print(f"Results have been saved to {output_path}")

Results have been saved to Test_with_predictions_and_mse.xlsx
