In [2]:
import xgboost as xgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from pathlib import Path
import joblib


In [3]:
# Define paths
import os
from pathlib import Path
import pandas as pd

# Project root. Defaults to the original location; set the SIH_BASE_DIR
# environment variable to run the notebook from a different checkout.
base_dir = Path(os.environ.get('SIH_BASE_DIR', '/home/nitin/Downloads/SIH-1723/'))
results_path = base_dir / 'results'
model_path = base_dir / 'ml_models' / 'models'

# Load data
forward_dir = base_dir / 'data/processed/forward'


def _read_features(split, target):
    """Read the feature table ``X_<split>_<target>.csv`` from ``forward_dir``."""
    return pd.read_csv(forward_dir / f'X_{split}_{target}.csv')


def _read_target(split, target):
    """Read ``y_<split>_<target>.csv`` from ``forward_dir`` as a Series."""
    # squeeze('columns') only collapses the column axis, so a one-row file
    # still yields a Series; a bare squeeze() would turn it into a scalar and
    # break the later ``.values.reshape(-1, 1)`` calls.
    return pd.read_csv(forward_dir / f'y_{split}_{target}.csv').squeeze('columns')


# Elongation
X_train = _read_features('train', 'Elongation')
X_test = _read_features('test', 'Elongation')
y_train = _read_target('train', 'Elongation')
y_test = _read_target('test', 'Elongation')

# UTS
A_train = _read_features('train', 'UTS')
A_test = _read_features('test', 'UTS')
b_train = _read_target('train', 'UTS')
b_test = _read_target('test', 'UTS')

# Conductivity
R_train = _read_features('train', 'Conductivity')
R_test = _read_features('test', 'Conductivity')
s_train = _read_target('train', 'Conductivity')
s_test = _read_target('test', 'Conductivity')

# Later cells write CSV results and joblib artefacts into these folders.
# Create them only after the inputs loaded successfully, so a wrong base_dir
# fails on the reads above instead of leaving stray directories behind.
results_path.mkdir(parents=True, exist_ok=True)
model_path.mkdir(parents=True, exist_ok=True)


In [4]:
def scale_data(X_train, X_test, A_train, A_test, R_train, R_test, y_train, y_test, b_train, b_test, s_train, s_test):
    """Min-max scale three feature sets and their three targets.

    Each scaler is fitted on a training split and then applied to the matching
    test split.

    Parameters
    ----------
    X_train, X_test, A_train, A_test, R_train, R_test
        Feature tables, passed straight to ``MinMaxScaler``.
    y_train, y_test, b_train, b_test, s_train, s_test
        Targets exposing ``.values``; they are reshaped to a single column for
        scaling and flattened back to 1-D afterwards.

    Returns
    -------
    tuple
        16 items, in this order: the six scaled feature arrays
        (X train/test, A train/test, R train/test), the six scaled target
        arrays (y train/test, b train/test, s train/test), the three target
        scalers (y, b, s) and the feature scaler.

    Notes
    -----
    One ``MinMaxScaler`` instance is reused for all three feature sets and is
    refitted for each of them, so the returned ``feature_scaler`` carries the
    fit of ``R_train`` only.
    """
    def _scale_target(train, test):
        # A dedicated scaler per target, fitted on the training values only.
        scaler = MinMaxScaler()
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1)).flatten()
        test_scaled = scaler.transform(test.values.reshape(-1, 1)).flatten()
        return scaler, train_scaled, test_scaled

    # Features: the same scaler object is refitted on every training table.
    feature_scaler = MinMaxScaler()
    scaled_features = []
    for train_table, test_table in ((X_train, X_test), (A_train, A_test), (R_train, R_test)):
        scaled_features.append(feature_scaler.fit_transform(train_table))
        scaled_features.append(feature_scaler.transform(test_table))

    # Targets
    target_scaler_y, y_train_scaled, y_test_scaled = _scale_target(y_train, y_test)
    target_scaler_b, b_train_scaled, b_test_scaled = _scale_target(b_train, b_test)
    target_scaler_s, s_train_scaled, s_test_scaled = _scale_target(s_train, s_test)

    return (
        *scaled_features,
        y_train_scaled, y_test_scaled,
        b_train_scaled, b_test_scaled,
        s_train_scaled, s_test_scaled,
        target_scaler_y, target_scaler_b, target_scaler_s,
        feature_scaler,
    )


In [5]:
def train_models(X_train_scaled, y_train_scaled, A_train_scaled, b_train_scaled, R_train_scaled, s_train_scaled):
    """Fit one XGBoost regressor per (features, target) pair.

    All three regressors share the same hyperparameters.

    Parameters
    ----------
    X_train_scaled, y_train_scaled
        Training features / target for the model returned first (``model_E``).
    A_train_scaled, b_train_scaled
        Training features / target for the model returned second (``model_UTS``).
    R_train_scaled, s_train_scaled
        Training features / target for the model returned third (``model_C``).

    Returns
    -------
    tuple
        ``(model_E, model_UTS, model_C)``, each already fitted.
    """
    hyperparams = dict(
        n_estimators=2000,
        learning_rate=0.5,
        max_depth=15,
        subsample=0.9,
        colsample_bytree=0.8,
        random_state=42,
    )

    # Build every estimator first, then fit them in E -> UTS -> C order.
    model_E, model_UTS, model_C = (xgb.XGBRegressor(**hyperparams) for _ in range(3))

    training_plan = (
        (model_E, X_train_scaled, y_train_scaled),
        (model_UTS, A_train_scaled, b_train_scaled),
        (model_C, R_train_scaled, s_train_scaled),
    )
    for estimator, features, target in training_plan:
        estimator.fit(features, target)

    return model_E, model_UTS, model_C


In [6]:
def make_predictions(model_E, model_UTS, model_C, X_test_scaled, A_test_scaled, R_test_scaled,
                     target_scaler_y, target_scaler_b, target_scaler_s,
                     y_test_scaled, b_test_scaled, s_test_scaled):
    """Predict on the scaled test features and undo the target scaling.

    For each of the three models the prediction and the scaled ground truth
    are both passed through the matching target scaler's
    ``inverse_transform`` and flattened to 1-D.

    Returns
    -------
    tuple
        ``(inv_y, inv_yhat, inv_b, inv_bhat, inv_s, inv_shat)``: for each
        model the inverse-transformed ground truth followed by the
        inverse-transformed prediction.
    """
    def _unscale(scaler, values):
        # The scaler is called with a single column, so reshape first.
        return scaler.inverse_transform(values.reshape(-1, 1)).flatten()

    jobs = (
        (model_E, X_test_scaled, target_scaler_y, y_test_scaled),
        (model_UTS, A_test_scaled, target_scaler_b, b_test_scaled),
        (model_C, R_test_scaled, target_scaler_s, s_test_scaled),
    )

    outputs = []
    for model, features, scaler, truth_scaled in jobs:
        predicted = _unscale(scaler, model.predict(features))
        actual = _unscale(scaler, truth_scaled)
        outputs.extend((actual, predicted))

    return tuple(outputs)


In [7]:
def evaluate_and_save_results(inv_y, inv_yhat, inv_b, inv_bhat, inv_s, inv_shat):
    """Print the RMSE of each target and write actual/predicted pairs to CSV.

    Parameters
    ----------
    inv_y, inv_yhat
        Actual and predicted values reported as "Elongation".
    inv_b, inv_bhat
        Actual and predicted values reported as "UTS".
    inv_s, inv_shat
        Actual and predicted values reported as "Conductivity".

    Side effects
    ------------
    Prints three ``RMSE <target>: <value>`` lines and writes one
    ``test_results_xgboost_<target>.csv`` file (columns ``Actual`` and
    ``Predicted``, no index) per target into the module-level ``results_path``.
    """
    report = (
        ('Elongation', 'test_results_xgboost_Elongation.csv', inv_y, inv_yhat),
        ('UTS', 'test_results_xgboost_UTS.csv', inv_b, inv_bhat),
        ('Conductivity', 'test_results_xgboost_Conductivity.csv', inv_s, inv_shat),
    )

    # Phase 1: compute every score before anything is printed or written.
    scores = [
        np.sqrt(mean_squared_error(actual, predicted))
        for _, _, actual, predicted in report
    ]

    # Phase 2: report.
    for (label, _, _, _), rmse in zip(report, scores):
        print(f'RMSE {label}: {rmse:.3f}')

    # Phase 3: persist the per-sample comparison tables.
    for _, filename, actual, predicted in report:
        table = pd.DataFrame({'Actual': actual, 'Predicted': predicted})
        table.to_csv(results_path / filename, index=False)


In [8]:
def save_models_and_scalers(model_E, model_UTS, model_C, target_scaler_y, target_scaler_b, target_scaler_s, feature_scaler):
    """Dump the three models and four scalers into ``model_path`` with joblib.

    Files are written in this order: the three ``xgboost_model_*.joblib``
    files, the three ``target_scaler_*.joblib`` files, then
    ``feature_scaler.joblib``. ``model_path`` is the module-level folder
    defined in the paths cell.
    """
    artifacts = (
        ('xgboost_model_Elongation.joblib', model_E),
        ('xgboost_model_UTS.joblib', model_UTS),
        ('xgboost_model_Conductivity.joblib', model_C),
        ('target_scaler_y.joblib', target_scaler_y),
        ('target_scaler_b.joblib', target_scaler_b),
        ('target_scaler_s.joblib', target_scaler_s),
        ('feature_scaler.joblib', feature_scaler),
    )
    for filename, artifact in artifacts:
        joblib.dump(artifact, model_path / filename)


In [9]:
# Step 1: scale features and targets (scalers are fitted on the training splits).
(
    X_train_scaled, X_test_scaled,
    A_train_scaled, A_test_scaled,
    R_train_scaled, R_test_scaled,
    y_train_scaled, y_test_scaled,
    b_train_scaled, b_test_scaled,
    s_train_scaled, s_test_scaled,
    target_scaler_y, target_scaler_b, target_scaler_s,
    feature_scaler,
) = scale_data(
    X_train, X_test, A_train, A_test, R_train, R_test,
    y_train, y_test, b_train, b_test, s_train, s_test,
)

# Step 2: fit one regressor per target.
model_E, model_UTS, model_C = train_models(
    X_train_scaled, y_train_scaled,
    A_train_scaled, b_train_scaled,
    R_train_scaled, s_train_scaled,
)

# Step 3: predict on the test splits and map everything back to original units.
(inv_y, inv_yhat, inv_b, inv_bhat, inv_s, inv_shat) = make_predictions(
    model_E, model_UTS, model_C,
    X_test_scaled, A_test_scaled, R_test_scaled,
    target_scaler_y, target_scaler_b, target_scaler_s,
    y_test_scaled, b_test_scaled, s_test_scaled,
)

# Step 4: report RMSE and write the actual-vs-predicted CSVs.
evaluate_and_save_results(inv_y, inv_yhat, inv_b, inv_bhat, inv_s, inv_shat)

# Step 5: persist the fitted models and scalers for the inference cell.
save_models_and_scalers(
    model_E, model_UTS, model_C,
    target_scaler_y, target_scaler_b, target_scaler_s,
    feature_scaler,
)


RMSE Elongation: 2.400
RMSE UTS: 0.512
RMSE Conductivity: 0.195


In [16]:
import joblib
import numpy as np
import pandas as pd

# The three feature groups use exactly the same columns, so the list is
# written once and copied; each name below still gets its own list object.
_FEATURE_COLUMNS = (
    'EMUL_OIL_L_TEMP_PV_VAL0',
    'STAND_OIL_L_TEMP_PV_REAL_VAL0',
    'GEAR_OIL_L_TEMP_PV_REAL_VAL0',
    'EMUL_OIL_L_PR_VAL0',
    'QUENCH_CW_FLOW_EXIT_VAL0',
    'CAST_WHEEL_RPM_VAL0',
    'BAR_TEMP_VAL0',
    'QUENCH_CW_FLOW_ENTRY_VAL0',
    'GEAR_OIL_L_PR_VAL0',
    'STANDS_OIL_L_PR_VAL0',
    'TUNDISH_TEMP_VAL0',
    'BATH_TEMP_F7_VAL0',
    'BATH_TEMP_F8_VAL0',
    'RM_MOTOR_COOL_WATER__VAL0',
    'ROLL_MILL_AMPS_VAL0',
    'RM_COOL_WATER_FLOW_VAL0',
    'EMULSION_LEVEL_ANALO_VAL0',
    '%AL',
)

X_features_columns = list(_FEATURE_COLUMNS)  # inputs selected for model_E
A_features_columns = list(_FEATURE_COLUMNS)  # inputs selected for model_UTS
R_features_columns = list(_FEATURE_COLUMNS)  # inputs selected for model_C


def load_models_and_predict_last_rows(file_path, n_rows=10, model_dir=None):
    """
    Load the saved models and scalers, read a CSV file, and predict
    Elongation, UTS and Conductivity for its last ``n_rows`` rows.

    Parameters
    ----------
    file_path : str or path-like
        CSV file that contains the columns listed in ``X_features_columns``,
        ``A_features_columns`` and ``R_features_columns``.
    n_rows : int, default 10
        How many trailing rows to predict. If the file has fewer rows, all of
        them are used.
    model_dir : str or path-like, optional
        Folder holding the ``*.joblib`` artefacts. Defaults to the original
        hard-coded models folder.

    Returns
    -------
    pandas.DataFrame
        Columns ``Elongation``, ``UTS`` and ``Conductivity``, one row per
        input row in file order, with a fresh 0..n-1 index. Empty (but with
        those columns) when the file has no data rows.

    Raises
    ------
    ValueError
        If ``n_rows`` is smaller than 1.
    KeyError
        If a required feature column is missing from the file (raised by the
        pandas column selection).
    """
    if n_rows < 1:
        # tail(0) is empty and tail(-k) drops the first k rows; neither is
        # what "last n rows" means, so reject them explicitly.
        raise ValueError(f"n_rows must be a positive integer, got {n_rows!r}")

    if model_dir is None:
        model_dir = '/home/nitin/Downloads/SIH-1723/ml_models/models'
    model_dir = Path(model_dir)

    print("Loading models and scalers...")

    # NOTE: joblib files are pickles and can execute code when loaded; only
    # point model_dir at artefacts produced by this notebook.
    model_E = joblib.load(model_dir / 'xgboost_model_Elongation.joblib')
    model_UTS = joblib.load(model_dir / 'xgboost_model_UTS.joblib')
    model_C = joblib.load(model_dir / 'xgboost_model_Conductivity.joblib')

    target_scaler_y = joblib.load(model_dir / 'target_scaler_y.joblib')
    target_scaler_b = joblib.load(model_dir / 'target_scaler_b.joblib')
    target_scaler_s = joblib.load(model_dir / 'target_scaler_s.joblib')

    # Only one feature scaler is saved, so it is loaded once and applied to
    # all three feature groups.
    # NOTE(review): scale_data refits that scaler per feature set and returns
    # the last fit (R_train); confirm it is also valid for the X/A features.
    feature_scaler = joblib.load(model_dir / 'feature_scaler.joblib')

    print("Models and scalers loaded successfully.")

    # Load data from the CSV file
    print(f"Loading data from {file_path}...")
    data = pd.read_csv(file_path)
    last_rows = data.tail(n_rows)

    # Select each model's inputs up front so a missing column fails early.
    jobs = (
        ('Elongation', model_E, last_rows[X_features_columns], target_scaler_y),
        ('UTS', model_UTS, last_rows[A_features_columns], target_scaler_b),
        ('Conductivity', model_C, last_rows[R_features_columns], target_scaler_s),
    )

    if last_rows.empty:
        # Nothing to predict; scaler.transform would reject zero samples.
        return pd.DataFrame(columns=[target for target, _, _, _ in jobs], dtype=float)

    print(f"Making predictions for the last {len(last_rows)} rows...")
    predicted = {}
    for target, model, features, target_scaler in jobs:
        # One batched call per model instead of one call per row.
        scaled_prediction = model.predict(feature_scaler.transform(features))
        predicted[target] = target_scaler.inverse_transform(
            scaled_prediction.reshape(-1, 1)
        ).flatten()

    print("Predictions completed.")

    # Combine predictions into a DataFrame
    return pd.DataFrame(predicted)

# Demo run: score the tail of the Conductivity test-feature CSV with the
# saved models.
file_path = "/home/nitin/Downloads/SIH-1723/data/processed/forward/X_test_Conductivity.csv"
predictions = load_models_and_predict_last_rows(file_path=file_path)

# Print the resulting table (one column per predicted property).
print(predictions)


Loading models and scalers...
Models and scalers loaded successfully.
Loading data from /home/nitin/Downloads/SIH-1723/data/processed/forward/X_test_Conductivity.csv...
Making predictions for the last 10 rows...
Predictions completed.
   Elongation        UTS  Conductivity
0   12.367015  10.606871     61.212036
1   11.847616  11.315399     60.921577
2   10.155425  11.333567     60.992916
3   12.787297  10.483118     61.258614
4   13.226456  10.077614     61.660809
5    9.848435  11.553331     61.285473
6   11.753441  10.764815     61.214531
7   12.199645  10.965165     61.338993
8   14.248695  10.798165     61.192360
9   13.525156  10.115539     61.209919


In [107]:
# Display the module-level X_test_scaled array (assigned from scale_data's
# return value in the pipeline cell).
# NOTE(review): this cell's execution count (107) is out of sequence with the
# cells above, so the values shown may come from a different kernel state; confirm.
X_test_scaled

array([[0.93588475, 0.77465302, 0.63375382, ..., 0.31400675, 0.46739094,
        0.38848921],
       [0.94566101, 0.81056641, 0.69722745, ..., 0.24059132, 0.5467571 ,
        0.81294964],
       [0.89305318, 0.5851967 , 0.62196362, ..., 0.61282472, 0.35994443,
        0.36690647],
       ...,
       [0.95814703, 0.81503915, 0.658603  , ..., 0.29768046, 0.47942188,
        0.49640288],
       [0.89620135, 0.65649269, 0.53100159, ..., 0.78870606, 0.46020107,
        0.30935252],
       [0.90709811, 0.78582553, 0.66011136, ..., 0.31173174, 0.4896359 ,
        0.23741007]])