In [1]:
# Import Libraries
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

In [4]:
# Load the Dataset
# The workbook location defaults to the path used when this notebook was written.
# Set the BEARING_DATA_PATH environment variable to load the file from another
# location without editing this cell.
DATA_PATH = os.environ.get('BEARING_DATA_PATH', 'C:\\Users\\tanu.gupta\\Desktop\\DATA.xlsx')
data = pd.read_excel(DATA_PATH)


In [6]:
# Preview the first rows of the freshly loaded frame
data.head()

Unnamed: 0,BRG_ID,BRG_OD,BRG_WIDTH,OR_TRACK_CURVE_DIA,OR_ID,OR_TRACK_DIA,OR_TRACK_DIA_DEPTH,OR_TRACK_CURVE_DIA_FACT_FINAL,IR_TRACK_CURVE_DIA,IR_OD,IR_TRACK_DIA,IR_TRACK_DIA_DEPTH,IR_TRACK_CURVE_DIA_FACT_FINAL,BALL_DIA_MM,BALL_PCD,NO_OF_BALL,STATIC_LOAD_RATING,DYNAMIC_LOAD_RATING,I
0,15.0,32.0,9.0,5.048,26.45,28.273,0.911,1.06,4.858,20.55,18.738,0.906,1.02,4.7625,23.5,9.0,2841.6,5590.4,1.0
1,35.0,80.0,21.0,14.303,67.107,71.928,2.412,1.04,13.764,49.733,44.928,2.403,1.02,13.4938,58.42,8.0,19199.4,33349.1,1.0
2,40.0,110.0,27.0,20.193,92.735,99.6,3.448,1.06,19.431,68.662,61.5,3.571,1.02,19.05,80.569,8.0,37948.4,61865.9,1.0
3,40.0,90.0,23.0,16.828,75.7,81.888,3.094,1.06,16.192,56.1,50.125,2.987,1.02,15.875,66.0,7.0,22914.7,40674.5,1.0
4,40.0,110.0,27.0,23.558,88.29,96.638,4.173,1.06,22.669,60.509,52.175,4.167,1.02,22.225,74.4,6.0,34454.6,63945.2,1.0


In [8]:
# (rows, columns) of the raw data before any cleaning
data.shape

(2690, 19)

In [10]:
# Check for Missing Values
# Per-column count of missing entries in the raw frame
print(data.isna().sum())

BRG_ID                           440
BRG_OD                             0
BRG_WIDTH                          0
OR_TRACK_CURVE_DIA               476
OR_ID                            475
OR_TRACK_DIA                     475
OR_TRACK_DIA_DEPTH               475
OR_TRACK_CURVE_DIA_FACT_FINAL    475
IR_TRACK_CURVE_DIA               475
IR_OD                            475
IR_TRACK_DIA                     475
IR_TRACK_DIA_DEPTH               475
IR_TRACK_CURVE_DIA_FACT_FINAL    475
BALL_DIA_MM                      475
BALL_PCD                         475
NO_OF_BALL                       475
STATIC_LOAD_RATING                 0
DYNAMIC_LOAD_RATING                0
I                                475
dtype: int64


In [32]:
# Keep only the rows with no missing value in any column; `data` itself is left untouched
data_clean = data.dropna(axis='index', how='any')

In [38]:
# (rows, columns) remaining after rows with missing values were dropped
data_clean.shape

(2214, 19)

In [40]:
# Re-count missing entries per column on the cleaned frame
print(data_clean.isna().sum())

BRG_ID                           0
BRG_OD                           0
BRG_WIDTH                        0
OR_TRACK_CURVE_DIA               0
OR_ID                            0
OR_TRACK_DIA                     0
OR_TRACK_DIA_DEPTH               0
OR_TRACK_CURVE_DIA_FACT_FINAL    0
IR_TRACK_CURVE_DIA               0
IR_OD                            0
IR_TRACK_DIA                     0
IR_TRACK_DIA_DEPTH               0
IR_TRACK_CURVE_DIA_FACT_FINAL    0
BALL_DIA_MM                      0
BALL_PCD                         0
NO_OF_BALL                       0
STATIC_LOAD_RATING               0
DYNAMIC_LOAD_RATING              0
I                                0
dtype: int64


In [54]:
# Select Features and Target Variables
# Columns fed to the model as inputs
features = [
    'BRG_ID',
    'BRG_OD',
    'BRG_WIDTH',
    'STATIC_LOAD_RATING',
    'DYNAMIC_LOAD_RATING',
]
# Columns the model predicts, kept in dataset column order
target = (
    # OR_* columns
    ['OR_TRACK_CURVE_DIA', 'OR_ID', 'OR_TRACK_DIA', 'OR_TRACK_DIA_DEPTH',
     'OR_TRACK_CURVE_DIA_FACT_FINAL']
    # IR_* columns
    + ['IR_TRACK_CURVE_DIA', 'IR_OD', 'IR_TRACK_DIA', 'IR_TRACK_DIA_DEPTH',
       'IR_TRACK_CURVE_DIA_FACT_FINAL']
    # Ball columns
    + ['BALL_DIA_MM', 'BALL_PCD', 'NO_OF_BALL']
)

In [56]:
# Split the cleaned frame column-wise into model inputs (X) and outputs (y)
X = data_clean.loc[:, features]
y = data_clean.loc[:, target]

In [58]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [60]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transformation to both the training and the held-out rows
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [62]:
# Random forest with 100 trees and a fixed seed, trained on the scaled
# training inputs against all target columns at once
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train_scaled, y_train)

In [64]:
# Predict every target column for the scaled held-out rows
y_pred = model.predict(X_test_scaled)

In [79]:
def predict_new_values(new_data):
    """
    Predicts target values based on new input data.

    Relies on the notebook-level `scaler`, `model`, `features` and `target`
    objects, so the cells that define them must have been run first.

    Parameters:
    new_data (pd.DataFrame): DataFrame containing new feature data. It must
        contain every column listed in `features`; any other columns are ignored.

    Returns:
    pd.DataFrame: DataFrame containing predicted target values, one column per
        entry in `target` and one row per input row. The result carries the same
        index as `new_data` so predictions can be aligned with their inputs.

    Raises:
    KeyError: If `new_data` lacks one or more of the required feature columns.
    """
    # Fail early with a message that names every missing column at once
    missing = [column for column in features if column not in new_data.columns]
    if missing:
        raise KeyError(f"new_data is missing required feature column(s): {missing}")

    new_data_scaled = scaler.transform(new_data[features])  # Scale the new data
    predictions = model.predict(new_data_scaled)  # Make predictions
    # Reuse the caller's index; otherwise rows are relabelled 0..n-1 and can no
    # longer be matched back to the input rows
    return pd.DataFrame(predictions, columns=target, index=new_data.index)

In [70]:
# Notebook-wide pandas display settings
pd.options.display.max_columns = None  # no cap on the number of columns shown
pd.options.display.expand_frame_repr = False  # keep wide frames on one line instead of wrapping
pd.options.display.float_format = '{:.4f}'.format  # render floats with four decimals

In [72]:
# Single example row to predict for, holding one value per model input column
new_data = pd.DataFrame([
    {
        'BRG_ID': 32,
        'BRG_OD': 80,
        'BRG_WIDTH': 21,
        'STATIC_LOAD_RATING': 19199.4,
        'DYNAMIC_LOAD_RATING': 33349.1,
    },
])


In [74]:
# Run the example row through the scaling + prediction helper
predicted_values_df = predict_new_values(new_data)

In [76]:
# Print each predicted row as a labelled block: a 1-based "Row N" heading,
# the row's values one per line, then a dashed separator for readability
separator = "-" * 40
for row_label, predicted_row in predicted_values_df.iterrows():
    print(f"Row {row_label + 1}", predicted_row.to_string(), separator, sep="\n")

Row 1
OR_TRACK_CURVE_DIA              12.6432
OR_ID                           65.4497
OR_TRACK_DIA                    70.6551
OR_TRACK_DIA_DEPTH               2.5529
OR_TRACK_CURVE_DIA_FACT_FINAL    1.0596
IR_TRACK_CURVE_DIA              14.2543
IR_OD                           48.2907
IR_TRACK_DIA                    42.9722
IR_TRACK_DIA_DEPTH               2.7265
IR_TRACK_CURVE_DIA_FACT_FINAL    1.0200
BALL_DIA_MM                     13.9700
BALL_PCD                        56.8234
NO_OF_BALL                       7.5700
----------------------------------------


In [21]:
from sklearn.metrics import r2_score

# Score the model on the held-out rows. Predictions are recomputed here so the
# cell does not depend on an earlier `y_pred`.
y_pred = model.predict(X_test_scaled)

# R² between the true and predicted held-out targets
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Same score expressed on a 0-100 scale for display
r2_percentage = 100 * r2

print(f"Model Accuracy (R² Score): {r2_percentage:.2f}%")

Model Accuracy (R² Score): 87.99%
