In [1]:
import os

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler



In [2]:
# Location of the synthetic aluminium wire-rod dataset. Set the
# WIRE_ROD_DATA_PATH environment variable to run the notebook on another
# machine; the fallback below is the machine-specific path this notebook was
# originally run with.
DATA_PATH = os.environ.get(
    "WIRE_ROD_DATA_PATH",
    r"C:\Users\Paras Mehta\OneDrive\Desktop\sih 1723\aluminum_wire_rod_synthetic_10000 (2).csv",
)
df = pd.read_csv(DATA_PATH, low_memory=False)

In [3]:
# Structural summary followed by a small sample of the loaded frame.
print("Dataset Overview:")
# DataFrame.info() writes its report to stdout and returns None, so it must not
# be wrapped in print() -- doing so appends a stray "None" line to the output.
df.info()
print("\nFirst 5 rows :")
print(df.head())

Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Casting_Temperature_C  10000 non-null  float64
 1   Rolling_Speed_m_min    10000 non-null  float64
 2   Cooling_Rate_C_s       10000 non-null  float64
 3   UTS_MPa                10000 non-null  float64
 4   Elongation_%           10000 non-null  float64
 5   Conductivity_%_IACS    10000 non-null  float64
dtypes: float64(6)
memory usage: 468.9 KB
None

First 5 rows :
   Casting_Temperature_C  Rolling_Speed_m_min  Cooling_Rate_C_s     UTS_MPa  \
0             697.718136           163.643838         39.712249  500.711414   
1             714.057005           142.242639         34.400726  511.514986   
2             716.629472           131.062094         38.224224  515.505950   
3             692.591177           146.584556         29.936551  505.424829   
4

In [4]:
# Rich (HTML) preview of the first five rows.
df.head(n=5)

Unnamed: 0,Casting_Temperature_C,Rolling_Speed_m_min,Cooling_Rate_C_s,UTS_MPa,Elongation_%,Conductivity_%_IACS
0,697.718136,163.643838,39.712249,500.711414,78.081272,54.28226
1,714.057005,142.242639,34.400726,511.514986,79.159096,54.244548
2,716.629472,131.062094,38.224224,515.50595,79.558297,54.199773
3,692.591177,146.584556,29.936551,505.424829,76.581255,54.635314
4,706.830214,129.944941,31.104307,515.491479,78.649964,54.109156


In [5]:
# Count of missing values per column.
print(df.isna().sum())

Casting_Temperature_C    0
Rolling_Speed_m_min      0
Cooling_Rate_C_s         0
UTS_MPa                  0
Elongation_%             0
Conductivity_%_IACS      0
dtype: int64


In [6]:
# Discard every row that contains at least one missing value.
df = df.dropna()

In [7]:
# Column-wise mean imputation over the whole frame, written back in place.
# NOTE(review): rows containing NaN were already dropped in the previous cell
# and the null count printed above is zero for every column, so this step
# should leave the values unchanged. It is also fitted on all six columns
# (features and targets) before any train/test split -- confirm that is intended.
imputer = SimpleImputer(strategy="mean").fit(df)
df.iloc[:, :] = imputer.transform(df)

In [8]:
# Feature matrix: the three process parameters.
X = df.loc[:, ['Casting_Temperature_C', 'Rolling_Speed_m_min', 'Cooling_Rate_C_s']]

# One target series per property that gets its own regressor.
Y_uts, Y_elongation, Y_conductivity = (
    df['UTS_MPa'],
    df['Elongation_%'],
    df['Conductivity_%_IACS'],
)


In [9]:
# Split the features and all three targets in a single call so every array is
# guaranteed to share the same row partition. The previous version called
# train_test_split three times, overwriting X_train / X_test each time and
# relying on the repeated random_state to keep the targets aligned.
(
    X_train, X_test,
    y_train_uts, y_test_uts,
    y_train_elongation, y_test_elongation,
    y_train_conductivity, y_test_conductivity,
) = train_test_split(
    X, Y_uts, Y_elongation, Y_conductivity,
    test_size=0.2,      # 80 % train / 20 % test
    random_state=42,    # fixed seed for a reproducible partition
)


In [10]:
# Learn the standardisation statistics from the training rows only, then apply
# the same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Candidate hyperparameter values sampled by the randomized search.
param_dist = dict(
    n_estimators=[50, 100, 200],
    max_depth=[5, 10, 20, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [12]:
def train_best_model(y_train, y_test, label, features_train=None, features_test=None,
                     n_iter=10, cv=3):
    """Tune a RandomForestRegressor with a randomized search and report test metrics.

    Parameters
    ----------
    y_train, y_test : array-like
        Target values for the training rows and the held-out rows.
    label : str
        Name of the target, used in the printed report.
    features_train, features_test : array-like, optional
        Feature matrices aligned row-for-row with ``y_train`` / ``y_test``.
        When omitted, the notebook-level ``X_train_scaled`` / ``X_test_scaled``
        are used, which is what this function always used before these
        parameters existed.
    n_iter : int, default 10
        Number of parameter settings sampled from ``param_dist``.
    cv : int, default 3
        Number of cross-validation folds used by the search.

    Returns
    -------
    The search's ``best_estimator_``.

    Side effects
    ------------
    Prints the best parameters, the mean absolute error and the R2 score
    computed on the held-out rows.
    """
    # Fall back to the notebook globals so existing three-argument calls keep working.
    if features_train is None:
        features_train = X_train_scaled
    if features_test is None:
        features_test = X_test_scaled

    search = RandomizedSearchCV(
        RandomForestRegressor(random_state=42),
        param_distributions=param_dist,
        n_iter=n_iter,
        cv=cv,
        n_jobs=-1,          # use every available core
        random_state=42,    # reproducible sampling of parameter settings
    )
    search.fit(features_train, y_train)
    best_model = search.best_estimator_
    print(f"\nBest Parameters for {label}: {search.best_params_}")

    # Evaluate on rows the search never saw.
    y_pred = best_model.predict(features_test)
    print(f"\nPerformance Metrics for {label}:")
    print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred):.2f}")
    print(f"R2 Score: {r2_score(y_test, y_pred):.2f}")

    return best_model

In [13]:
# Fit one tuned regressor per target; each call prints its own report.
model_uts = train_best_model(
    y_train=y_train_uts,
    y_test=y_test_uts,
    label="UTS Prediction",
)
model_elongation = train_best_model(
    y_train=y_train_elongation,
    y_test=y_test_elongation,
    label="Elongation Prediction",
)
model_conductivity = train_best_model(
    y_train=y_train_conductivity,
    y_test=y_test_conductivity,
    label="Conductivity Prediction",
)


  _data = np.array(data, dtype=dtype, copy=copy,



Best Parameters for UTS Prediction: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_depth': 5}

Performance Metrics for UTS Prediction:
Mean Absolute Error: 4.07
R2 Score: 0.76

Best Parameters for Elongation Prediction: {'n_estimators': 100, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_depth': 5}

Performance Metrics for Elongation Prediction:
Mean Absolute Error: 0.82
R2 Score: 0.67

Best Parameters for Conductivity Prediction: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_depth': 5}

Performance Metrics for Conductivity Prediction:
Mean Absolute Error: 0.39
R2 Score: 0.13


In [14]:
from IPython.display import clear_output, display

# Interactive predictor: three numeric inputs, a button and an output area.
#
# The scaler is deliberately NOT re-created or re-fitted in this cell. The
# models were trained on features standardised by the `scaler` fitted on
# X_train; fitting a new scaler on the full X would standardise the user's
# input with different statistics from the ones the models were trained with.

# Create input widgets
casting_temp = widgets.FloatText(description="Casting Temp (°C):")
rolling_speed = widgets.FloatText(description="Rolling Speed (m/min):")
cooling_rate = widgets.FloatText(description="Cooling Rate (°C/s):")

display(casting_temp, rolling_speed, cooling_rate)

# Output widget for displaying results
output = widgets.Output()

# Button for Prediction
button = widgets.Button(description="Predict")


def on_button_click(b):
    """Predict UTS, elongation and conductivity for the values currently entered.

    Parameters
    ----------
    b : the argument supplied by ``button.on_click``; not used.

    Prints the three predictions (or the error message, if anything in the
    prediction path raises) into the ``output`` widget.
    """
    with output:
        clear_output(wait=True)  # Clears old output
        try:
            # Build a one-row frame carrying the same column names as X, so the
            # scaler sees the feature names it was fitted with.
            user_input = pd.DataFrame(
                [[casting_temp.value, rolling_speed.value, cooling_rate.value]],
                columns=X.columns,
                dtype=float,
            )

            # Scale the input with the scaler the models were trained against
            user_input_scaled = scaler.transform(user_input)

            # Predict values
            predicted_uts = model_uts.predict(user_input_scaled)[0]
            predicted_elongation = model_elongation.predict(user_input_scaled)[0]
            predicted_conductivity = model_conductivity.predict(user_input_scaled)[0]

            # Print Predictions
            print(f"\n🔹 **Predicted UTS:** {predicted_uts:.2f} MPa")
            print(f"🔹 **Predicted Elongation:** {predicted_elongation:.2f} %")
            print(f"🔹 **Predicted Conductivity:** {predicted_conductivity:.2f} % IACS")

        except Exception as e:
            # Surface the problem inside the widget instead of failing silently
            # in the callback.
            print("❌ Error:", str(e))


# Link button to function
button.on_click(on_button_click)
display(button, output)


FloatText(value=0.0, description='Casting Temp (¬∞C):')

FloatText(value=0.0, description='Rolling Speed (m/min):')

FloatText(value=0.0, description='Cooling Rate (¬∞C/s):')

Button(description='Predict', style=ButtonStyle())

Output()

In [15]:
import pickle

# Re-fit on the training split so the persisted scaler carries the same
# statistics the models were trained against, regardless of any refit done in
# an earlier cell.
scaler.fit(X_train)

# Fitted objects to persist, keyed by output file name (written to the current
# working directory). Only unpickle these files from a trusted location:
# pickle can execute arbitrary code on load.
artifacts = {
    'model_uts.pkl': model_uts,
    'model_elongation.pkl': model_elongation,
    'model_conductivity.pkl': model_conductivity,
    'scaler.pkl': scaler,
}

for filename, fitted_object in artifacts.items():
    with open(filename, 'wb') as f:
        pickle.dump(fitted_object, f)

print("✅ Models and scaler saved successfully!")


‚úÖ Models and scaler saved successfully!


In [16]:
## Model Training and Evaluation
#This notebook was used to preprocess data, train the model, and evaluate performance.

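### 1️⃣ Data Preprocessing
#- StandardScaler fitted on the training split and applied to both splits
#- Missing values handled with `dropna` and mean imputation (no outlier removal is performed in this notebook)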

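### 2️⃣ Model Training
#- Used **RandomForestRegressor**, one model per target (UTS, elongation, conductivity)
#- Hyperparameter tuning done with **RandomizedSearchCV**
#- Best estimators stored in `model_uts.pkl`, `model_elongation.pkl` and `model_conductivity.pkl`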

### 3️⃣ Model Export
#- Models and the fitted scaler saved using `pickle`
