In [5]:
import pandas as pd
import numpy as np
from sklearn.pipeline import make_pipeline,Pipeline,make_union
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import chi2,SelectKBest


In [6]:
df=pd.read_csv("data.csv")

In [7]:
df.drop(columns=['Sl. No.'],axis=1,inplace=True)

In [8]:
df.head()

Unnamed: 0,NT,THT,THt,THQCr,CT,Ct,DT,Dt,QmT,TT,...,S,Ni,Cr,Cu,Mo,RedRatio,dA,dB,dC,Fatigue
0,885,865,30,24,930,0.0,830.0,15.0,30,550,...,0.022,0.01,0.02,0.01,0.0,825,0.07,0.02,0.04,232
1,885,865,30,24,930,0.0,830.0,15.0,30,550,...,0.017,0.08,0.12,0.08,0.0,610,0.11,0.0,0.04,235
2,885,865,30,24,930,0.0,830.0,15.0,30,550,...,0.015,0.02,0.03,0.01,0.0,1270,0.07,0.02,0.0,235
3,885,865,30,24,930,0.0,830.0,15.0,30,550,...,0.024,0.01,0.02,0.01,0.0,1740,0.06,0.0,0.0,241
4,885,865,30,24,930,0.0,830.0,15.0,30,550,...,0.022,0.01,0.02,0.02,0.0,825,0.04,0.02,0.0,225


In [9]:
from sklearn import set_config
set_config(display='diagram')

In [10]:
x_train,x_test,y_train,y_test = train_test_split(df.drop(columns=['Fatigue']),
                                                 df['Fatigue'],
                                                 test_size=0.4,
                                                random_state=42)

In [11]:
print(x_train.shape,x_test.shape,y_train.shape,y_test.shape)

(262, 25) (175, 25) (262,) (175,)


In [12]:
#scaling
tf_01=ColumnTransformer([
    ('scale',StandardScaler(),slice(0,24))
])

In [13]:
tf_03 = RandomForestRegressor(n_estimators=30,max_depth=10,min_samples_split=5,min_samples_leaf=1)

In [14]:
pipe=Pipeline([
    ('Scaler',tf_01),
    ('Regressor',tf_03)
])

In [15]:
pipe.fit(x_train,y_train)

In [16]:
pipe.named_steps

{'Scaler': ColumnTransformer(transformers=[('scale', StandardScaler(),
                                  slice(0, 24, None))]),
 'Regressor': RandomForestRegressor(max_depth=10, min_samples_split=5, n_estimators=30)}

In [17]:
pipe.n_features_in_

25

In [18]:
y_pred = pipe.predict(x_test)

In [19]:
import pickle
pickle.dump(pipe,open('pipe.pkl','wb'))

In [None]:
min_values = df.min()
max_values = df.max()

In [26]:
for col in df.columns:
    print(f"{col}: ({min_values[col]}, {max_values[col]}, {min_values[col]})")

NT: (825.0, 930.0, 825.0)
THT: (825.0, 865.0, 825.0)
THt: (30.0, 30.0, 30.0)
THQCr: (0.0, 24.0, 0.0)
CT: (930.0, 930.0, 930.0)
Ct: (0.0, 540.0, 0.0)
DT: (830.0, 903.333, 830.0)
Dt: (15.0, 70.2, 15.0)
QmT: (30.0, 140.0, 30.0)
TT: (160.0, 680.0, 160.0)
Tt: (60.0, 120.0, 60.0)
TCr: (0.5, 24.0, 0.5)
C: (0.17, 0.63, 0.17)
Si: (0.16, 2.05, 0.16)
Mn: (0.37, 1.6, 0.37)
P: (0.002, 0.031, 0.002)
S: (0.003, 0.03, 0.003)
Ni: (0.01, 2.78, 0.01)
Cr: (0.01, 1.17, 0.01)
Cu: (0.01, 0.26, 0.01)
Mo: (0.0, 0.24, 0.0)
RedRatio: (240.0, 5530.0, 240.0)
dA: (0.0, 0.13, 0.0)
dB: (0.0, 0.05, 0.0)
dC: (0.0, 0.058, 0.0)
Fatigue: (225.0, 1190.0, 225.0)


In [25]:
max_values

NT           930.000
THT          865.000
THt           30.000
THQCr         24.000
CT           930.000
Ct           540.000
DT           903.333
Dt            70.200
QmT          140.000
TT           680.000
Tt           120.000
TCr           24.000
C              0.630
Si             2.050
Mn             1.600
P              0.031
S              0.030
Ni             2.780
Cr             1.170
Cu             0.260
Mo             0.240
RedRatio    5530.000
dA             0.130
dB             0.050
dC             0.058
Fatigue     1190.000
dtype: float64