In [1]:
import pandas as pd

## Model Training

In [2]:
# Load the concrete dataset from disk and preview its first rows.
DATA_PATH = '/config/workspace/notebooks/data/concrete_data.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,cement,blast_furnace_slag,fly_ash,water,superplasticizer,coarse_aggregate,fine_aggregate,age,concrete_compressive_strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
## Split the frame into independent features (X) and the dependent target (Y)
TARGET_COLUMN = 'concrete_compressive_strength'
# X keeps every column except the target.
X = df.drop(columns=[TARGET_COLUMN])
# Double brackets keep Y as a single-column DataFrame rather than a Series.
Y = df[[TARGET_COLUMN]]

In [4]:
# Display the target frame; it holds only the concrete_compressive_strength column.
Y

Unnamed: 0,concrete_compressive_strength
0,79.99
1,61.89
2,40.27
3,41.05
4,44.30
...,...
1025,44.28
1026,31.18
1027,23.70
1028,32.77


In [20]:
# Partition the feature columns by dtype: object-typed columns are treated as
# categorical, every other column as numerical.
object_features = X.select_dtypes(include='object')
non_object_features = X.select_dtypes(exclude='object')
categorical_cols = object_features.columns
numerical_cols = non_object_features.columns
numerical_cols

Index(['cement', 'blast_furnace_slag', 'fly_ash', 'water', 'superplasticizer',
       'coarse_aggregate', 'fine_aggregate ', 'age'],
      dtype='object')

In [7]:
from sklearn.impute import SimpleImputer ## HAndling Missing Values
from sklearn.preprocessing import StandardScaler # HAndling Feature Scaling
from sklearn.preprocessing import OrdinalEncoder # Ordinal Encoding
## pipelines
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [9]:
## Numerical Pipeline
# Step 1 fills missing values with the column median; step 2 standardises
# each feature.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
num_pipeline = Pipeline(steps=numeric_steps)

# Route only the numerical columns through the numeric pipeline. No
# transformer is registered for categorical_cols here.
numeric_transformer = ('num_pipeline', num_pipeline, numerical_cols)
preprocessor = ColumnTransformer([numeric_transformer])


In [10]:
## Train test split

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
split_options = {'test_size': 0.30, 'random_state': 30}
X_train, X_test, y_train, y_test = train_test_split(X, Y, **split_options)

In [11]:
# Fit the preprocessor on the training split only, then reuse the fitted
# transformer on the test split so no test-set statistics leak into training.
# NOTE: this cell rebinds X_train / X_test to the transformed frames, so
# re-running it without first re-running the train/test split feeds
# already-transformed data back into the preprocessor.
train_matrix = preprocessor.fit_transform(X_train)
transformed_names = preprocessor.get_feature_names_out()
X_train = pd.DataFrame(train_matrix, columns=transformed_names)

test_matrix = preprocessor.transform(X_test)
X_test = pd.DataFrame(test_matrix, columns=transformed_names)

In [12]:
# Preview the first rows of the transformed training features.
X_train.head()

Unnamed: 0,num_pipeline__cement,num_pipeline__blast_furnace_slag,num_pipeline__fly_ash,num_pipeline__water,num_pipeline__superplasticizer,num_pipeline__coarse_aggregate,num_pipeline__fine_aggregate,num_pipeline__age
0,-0.419573,-0.853536,-0.860221,0.207385,-1.07846,1.892133,0.203106,-0.624229
1,-0.839792,-0.853536,0.723853,-0.721195,0.194825,0.42456,1.676307,0.219436
2,-0.571587,-0.853536,0.663958,0.373698,-0.314489,-0.088568,1.226513,-0.6931
3,-0.802031,0.689224,-0.860221,0.498432,-1.07846,0.058972,0.660481,-0.6931
4,-0.878522,-0.853536,1.116325,-0.882888,0.245756,1.5161,0.388835,-0.503706


In [13]:
## Model Training

from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [14]:
# Baseline model: ordinary least-squares regression fitted on the
# preprocessed training features and the training target.
regression=LinearRegression()
regression.fit(X_train,y_train)

In [15]:
# Fitted coefficients of the baseline linear model.
regression.coef_

array([[11.08654498,  7.76943718,  4.63849486, -3.82745247,  2.25286803,
         0.24356795,  0.44823615,  7.81708239]])

In [16]:
# Fitted intercept of the baseline linear model.
regression.intercept_

array([35.51234397])

In [17]:
import numpy as np
def evaluate_model(true, predicted):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicted : array-like
        Model predictions aligned with ``true``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2_square)``: mean absolute error, root mean squared
        error, and the R^2 score, in that order.
    """
    mae = mean_absolute_error(true, predicted)
    # Compute the MSE a single time and derive the RMSE from it.
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mse)
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [18]:
## Train multiple models
## Model Evaluation
# Candidate regressors, keyed by the display name used in the report below.
models={
    'LinearRegression':LinearRegression(),
    'Lasso':Lasso(),
    'Ridge':Ridge(),
    'Elasticnet':ElasticNet()
}
trained_model_list=[]  # fitted estimators, in the same order as model_list
model_list=[]          # model names, in fit order
r2_list=[]             # R^2 score of each model, aligned with model_list

# Iterate over (name, estimator) pairs directly instead of rebuilding
# list(models.keys()) / list(models.values()) on every pass.
for model_name, model in models.items():
    model.fit(X_train,y_train)
    trained_model_list.append(model)

    #Make Predictions
    y_pred=model.predict(X_test)

    # The metrics are computed on the held-out test split (y_test / y_pred),
    # even though the heading printed below says "Training".
    mae, rmse, r2_square=evaluate_model(y_test,y_pred)

    print(model_name)
    model_list.append(model_name)

    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE:",mae)
    print("R2 score",r2_square*100)

    r2_list.append(r2_square)

    print('='*35)
    print('\n')


LinearRegression
Model Training Performance
RMSE: 11.538668429960895
MAE: 8.826089685903524
R2 score 53.082812607370535


Lasso
Model Training Performance
RMSE: 11.763091280033107
MAE: 9.258548359954972
R2 score 51.240020369823625


Ridge
Model Training Performance
RMSE: 11.544262546464758
MAE: 8.836768821974548
R2 score 53.037309291941924


Elasticnet
Model Training Performance
RMSE: 12.197093352823748
MAE: 9.797301731919703
R2 score 47.57562314683908




In [19]:
# Names of the evaluated models, in the order they were fitted.
model_list

['LinearRegression', 'Lasso', 'Ridge', 'Elasticnet']