In [1]:
# Import necessary libraries
import pandas as pd                # For data handling
import numpy as np                 # For numerical computations
import matplotlib.pyplot as plt    # For visualization
import seaborn as sns              # For better plots

# Scikit-learn Libraries
from sklearn.model_selection import train_test_split  # For splitting data
from sklearn.preprocessing import StandardScaler      # For feature scaling
from sklearn.linear_model import Lasso, Ridge        # Lasso & Ridge Regression
from sklearn.metrics import mean_squared_error       # For model evaluation


In [2]:
# Fetch the UCI concrete compressive-strength workbook directly from the
# archive and parse it into a DataFrame (needs network access on every run).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
df = pd.read_excel(io=url)

# Preview the leading five rows; the bare last expression is the cell's output
df.head(n=5)

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.887366
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.269535
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05278
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.296075


In [3]:
# Count the null entries in every column (isna is the alias of isnull)
print(df.isna().sum())


Cement (component 1)(kg in a m^3 mixture)                0
Blast Furnace Slag (component 2)(kg in a m^3 mixture)    0
Fly Ash (component 3)(kg in a m^3 mixture)               0
Water  (component 4)(kg in a m^3 mixture)                0
Superplasticizer (component 5)(kg in a m^3 mixture)      0
Coarse Aggregate  (component 6)(kg in a m^3 mixture)     0
Fine Aggregate (component 7)(kg in a m^3 mixture)        0
Age (day)                                                0
Concrete compressive strength(MPa, megapascals)          0
dtype: int64


In [4]:
# List the column labels of the loaded frame.
# The recorded output shows irregular spacing in several labels (double spaces
# in the Water / Coarse Aggregate names, a trailing space on the strength
# column), so selecting columns by position, as the later cells do, avoids
# having to reproduce the labels exactly.
column_labels = df.columns
print(column_labels)

Index(['Cement (component 1)(kg in a m^3 mixture)',
       'Blast Furnace Slag (component 2)(kg in a m^3 mixture)',
       'Fly Ash (component 3)(kg in a m^3 mixture)',
       'Water  (component 4)(kg in a m^3 mixture)',
       'Superplasticizer (component 5)(kg in a m^3 mixture)',
       'Coarse Aggregate  (component 6)(kg in a m^3 mixture)',
       'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)',
       'Concrete compressive strength(MPa, megapascals) '],
      dtype='object')


In [6]:
# Separate predictors from the target by position: the target (compressive
# strength) is the final column and every column before it is a feature.
# train_test_split is already imported in the notebook's import cell, so it is
# not imported again here.
X = df.iloc[:, :-1]  # feature matrix: every column except the last
y = df.iloc[:, -1]   # target vector: the last column



In [7]:

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both partitions so the test rows never influence it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [9]:
# Fit a Lasso (L1-penalised) linear model on the standardised training data
lasso = Lasso(alpha=0.1)  # alpha controls how strongly coefficients are penalised
lasso.fit(X=X_train_scaled, y=y_train)


In [10]:
# Score the fitted Lasso model on the held-out, standardised test rows
y_pred_lasso = lasso.predict(X=X_test_scaled)

# Mean squared error between the true and predicted targets (lower is better)
lasso_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_lasso)
print(f"Lasso Regression MSE: {lasso_mse}")

# Fitted coefficients, one per feature column of X; the features were
# standardised, so the magnitudes are on a comparable scale
print("Lasso Coefficients:", lasso.coef_)

Lasso Regression MSE: 96.41516117746842
Lasso Coefficients: [10.72116343  7.39988646  3.45268769 -4.24038137  1.78943421 -0.
  0.07752961  6.81291445]


In [11]:
# Ridge Regression: the estimator is both created and fitted in the next cell,
# so no separate, unfitted instance is built here.


In [12]:
# Fit a Ridge (L2-penalised) linear model on the standardised training data
ridge = Ridge(alpha=1)  # alpha controls how strongly coefficients are penalised
ridge.fit(X=X_train_scaled, y=y_train)



In [13]:
# Score the fitted Ridge model on the held-out, standardised test rows
y_pred_ridge = ridge.predict(X=X_test_scaled)

# Mean squared error between the true and predicted targets (lower is better)
ridge_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_ridge)
print(f"Ridge Regression MSE: {ridge_mse}")

# Fitted coefficients, one per feature column of X; the features were
# standardised, so the magnitudes are on a comparable scale
print("Ridge Coefficients:", ridge.coef_)


Ridge Regression MSE: 95.96966712155428
Ridge Coefficients: [12.55974133  9.2136324   5.06159801 -3.03887328  1.84277984  1.24866736
  1.75191242  7.01883747]
