In [1]:
# Import Data Manipulation Library
import numpy as np
import pandas as pd
# Import Data Visualization Library
import seaborn as sns
import matplotlib.pyplot as plt
# Import filter warning Libraries
import warnings
warnings.filterwarnings('ignore')
# import Logging
import logging
logging.basicConfig(level = logging.INFO,
                    format = '%(asctime)s - %(levelname)s - %(message)s',
                    filemode = 'w',
                    filename = 'model.log', force = True)


In [2]:
logging.info('Import Dataset For Model Building...')

In [3]:
# Importing dataset from github to local machine
url = 'https://raw.githubusercontent.com/rutuja0703/CementPrediction_Model/refs/heads/main/Concrete_Data%20(3).csv'
df = pd.read_csv(url)
df.sample(frac = 1)

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
310,295.7,0.0,95.6,171.5,8.9,955.1,859.2,14,35.23
114,362.6,189.0,0.0,164.9,11.6,944.7,755.8,7,22.90
880,152.0,0.0,112.0,184.0,8.0,992.0,816.0,28,12.18
773,382.0,0.0,0.0,186.0,0.0,1047.0,739.0,28,37.42
739,296.0,0.0,0.0,186.0,0.0,1090.0,769.0,28,25.18
...,...,...,...,...,...,...,...,...,...
692,212.0,141.3,0.0,203.5,0.0,973.4,750.0,90,39.70
196,194.7,0.0,100.5,165.6,7.5,1006.4,905.9,28,25.72
695,116.0,173.0,0.0,192.0,0.0,909.8,891.9,28,22.35
31,266.0,114.0,0.0,228.0,0.0,932.0,670.0,365,52.91


In [8]:
# Checking Data columns
df.columns

Index(['Cement (component 1)(kg in a m^3 mixture)',
       'Blast Furnace Slag (component 2)(kg in a m^3 mixture)',
       'Fly Ash (component 3)(kg in a m^3 mixture)',
       'Water  (component 4)(kg in a m^3 mixture)',
       'Superplasticizer (component 5)(kg in a m^3 mixture)',
       'Coarse Aggregate  (component 6)(kg in a m^3 mixture)',
       'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)',
       'Concrete compressive strength(MPa, megapascals) '],
      dtype='object')

In [24]:
import statsmodels.api as sm
X = df.drop(columns = 'Concrete compressive strength(MPa, megapascals) ',axis = 1)
y = df['Concrete compressive strength(MPa, megapascals) ']

In [26]:
X = sm.add_constant(X)

In [28]:
model = sm.OLS(y,X).fit()
model.summary()

0,1,2,3
Dep. Variable:,"Concrete compressive strength(MPa, megapascals)",R-squared:,0.616
Model:,OLS,Adj. R-squared:,0.613
Method:,Least Squares,F-statistic:,204.3
Date:,"Wed, 28 May 2025",Prob (F-statistic):,6.29e-206
Time:,19:50:43,Log-Likelihood:,-3869.0
No. Observations:,1030,AIC:,7756.0
Df Residuals:,1021,BIC:,7800.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-23.3312,26.586,-0.878,0.380,-75.500,28.837
Cement (component 1)(kg in a m^3 mixture),0.1198,0.008,14.113,0.000,0.103,0.136
Blast Furnace Slag (component 2)(kg in a m^3 mixture),0.1039,0.010,10.247,0.000,0.084,0.124
Fly Ash (component 3)(kg in a m^3 mixture),0.0879,0.013,6.988,0.000,0.063,0.113
Water (component 4)(kg in a m^3 mixture),-0.1499,0.040,-3.731,0.000,-0.229,-0.071
Superplasticizer (component 5)(kg in a m^3 mixture),0.2922,0.093,3.128,0.002,0.109,0.476
Coarse Aggregate (component 6)(kg in a m^3 mixture),0.0181,0.009,1.926,0.054,-0.000,0.037
Fine Aggregate (component 7)(kg in a m^3 mixture),0.0202,0.011,1.887,0.059,-0.001,0.041
Age (day),0.1142,0.005,21.046,0.000,0.104,0.125

0,1,2,3
Omnibus:,5.378,Durbin-Watson:,1.282
Prob(Omnibus):,0.068,Jarque-Bera (JB):,5.304
Skew:,-0.174,Prob(JB):,0.0705
Kurtosis:,3.045,Cond. No.,106000.0


In [30]:
# Exploratory data analysis
df

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.30
...,...,...,...,...,...,...,...,...,...
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28,44.28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28,31.18
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28,23.70
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28,32.77


In [66]:
# Checking Descriptive Stats : EDA 
from collections import OrderedDict

stats = []
for i in df.columns:
    numerical_stats = OrderedDict({
         'Feature': i,
         'Median' :df[i].median(),
         'Mean' :df[i].mean(),
         'Q1' : df[i].quantile(0.25),
         'Q3' : df[i].quantile(0.75),
         'IQR' :df[i].quantile(0.75) - df[i].quantile(0.25),
         'Standard Deviation' :df[i].std(),
         'Skewness' :df[i].skew(),
         'Kurtosis' :df[i].kurt()
         })
    stats.append(numerical_stats)
    report = pd.DataFrame(stats)

report

Unnamed: 0,Feature,Median,Mean,Q1,Q3,IQR,Standard Deviation,Skewness,Kurtosis
0,Cement (component 1)(kg in a m^3 mixture),272.9,281.167864,192.375,350.0,157.625,104.506364,0.509481,-0.520652
1,Blast Furnace Slag (component 2)(kg in a m^3 m...,22.0,73.895825,0.0,142.95,142.95,86.279342,0.800717,-0.508175
2,Fly Ash (component 3)(kg in a m^3 mixture),0.0,54.18835,0.0,118.3,118.3,63.997004,0.537354,-1.328746
3,Water (component 4)(kg in a m^3 mixture),185.0,181.567282,164.9,192.0,27.1,21.354219,0.074628,0.122082
4,Superplasticizer (component 5)(kg in a m^3 mix...,6.4,6.20466,0.0,10.2,10.2,5.973841,0.907203,1.411269
5,Coarse Aggregate (component 6)(kg in a m^3 mi...,968.0,972.918932,932.0,1029.4,97.4,77.753954,-0.04022,-0.599016
6,Fine Aggregate (component 7)(kg in a m^3 mixture),779.5,773.580485,730.95,824.0,93.05,80.17598,-0.25301,-0.102177
7,Age (day),28.0,45.662136,7.0,56.0,49.0,63.169912,3.269177,12.168989
8,"Concrete compressive strength(MPa, megapascals)",34.445,35.817961,23.71,46.135,22.425,16.705742,0.416977,-0.313725
