# **Cement Strength Prediction**

In [58]:
# importing necessary modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from sklearn.compose import ColumnTransformer

## **Data Gathering**

In [None]:
# Read the concrete dataset from its CSV file and preview five random rows
csv_path = 'concrete_data.csv'
df = pd.read_csv(csv_path)
df.sample(n=5)

## Data Analysis

In [167]:
# checking number of duplicates and null values
# A cell only auto-displays its last expression, so the counts are printed explicitly;
# otherwise both results would be silently discarded.
print(df.isna().sum())        # missing values per column
print(df.duplicated().sum())  # number of fully duplicated rows

# Drop rows whose values in every column except the last repeat an earlier row
# (the first occurrence is kept). Reassigning instead of using inplace=True keeps
# the step explicit and avoids mutating the frame behind earlier cells' outputs.
df = df.drop_duplicates(subset=df.columns[:-1])

In [None]:
# Normal probability (Q-Q) plot of every column before transformation
for col in df.columns:
    figure = plt.figure(figsize=(8, 4))
    # the plot occupies the left half of a 1x2 grid, as in the original layout
    left_axes = figure.add_subplot(1, 2, 1)
    stats.probplot(df[col], dist="norm", plot=left_axes)
    # replace the default "Probability Plot" title with the column name
    left_axes.set_title(col)
    plt.show()

In [None]:
# summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

## **Train Test split**

In [140]:
# train test split dataset
from sklearn.model_selection import train_test_split

X = df.iloc[:, 0:8]  # the first eight columns are the model inputs
y = df.iloc[:, -1]   # the last column is the prediction target

# Hold out 20% of the rows for testing. The fixed random_state makes the split,
# and therefore every score computed from it, reproducible across
# "Restart Kernel -> Run All".
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## **Data Preprocessing**

In [None]:
# Power Transform and Scaling
from sklearn.preprocessing import PowerTransformer

# Apply a Yeo-Johnson power transform to the first eight columns (all of X).
# PowerTransformer also standardizes its output by default, which provides the scaling.
power = ColumnTransformer([
    ('norm', PowerTransformer(method='yeo-johnson'), slice(8))
], remainder='passthrough')

# Fitted here only to inspect the transformed features; `power` is fitted again
# when the pipeline that contains it is trained.
# Keeping X_train's index lets the transformed rows be matched with y_train.
X_train_transformed_1 = pd.DataFrame(
    power.fit_transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
# show a short preview instead of dumping the whole frame
X_train_transformed_1.head()

# **Training Model**

In [142]:
# create the (still unfitted) linear regression estimator;
# it is trained later, when the pipeline containing it is fitted
from sklearn.linear_model import LinearRegression
modal = LinearRegression()

## **Creating Pipeline**

In [143]:
# creating pipeline: chain the column transformer (`power`) with the regressor (`modal`)
# so that preprocessing and estimator are fitted and applied together as one object
from sklearn.pipeline import make_pipeline
pipe = make_pipeline(power,modal)

In [144]:
# Display Pipeline: ask scikit-learn to render estimators as an HTML diagram
# whenever one is shown as a cell output
from sklearn import set_config
set_config(display='diagram')

In [None]:
# train: fit the whole pipeline (transformer, then regressor) on the training split only
pipe.fit(X_train,y_train)

## **Model Evaluation**

In [None]:
# R^2 (coefficient of determination) of the fitted pipeline on the held-out test split.
# Note: this is a regression metric, not a classification accuracy score.
from sklearn.metrics import r2_score
y_pred = pipe.predict(X_test)
r2_score(y_test, y_pred)

# **Cross Validation**

In [None]:
# cross validation using cross_val_score
from sklearn.model_selection import cross_val_score

# Mean R^2 over 10 folds, computed on the training split. Cross-validating on the
# test split would reuse the held-out evaluation data and, with only 20% of the
# rows, produce very small and noisy folds. cross_val_score fits clones of `pipe`,
# so the already-fitted pipeline is left untouched.
cross_val_score(pipe, X_train, y_train, cv=10, scoring='r2').mean()

# **Exporting Model**

In [155]:
# export pipeline
import pickle

# Serialize the fitted pipeline to disk. The context manager guarantees the file
# is flushed and closed even if pickling raises; the original bare open() never
# closed its handle.
# NOTE: only unpickle this file from a trusted source, since pickle.load can
# execute arbitrary code.
with open('pipe.pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)