In [None]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from tqdm import tqdm

In [None]:
from sklearn import set_config
# Ask scikit-learn to render estimators as diagrams when a cell displays one
# (for example the fitted model returned by a bare `.fit(...)` expression).
set_config(display="diagram")

In [1]:
# Mount Google Drive so the "/content/drive/..." path read by the data-loading
# cell resolves on a fresh Colab runtime. The google.colab package only exists
# on Colab, so the mount is skipped elsewhere and the CSV must then already be
# present at that path.
try:
    from google.colab import drive
except ImportError:
    drive = None

if drive is not None:
    drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the concrete dataset and replace its header with short, code-friendly
# names. The assignment is positional, so the file must have exactly nine columns
# in this order.
df = pd.read_csv(
    "/content/drive/MyDrive/Colab Notebooks/concrete_data.csv"
)
df.columns = [
    "cement",
    "bfs",
    "fly_ash",
    "water",
    "superplz",
    "coarse_agg",
    "fine_agg",
    "age",
    "conc_str",
]

In [None]:
# Target is the "conc_str" column; every remaining column is a feature.
y = df["conc_str"]
X = df.drop(columns=["conc_str"])

In [None]:
from sklearn.model_selection import train_test_split

# Shuffled 80/20 hold-out split; the fixed random_state keeps the partition
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    shuffle=True,
    random_state=42,
)

In [None]:
from sklearn.pipeline import Pipeline

class WaterCementRatio(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends a ``water_cement_ratio`` column.

    The input must expose ``water`` and ``cement`` columns; the new column is
    their element-wise quotient rounded to two decimals.
    """

    def fit(self, X, y=None):
        """Learn nothing and return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` with the rounded water/cement ratio added as a column.

        ``X`` itself is not modified; ``y`` is accepted but unused.
        """
        ratio = (X["water"] / X["cement"]).round(2)
        # Concatenating a named Series adds it as one extra column, aligned on
        # the row labels of X.
        return pd.concat([X, ratio.rename("water_cement_ratio")], axis=1)
       
# Step 1: append the engineered water/cement ratio column.
wtr_cem_pipeline = Pipeline([("wtr_cem_ratio", WaterCementRatio())])

# Step 2: standardise the original feature columns (those of X); any other
# column -- here the ratio added in step 1 -- is passed through unscaled and
# placed after the scaled ones.
col_transformer = ColumnTransformer(
    [("std_scaler", StandardScaler(), X.columns)],
    remainder="passthrough",
)

def pipeline(X, y=None, fit_transform=True):
    """Run the ratio step and the column scaler, returning a labelled DataFrame.

    Parameters
    ----------
    X : pandas.DataFrame
        Raw feature frame; it must contain the columns the two preprocessing
        steps expect.
    y : ignored
        Accepted for call-site symmetry; not used.
    fit_transform : bool, default True
        When True both steps are (re)fitted on ``X`` before transforming it.
        When False the previously fitted steps are only applied, which is what
        held-out data needs.

    Returns
    -------
    pandas.DataFrame
        The transformed features under the names in ``cols``, carrying the
        same row labels as the input ``X``.
    """
    cols = ["cement", "bfs", "fly_ash", "water", "superplz",
            "coarse_agg", "fine_agg", "age", "water_cement_ratio"]

    # The column transformer hands back a bare array, so capture the row labels
    # now. Without them the result would get a fresh 0..n-1 index and stop
    # lining up with the matching target Series in label-based pandas operations.
    row_index = X.index

    if fit_transform:
        X = wtr_cem_pipeline.fit_transform(X)
        X = col_transformer.fit_transform(X)
    else:
        X = wtr_cem_pipeline.transform(X)
        X = col_transformer.transform(X)

    return pd.DataFrame(X, columns=cols, index=row_index)

In [None]:
# Fit the preprocessing steps on the training features and transform them
# (fit_transform=True selects the fitting branch of pipeline()).
X_train_tr = pipeline(X_train, fit_transform=True)

In [None]:
from sklearn.model_selection import cross_val_score

## Training Using the XGBoost Algorithm

In [None]:
from xgboost import XGBRegressor

# 10-fold cross-validated R^2 on the transformed training data; the cell's
# output is the mean over the folds.
scores = cross_val_score(
    XGBRegressor(objective="reg:squarederror", n_estimators=500),
    X_train_tr,
    y_train,
    scoring="r2",
    cv=10,
)
scores.mean()

0.9298321667574149

In [None]:
# Train the final XGBoost regressor on the whole transformed training split,
# using the same hyper-parameters as the cross-validation run.
xgb_model = XGBRegressor(
    objective="reg:squarederror",
    n_estimators=500,
)
xgb_model.fit(X_train_tr, y_train)

In [None]:
# Apply the already-fitted preprocessing steps to the held-out features;
# fit_transform=False means nothing is re-fitted on the test split.
X_test_tr = pipeline(X_test, fit_transform=False)

In [None]:
# Sanity check: number of rows in the transformed test features.
X_test_tr.shape[0]

206

### Model Performance on test dataset

In [None]:
# Predict the target for the transformed test features with the fitted XGBoost model.
y_pred = xgb_model.predict(X_test_tr)

In [None]:
from sklearn.metrics import r2_score


# R^2 of the XGBoost predictions against the held-out targets.
r2_score(y_test, y_pred)

0.9268766439951882

In [None]:
# Sanity check: number of held-out targets (should equal the test feature rows).
y_test.shape[0]

206

## Training Using an ANN (PyTorch)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Use the first CUDA GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Convert the training data to float32 tensors on the chosen device. The target
# is reshaped into a single column so it matches the network's one-unit output.
X_ = X_train_tr.to_numpy()
y_ = y_train.to_numpy().reshape((-1, 1))
X_tr = torch.from_numpy(X_).to(dtype=torch.float32, device=device)
y_tr = torch.from_numpy(y_).to(dtype=torch.float32, device=device)

In [None]:
# Seed PyTorch's global RNG before any layer is created so that weight
# initialisation -- and the shuffling of a DataLoader that has no generator of
# its own -- is repeatable across "Restart & Run All" executions.
torch.manual_seed(42)

# Fully connected regressor: 9 input features -> 50 -> 40 -> 30 -> 20 -> 10 -> 5
# -> 1 output, alternating ReLU and LeakyReLU activations. The whole container
# is moved to `device` once at the end, so individual layers need no `.to()`.
ann_model = nn.Sequential(
    nn.Linear(9, 50),
    nn.ReLU(),
    nn.Linear(50, 40),
    nn.LeakyReLU(),
    nn.Linear(40, 30),
    nn.ReLU(),
    nn.Linear(30, 20),
    nn.LeakyReLU(),
    nn.Linear(20, 10),
    nn.ReLU(),
    nn.Linear(10, 5),
    nn.LeakyReLU(),
    nn.Linear(5, 1),
    # NOTE(review): a ReLU on the output clamps predictions to >= 0. If the last
    # linear unit goes negative for every sample its gradient is zero and
    # training can stall -- consider dropping this activation.
    nn.ReLU(),
).to(device=device)

# Mean-squared-error objective optimised with Adam at its default settings.
loss_fn = nn.MSELoss().to(device=device)
optimizer = torch.optim.Adam(ann_model.parameters())

In [None]:
# Pair features with targets and serve them in shuffled mini-batches of 32.
dataset = TensorDataset(X_tr, y_tr)
data_loader = DataLoader(dataset=dataset, shuffle=True, batch_size=32)

In [None]:
epochs = 1000
for epoch in range(epochs):
    # Batch variables get their own names: unpacking into `y_tr` would rebind
    # the notebook-level training target to the last mini-batch, and re-running
    # the TensorDataset cell afterwards would fail on mismatched sizes.
    for x_batch, y_batch in data_loader:
        # forward pass
        pred = ann_model(x_batch)
        loss = loss_fn(pred, y_batch)

        # backward pass: clear stale gradients, back-propagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Optional progress logging (loss of the last mini-batch of the epoch):
    # if (epoch + 1) % 10 == 0:
    #     print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")


In [None]:
# Convert the transformed test features to a float32 tensor on the chosen device.
Xt_ = X_test_tr.to_numpy()
Xt_tr = torch.from_numpy(Xt_).to(dtype=torch.float32, device=device)

In [None]:
# Inference only: skip gradient tracking, then bring the predictions back to
# the CPU as a NumPy array for scikit-learn's metric.
with torch.no_grad():
    pred = ann_model(Xt_tr).cpu().numpy()

In [None]:
# R^2 of the network's test-set predictions (`pred` is the NumPy array
# produced by the inference cell above) against the held-out targets.
r2_score(y_test, pred)

0.8774803555649596