In [16]:
from ucimlrepo import fetch_ucirepo

# Download the UCI "Concrete Compressive Strength" dataset (repository id 165).
concrete_compressive_strength = fetch_ucirepo(id=165)

# Pull the feature and target tables (pandas DataFrames) out of the result.
X, y = (
    concrete_compressive_strength.data.features,
    concrete_compressive_strength.data.targets,
)

# Print the dataset metadata first, then the per-variable information.
for section in (
    concrete_compressive_strength.metadata,
    concrete_compressive_strength.variables,
):
    print(section)


{'uci_id': 165, 'name': 'Concrete Compressive Strength', 'repository_url': 'https://archive.ics.uci.edu/dataset/165/concrete+compressive+strength', 'data_url': 'https://archive.ics.uci.edu/static/public/165/data.csv', 'abstract': 'Concrete is the most important material in civil engineering. The concrete compressive strength is a highly nonlinear function of age and ingredients. ', 'area': 'Physics and Chemistry', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 1030, 'num_features': 8, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['Concrete compressive strength'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1998, 'last_updated': 'Sun Feb 11 2024', 'dataset_doi': '10.24432/C5PK67', 'creators': ['I-Cheng Yeh'], 'intro_paper': {'ID': 383, 'type': 'NATIVE', 'title': 'Modeling of strength of high-performance concrete using artificial neural networks', 'authors': 'I. Yeh', 'venue': 'C

In [4]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [1]:
# Install necessary libraries for Google Colab
!pip install ucimlrepo tensorflow

# Import necessary libraries
import pandas as pd
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow import keras
from tensorflow.keras import layers

# Fetch dataset
concrete_compressive_strength = fetch_ucirepo(id=165)

# Data as pandas DataFrame
X = concrete_compressive_strength.data.features
y = concrete_compressive_strength.data.targets.values.flatten()  # Flatten the target variable

# Display the original data shape
print("Original data shape:")
print(X.shape)

# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display the shapes of the training and testing sets
print("\nTraining set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)

# Build the neural network model
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)  # Output layer for regression
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print("\nMean Squared Error on the test set:", mse)

# Display a few predictions
predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred.flatten()})
print("\nSample predictions:")
print(predictions_df.head())


Original data shape:
(1030, 8)

Training set shape: (824, 8)
Testing set shape: (206, 8)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 19950.7617 - val_loss: 2061.2385
Epoch 2/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1192.8071 - val_loss: 503.9650
Epoch 3/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 464.5180 - val_loss: 353.3856
Epoch 4/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 350.6867 - val_loss: 261.7206
Epoch 5/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 289.8922 - val_loss: 220.9951
Epoch 6/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 259.0089 - val_loss: 194.0496
Epoch 7/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 213.1921 - val_loss: 189.9547
Epoch 8/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 194.7265 - val_loss: 162.8741
Epoch 9/100

# Explanation of the Code

## Importing Necessary Libraries
The code begins by importing essential libraries. It imports `pandas` for data manipulation, `fetch_ucirepo` from the `ucimlrepo` library to retrieve datasets, and functions from `sklearn` for data splitting and evaluation. It also imports TensorFlow with Keras for building and training the neural network.

## Fetching the Dataset
The dataset is fetched from the UCI Machine Learning Repository using the `fetch_ucirepo` function with the specific ID for the Concrete Compressive Strength dataset. The dataset contains features (input variables) and targets (output variable).

## Preparing the Data
The features are extracted into a DataFrame called `X`, while the target variable, representing concrete compressive strength, is stored in `y`. The target is flattened from a single-column table into a one-dimensional array, which matches the shape expected when the actual and predicted values are later placed side by side in a DataFrame.

## Displaying Original Data Shape
The original shape of the features DataFrame `X` is printed to understand the dimensions of the dataset.

## Data Splitting
The dataset is split into training and testing sets using the `train_test_split` function. In this split, 80% of the data is used for training, and 20% is reserved for testing. A random state is set to ensure reproducibility of the split.

## Displaying Shapes of Training and Testing Sets
The shapes of the training and testing sets are printed to confirm that the data has been split correctly.

## Building the Neural Network Model
A sequential neural network model is constructed using Keras. The model consists of:
- A first hidden (dense) layer with 64 units and the ReLU activation function, which receives the 8 input features.
- A second hidden (dense) layer with 32 units and the ReLU activation function.
- An output layer with a single unit, appropriate for regression tasks, as it predicts a continuous value.

## Compiling the Model
The model is compiled using the Adam optimizer and the mean squared error (MSE) loss function. This setup is typical for regression tasks, allowing the model to learn from the data.

## Training the Model
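The model is trained on the training data for 100 epochs with a batch size of 32. Because `validation_split=0.2` is passed, Keras holds out the last 20% of the training samples for validation, so the weights are fit on the remaining 80% while the validation loss is reported after each epoch to monitor the model's performance.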

## Making Predictions
After training, the model is used to predict the target values for the test set. The predicted values are stored in `y_pred`.

## Calculating Mean Squared Error (MSE)
The MSE is calculated by comparing the actual target values from the test set with the predicted values. It is the average of the squared prediction errors, so it is expressed in squared units of the target; lower values indicate smaller errors, i.e., a better fit.

## Displaying Sample Predictions
Finally, a comparison of actual versus predicted values is displayed by creating a DataFrame. The first few rows of this DataFrame are printed to provide a glimpse of the model's predictions.

# Conclusion
This code implements a neural-network regression model that predicts concrete compressive strength. It covers data fetching, train/test splitting, model training, and evaluation, making it a compact end-to-end example of tackling a regression problem with deep learning.
