In [2]:
import numpy as np
import pandas as pd

# Load the dataset
df = pd.read_csv("hardness_dataset.csv")

# Check the first few rows to understand the structure
print(df.head())

     material  Formula CrystalSystem   bulk  shear   young  poisson  Hexp
0       mp-66  Diamond         cubic  435.3  520.5  1116.5     0.07  96.0
1    mp-30148     BC2N  orthorhombic  361.0  422.7   912.1     0.08  76.0
2   mp-629458     BC2N  orthorhombic  361.6  409.0   891.1     0.09  76.0
3  mp-1018649    c-BC5      trigonal  405.8  378.2   865.6     0.14  71.0
4     mp-1639       BN         cubic  408.0  374.5   860.2     0.15  63.0


# a) Regression of experimental Hardness

In [4]:
# Split data into features and target variable
x = df[['bulk', 'shear', 'young', 'poisson']]
y = df['Hexp']

# Split into training and test sets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Standardize the feature data
from sklearn.preprocessing import StandardScaler, MinMaxScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.metrics import R2Score

#setting up the ANN
# first layer is number of features
# last layer is output layer. Nodes=1 for binary classification
model = Sequential()
model.add(Dense(units=x_train.shape[1],activation='sigmoid'))
model.add(Dropout(0.3))
model.add(Dense(units=6,activation='relu'))
model.add(Dense(units=4,activation='relu'))
model.add(Dense(units=1))

model.compile(loss='mse',optimizer='adam',metrics=[R2Score])

model.fit(x=x_train, y=y_train,epochs=40,validation_data=(x_test, y_test), batch_size = 10, verbose=1 )

# get prediction on test data
predictions_train=model.predict(x_train)
predictions_test=model.predict(x_test)

from sklearn import metrics

# Metrics for test data
print("\nMetrics for Test data before hyperparamter tuning")
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions_test))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, predictions_test))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error (y_test, predictions_test)))
print('R2 Score:', metrics.r2_score(y_test, predictions_test))

Epoch 1/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 241ms/step - loss: 442.6897 - r2_score: -1.1813 - val_loss: 473.3813 - val_r2_score: -1.1765
Epoch 2/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 562.0645 - r2_score: -0.8851 - val_loss: 470.9218 - val_r2_score: -1.1652
Epoch 3/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 408.9735 - r2_score: -1.1242 - val_loss: 468.4446 - val_r2_score: -1.1538
Epoch 4/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 480.8632 - r2_score: -0.9974 - val_loss: 465.8164 - val_r2_score: -1.1417
Epoch 5/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 520.2639 - r2_score: -0.8698 - val_loss: 463.0367 - val_r2_score: -1.1289
Epoch 6/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 532.3087 - r2_score: -1.0197 - val_loss: 460.1010 - val_r2_score: -1.1154
Ep

In [6]:
#hyperparameter tuning
from tensorflow.keras.optimizers import Adam, RMSprop

def build_model(hp):

    model = Sequential()

    # Input + first hidden layer
    model.add(Dense(
        units=hp.Int('units_input', min_value=4, max_value=64, step=4),
        activation='relu',
        input_shape=(x_train.shape[1],)
    ))

    # Tune number of additional hidden layers (1 to 3)
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Dense(
            units=hp.Int(f'units_{i}', min_value=4, max_value=64, step=4),
            activation='relu'
        ))
        #uncomment if you want a droput layer
        # if hp.Boolean(f'dropout_{i}'):
        #     model.add(Dropout(rate=hp.Float(f'dropout_rate_{i}', 0.1, 0.5, step=0.1)))

    # Output layer
    model.add(Dense(1))

    # Optimizer tuning
    optimizer_choice = hp.Choice('optimizer', ['adam', 'rmsprop'])
    if optimizer_choice == 'adam':
        optimizer = Adam(learning_rate=hp.Float('adam_lr', 1e-4, 1e-2, sampling='log'))
    else:
        optimizer = RMSprop(learning_rate=hp.Float('rms_lr', 1e-4, 1e-2, sampling='log'))

    model.compile(optimizer=optimizer, loss='mse', metrics=["mae"])
    return model

In [7]:
#!pip install -q -U keras-tuner

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m122.9/129.4 kB[0m [31m5.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [8]:
#from kerastuner.tuners import RandomSearch
import keras_tuner as kt

tuner = kt.RandomSearch(build_model,
                        objective='val_mae',       # minimize validation MAE
                        max_trials=10,           # number of random configurations to test
                        directory='tuner_dir',
                        project_name='ann_random_search'
)
# You can print a summary of the search space
#tuner.search_space_summary()

tuner.search(x_train, y_train, epochs=10, validation_data=(x_test, y_test), batch_size=5, verbose=2)

Trial 10 Complete [00h 00m 05s]
val_mae: 3.2109785079956055

Best val_mae So Far: 3.1155261993408203
Total elapsed time: 00h 01m 08s


In [9]:
best_hp = tuner.get_best_hyperparameters(1)[0]

print("Best hyperparameters found:")
for k, v in best_hp.values.items():
    print(f"{k}: {v}")

#extract best model
best_model = tuner.get_best_models(1)[0]

#refit
best_model.fit(x=x_train, y=y_train,epochs=40,validation_data=(x_test, y_test), batch_size = 10, verbose=1 )

predictions_train=best_model.predict(x_train)
predictions_test=best_model.predict(x_test)

# Metrics for test data
print("\nMetrics for Test data after hyper paramter tuning")
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions_test))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, predictions_test))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error (y_test, predictions_test)))
print('R2 Score:', metrics.r2_score(y_test, predictions_test))

Best hyperparameters found:
units_input: 20
num_layers: 2
units_0: 24
optimizer: adam
adam_lr: 0.0017036374364839458
rms_lr: 0.0029517692699950553
units_1: 56
Epoch 1/40


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 165ms/step - loss: 20.1830 - mae: 3.1924 - val_loss: 43.6463 - val_mae: 4.5406
Epoch 2/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 29.1951 - mae: 3.6483 - val_loss: 21.7244 - val_mae: 3.2951
Epoch 3/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 16.8633 - mae: 2.9234 - val_loss: 19.7247 - val_mae: 3.3377
Epoch 4/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 19.1003 - mae: 3.0764 - val_loss: 29.0949 - val_mae: 3.5506
Epoch 5/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 20.1479 - mae: 3.2501 - val_loss: 20.7246 - val_mae: 3.3198
Epoch 6/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 11.4589 - mae: 2.4891 - val_loss: 24.5579 - val_mae: 3.5537
Epoch 7/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 20.4

You need to summerize the results here and highlight the best model and metrics.