In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import joblib
from sklearn.metrics import r2_score
from helper import PgxPipeline,get_outliers, DropColumnTransformer
from sklearn.model_selection import GridSearchCV

In [None]:
df = pd.read_csv("datasets/pga_Rrup.csv")
outlier_indexes = get_outliers(df)
df = df.drop(outlier_indexes)
df.head()

In [None]:
process_pipeline = PgxPipeline("PGA",df).get_pipeline()

In [None]:
df_processed = pd.DataFrame(
    process_pipeline.transform(df),
    columns=["MW","Rrup","VS30","PGA","FT_Ters","FT_Yanal"]
)
X = df_processed.drop("PGA",axis=1)
y = df_processed["PGA"]

In [None]:
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Define the model architecture
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(units=5,activation="relu"))
model.add(tf.keras.layers.Dense(units=10,activation="relu"))
model.add(tf.keras.layers.Dense(units=20,activation="relu"))
model.add(tf.keras.layers.Dense(units=10,activation="relu"))
model.add(tf.keras.layers.Dense(units=5,activation="relu"))
model.add(tf.keras.layers.Dense(units=1))
model.compile(optimizer="adam",loss="mean_squared_error")
model.fit(x_train,y_train,batch_size=32,epochs=100)

In [11]:
def create_model(num_units, num_layers, loss_function, optimizer_function):
    model = tf.keras.models.Sequential()
    for i in range(num_layers):
        model.add(tf.keras.layers.Dense(num_units, activation='relu'))
    model.add(tf.keras.layers.Dense(1))
    model.compile(loss=loss_function, optimizer=optimizer_function)
    return model

In [13]:
param_grid = {
    'num_units': [5, 20,100],
    'num_layers': [1, 2, 3,4,5],
    'loss_function': ['mse', 'mae'],
    'optimizer_function': ['adam', 'rmsprop']
}
model = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=create_model, verbose=0)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, n_jobs=-1)
grid_result = grid_search.fit(x_train, y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: -0.107850 using {'loss_function': 'mse', 'num_layers': 5, 'num_units': 100, 'optimizer_function': 'adam'}


In [14]:
final_model = create_model(**grid_result.best_params_)
final_model.fit(x_train,y_train,batch_size=32,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x26d2d51b100>

In [16]:
y_predict = final_model.predict(x_test)
test_score = r2_score(y_test, y_predict)
print("test_score = ", test_score)

test_score =  0.9037590427164497
