In [None]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#### Prepare the csv file 
Read in the dataset and create a Pandas dataframe.

In [None]:
# Path of the input CSV; replace "CSVFILE" with your own CSV file path.
csv_path = "CSVFILE"
data = pd.read_csv(csv_path)

# Preview the first five rows of the loaded dataframe.
data.head(n=5)

You can call `data.describe()` to see summary statistics for the numeric fields in the dataframe.

####  Split the dataset for ML
Split the dataset into train, validation, and test sets.

In [None]:
# Fixed seed so the split (and the CSV files written below) is identical on
# every "Restart & Run All".
SEED = 42

# Hold out 25% of the rows for testing, then take 33% of the remainder as the
# validation set (about 50% / 25% / 25% train / validation / test overall).
train_val, test = train_test_split(data, test_size=0.25, random_state=SEED)
train, val = train_test_split(train_val, test_size=0.33, random_state=SEED)

print(len(train), "train datas")
print(len(val), "validation datas")
print(len(test), "test datas")

# Persist each split next to the notebook (the row index is written as well,
# since to_csv is called with its default arguments).
train.to_csv("./train.csv")
val.to_csv("./val.csv")
test.to_csv("./test.csv")

Preprocess the data: standardize the features and labels, then build batched `tf.data` datasets.

In [None]:
# Normalize the dataset using StandardScaler.
def normalize_fn(dataset):
    """Standardize every DataFrame in ``dataset`` independently.

    Each frame goes through ``StandardScaler.fit_transform``, so the scaling
    statistics are computed from that frame alone.

    Args:
        dataset: Iterable of pandas DataFrames.

    Returns:
        List of new DataFrames, in input order, holding the scaled values
        under the original column names. The original row index is not
        carried over.
    """
    scaler = StandardScaler()
    scaled_frames = []
    for frame in dataset:
        # fit_transform refits the scaler, so nothing leaks between frames.
        scaled_values = scaler.fit_transform(frame)
        scaled_frames.append(pd.DataFrame(scaled_values, columns=frame.columns))
    return scaled_frames


# Convert a Pandas Dataframe to tf.tensor
def tensor_fn(dataset, dtype=tf.float64):
    """Turn each DataFrame in ``dataset`` into a TensorFlow tensor.

    Args:
        dataset: Iterable of pandas DataFrames.
        dtype: TensorFlow dtype of the resulting tensors (``tf.float64`` by
            default).

    Returns:
        List of tensors, one per input frame, in input order.
    """
    tensors = []
    for frame in dataset:
        tensors.append(tf.convert_to_tensor(frame.values, dtype))
    return tensors


# Names of the regression target columns.
LABEL_COLUMNS = ["pkg_energy", "dram_energy"]


def _split_features_labels(dataframe):
    """Drop the 'file_name' column and separate labels from features.

    Args:
        dataframe: DataFrame containing 'file_name', the label columns and
            the feature columns.

    Returns:
        Tuple ``(features, labels)`` of DataFrames. The input is not mutated.
    """
    dataframe = dataframe.drop("file_name", axis=1)
    labels = dataframe[LABEL_COLUMNS]
    features = dataframe.drop(columns=LABEL_COLUMNS)
    return features, labels


# Create a normalized tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=48, scalers=None):
    """Build a batched ``tf.data.Dataset`` of (features, labels) pairs.

    Args:
        dataframe: DataFrame with a 'file_name' column, the label columns
            'pkg_energy' and 'dram_energy', and the feature columns.
        shuffle: When true, shuffle with a buffer covering every row.
        batch_size: Number of rows per batch.
        scalers: Optional ``(feature_scaler, label_scaler)`` pair of already
            fitted scalers. When given, they are applied with ``transform``
            so that every split shares the same scaling. When omitted, the
            features and labels are each standardized with statistics from
            this dataframe alone (the previous behavior).

    Returns:
        The batched dataset.
    """
    features, labels = _split_features_labels(dataframe)

    if scalers is None:
        normalized_ds = normalize_fn([features, labels])
    else:
        feature_scaler, label_scaler = scalers
        normalized_ds = [
            pd.DataFrame(feature_scaler.transform(features), columns=features.columns),
            pd.DataFrame(label_scaler.transform(labels), columns=labels.columns),
        ]
    feature_tensor, label_tensor = tensor_fn(normalized_ds)

    ds = tf.data.Dataset.from_tensor_slices((feature_tensor, label_tensor))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    return ds.batch(batch_size)


# Initialize the training, validation and testing datasets.
batch_size = 48

# Fit the scalers on the training split only and reuse them for validation and
# test. Standardizing each split with its own statistics would put the splits
# on different scales and let held-out data influence its own preprocessing.
train_features, train_labels = _split_features_labels(train)
scalers = (
    StandardScaler().fit(train_features),
    StandardScaler().fit(train_labels),
)

train_ds = df_to_dataset(train, batch_size=batch_size, scalers=scalers)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size, scalers=scalers)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size, scalers=scalers)


#### Using the Keras Sequential Model

Define the function R-squared as a metric for the model

In [None]:
def r_squared(y_true, y_pred):
    """Coefficient of determination (R-squared) for one batch.

    For multi-output targets the residual and total sums of squares are
    pooled over all outputs, with the total sum of squares centred on each
    output's own mean.

    Args:
        y_true: Ground-truth values, batch on the first axis.
        y_pred: Predicted values with the same shape as ``y_true``.

    Returns:
        Scalar tensor ``1 - ss_res / ss_total``.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # The datasets carry float64 labels by default while a model may emit
    # float32; align the dtypes so the subtraction is valid when this function
    # is called directly.
    y_true = tf.cast(y_true, y_pred.dtype)

    ss_res = tf.reduce_sum(tf.square(y_true - y_pred))
    # Centre each output column on its own mean (axis=0); a single mean pooled
    # across different targets would inflate the total sum of squares.
    ss_total = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true, axis=0)))
    # epsilon guards against division by zero when the targets are constant.
    return 1 - ss_res / (ss_total + tf.keras.backend.epsilon())
 

Compile and Fit the Keras Sequential model.

In [None]:
# Model create
# The input width (10) has to match the number of feature columns fed to the
# model; the final layer emits one value per label column (2).
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input((10,)),
        tf.keras.layers.Dense(24, activation="relu"),
        tf.keras.layers.Dense(12, activation="relu"),
        tf.keras.layers.Dense(4, activation="relu"),
        tf.keras.layers.Dense(2),
    ]
)

# Model compile
opt = tf.keras.optimizers.SGD(learning_rate=0.01)
loss = tf.keras.losses.Huber(delta=1.35)
# `metrics` is documented as a list; newer Keras versions reject a bare
# callable, so wrap the custom metric in a list.
model.compile(optimizer=opt, loss=loss, metrics=[r_squared])

# Model fit
history = model.fit(train_ds, validation_data=val_ds, epochs=100)

Training and Validation Plot

In [None]:
def plot_history(history, metrics):
    """Plot training and validation curves, one subplot per metric.

    Args:
        history: Object exposing a ``history`` dict that maps a metric name
            to its per-epoch values (e.g. the return value of ``model.fit``).
        metrics: Sequence of metric names to plot. The validation curve for
            each is looked up under ``"val_<name>"`` and drawn only when that
            key is present.

    Returns:
        None. The figure is rendered by the active matplotlib backend.
    """
    metrics = list(metrics)
    if not metrics:
        return

    # One column per metric, so any number of metrics fits on a single row.
    # squeeze=False keeps `axes` two-dimensional even for a single metric.
    fig, axes = plt.subplots(
        1, len(metrics), figsize=(6 * len(metrics), 5), squeeze=False
    )

    for ax, key in zip(axes[0], metrics):
        ax.plot(history.history[key])
        legend_labels = ["train"]

        val_key = "val_{}".format(key)
        if val_key in history.history:
            ax.plot(history.history[val_key])
            legend_labels.append("validation")

        ax.set_title("model {}".format(key))
        ax.set_ylabel(key)
        ax.set_xlabel("epochs")
        ax.legend(legend_labels)

    fig.tight_layout()

# Draw the loss and R-squared curves recorded during training.
plot_history(history, metrics=["loss", "r_squared"])

Save and summarize the model

In [None]:
# Write the trained model to disk, then print its layer-by-layer summary.
model_path = "model.keras"
model.save(model_path)

model.summary()

Evaluate the model using the test set

In [None]:
# Model evaluate
# evaluate() is unpacked into the loss followed by the compiled metric.
loss, r_squred = model.evaluate(test_ds)

for label, value in (("loss:", loss), ("r_squred:", r_squred)):
    print(label, value)