# Multi-layered neural network 

In [3]:
import pandas as pd
import numpy as np

# Load the data
df = pd.read_csv("../books_data.csv")
df.head()

# check for nan values in the dataset
df.isna().any()
# We can see that there are nan values in the Sales_in_millions column, i will replace these with 0
df['Sales_in_millions'] = df['Sales_in_millions'].fillna(0)

df.head()

Unnamed: 0.1,Unnamed: 0,Books,Authors,Language,First_Published,Sales_in_millions
0,0,A Tale of Two Cities,Charles Dickens,English,1859,200.0
1,1,The Little Prince (Le Petit Prince),Antoine de Saint-Exupéry,French,1943,200.0
2,2,Harry Potter and the Philosopher's Stone,J. K. Rowling,English,1997,120.0
3,3,And Then There Were None,Agatha Christie,English,1939,100.0
4,4,Dream of the Red Chamber (紅樓夢),Cao Xueqin,Chinese,1791,100.0


In [37]:
# Split the dataset into dependent and independent variables
y = df["Sales_in_millions"]
X = df.drop("Sales_in_millions", axis=1)

# Convert authors and language to numeric values using one-hot encoding
X_enc = pd.get_dummies(X, columns=["Books", "Authors", "Language"])

# Converting all First_Published variables to float values
def convert_to_float(value):
    try:
        return float(value)
    except:
        return float(value[:4])

# Convert the First_Published column to float
X_enc["First_Published"] = X_enc["First_Published"].apply(convert_to_float)

# Normalizing using z-score
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_enc["First_Published"] = scaler.fit_transform(X_enc["First_Published"].values.reshape(-1, 1))
y = scaler.fit_transform(np.array(y).reshape(-1, 1))

# Converting values to float for tensors later
X_enc = X_enc.astype(float)
y = y.astype(float)

In [38]:
from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_enc, y, test_size=0.2, random_state=42)

In [42]:
from keras.models import Sequential
from keras.layers import Dense

# Define the model
model = Sequential()

# Add the input layer
model.add(Dense(units=64, activation='relu', input_shape=(564,)))

# Add hidden layers
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=64, activation='relu'))

# Add the output layer
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Print the model summary
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_20 (Dense)            (None, 64)                36160     
                                                                 
 dense_21 (Dense)            (None, 128)               8320      
                                                                 
 dense_22 (Dense)            (None, 64)                8256      
                                                                 
 dense_23 (Dense)            (None, 1)                 65        
                                                                 
Total params: 52,801
Trainable params: 52,801
Non-trainable params: 0
_________________________________________________________________


In [43]:
# Train the model
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [44]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Assuming 'model' is your trained model, 'X_test' is your test data and 'y_test' are the true labels
y_pred = model.predict(X_test)

# Calculate the mean absolute error and mean squared error
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Print the mean absolute error and mean squared error
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")

Mean Absolute Error: 0.939693249866046
Mean Squared Error: 1.1960968233751337


In [53]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

# Define a function to create a model with a given number of layers and neurons
def create_model(num_layers, num_neurons, learning_rate):
    model = Sequential()
    for i in range(num_layers):
        model.add(Dense(num_neurons, activation='relu'))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])
    return model

results = []

# Define the hyperparameters
num_layers = [2, 3]
num_neurons = [64, 128]
learning_rates = [0.01, 0.001]
epochs = [50, 100]

# Loop over the hyperparameters
for layers in num_layers:
    for neurons in num_neurons:
        for rate in learning_rates:
            for epoch in epochs:
                model = create_model(layers, neurons, rate)
                history = model.fit(X_train, y_train, epochs=epoch, validation_split=0.2, verbose=0)
                y_pred = model.predict(X_test)
                mae = mean_absolute_error(y_test, y_pred)
                mse = mean_squared_error(y_test, y_pred)
                results.append((layers, neurons, epoch, rate, mae, mse))

# Getting the best result based on the mean of the mae and mse
best_results = sorted(results, key=lambda result: (result[4]+result[5])/2)[0]
print(f"Best result:\nLayers: {best_results[0]}\nNeurons: {best_results[1]}\nEpochs: {best_results[2]} \nLearning Rate: {best_results[3]}\nMean Absolute Error: {best_results[4]}\nMean Squared Error: {best_results[5]}")

Best result:
Layers: 3
Neurons: 128
Epochs: 50 
Learning Rate: 0.01
Mean Absolute Error: 0.5281254446754493
Mean Squared Error: 0.7421133431215033


Running different tests with various number of layers, neurons, epochs and learning rate.
From this the best result is performed with:
* 128 neurons
* 3 layers
* 50 epochs
* 0.1 learning rate

Additional features would mean altering the dataset or extracting new information from the already given data

Different preprocessing techniques would mean finding different methods than one-hot encoding and z-score to process the data.