In [358]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder

In [359]:
# Load the data
df = pd.read_csv('./datasets/diamonds.csv')
df = df.drop(df.columns[0], axis=1)
print(df.shape)
df.head()

(53943, 10)


Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [360]:
# Definition of orders and label encoding
cut_order = ['Fair', 'Good', 'Very Good', 'Premium', 'Ideal']
clarity_order = ['I1', 'SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF']

df['cut'] = df['cut'].astype('category')
df['cut'] = df['cut'].cat.set_categories(cut_order, ordered=True)
df['cut'] = df['cut'].cat.codes

df['clarity'] = df['clarity'].astype('category')
df['clarity'] = df['clarity'].cat.set_categories(clarity_order, ordered=True)
df['clarity'] = df['clarity'].cat.codes

# Now use one-hot encoding for color
"""
one_hot_encoder = OneHotEncoder(sparse=False)
color_encoded = one_hot_encoder.fit_transform(df['color'].values.reshape(-1,1))
df_color = pd.DataFrame(color_encoded, columns=one_hot_encoder.get_feature_names_out())
"""

# drop the original 'color' column
df.drop(['color','cut','clarity'], axis=1, inplace=True)

# concat the one-hot encoded 'color' dataframe with the original dataframe
#df = pd.concat([df, df_color], axis=1)

# scale the other number variables
scaler = StandardScaler()
df[['carat', 'depth', 'table', 'x', 'y', 'z','price']] = scaler.fit_transform(df[['carat', 'depth', 'table', 'x', 'y', 'z','price']])

"""color_mapping = {
    'x0_D': 'color_D',
    'x0_E': 'color_E',
    'x0_F': 'color_F',
    'x0_G': 'color_G',
    'x0_H': 'color_H',
    'x0_I': 'color_I',
    'x0_J': 'color_J',
}

df.rename(columns=color_mapping, inplace=True)"""

print(df.shape)
print(print(df.columns))
df.head()

(53943, 7)
Index(['carat', 'depth', 'table', 'price', 'x', 'y', 'z'], dtype='object')
None


Unnamed: 0,carat,depth,table,price,x,y,z
0,-1.198189,-0.174033,-1.099673,-0.904102,-1.587882,-1.536239,-1.571166
1,-1.240384,-1.360676,1.585457,-0.904102,-1.641372,-1.658821,-1.741217
2,-1.198189,-3.384949,3.375544,-0.903851,-1.498733,-1.457436,-1.741217
3,-1.071605,0.454189,0.242892,-0.902096,-1.36501,-1.317342,-1.287749
4,-1.029411,1.082412,0.242892,-0.901846,-1.240202,-1.212272,-1.117699


In [361]:
X = df.drop('price', axis=1).values
y = df['price'].values
print(len(X), len(y))

53943 53943


In [362]:
from sklearn.model_selection import train_test_split
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

In [377]:
from sklearn.utils import shuffle

def shuffle_data(X, y):
    return shuffle(X, y, random_state=0)

def create_batches(data, batch_size):
    for i in range(0, len(data), batch_size):
        yield i, data[i:i + batch_size]

class DenseLayer:
    def __init__(self, num_inputs ,num_neurons):
        self.weights = np.random.normal(0, 0.1, (num_inputs, num_neurons))
        self.bias = np.zeros(num_neurons)

    def forward(self, inputs):
        self.output =  inputs @ self.weights + self.bias
        
    def update_params(self, dweights, dbias, lr):
        self.weights -= lr * dweights
        self.bias -= lr * dbias

def relu(x):
    return np.maximum(0, x)

def mse(y_true, y_pred):
    return np.mean(np.square(y_true - y_pred))

def dev_mse(y_true, y_pred):
    return -2 * (y_true - y_pred)

#forward propagation
#h1 = relu(w1x + b1)
#h2 = relu(w2h1 + b2)
#o = w3*h2 + b3

#cost function
# L = 1/n  * Σ(o_i - y_i)^2

#backpropagation
#dL/do = 2/n*(o - y_actual)
#do/dw3 = h2
#do/dh2 = w3
#dh2/dw2 = h1 if w2h1 + b2 > 0 else 0
#dh1/dw1 = x if w1x + b1 > 0 else 0

#example equations
#dL/dw1 = 2 * (o - y_actual) * w3 * (1 if w2h1 + b2 > 0 else 0) * w2 * (1 if w1x + b1 > 0 else 0) * x

#for bias
#dL/db1 = dL/do * do/dh2 * dh2/dh1 * dh1/db1
#dL/db2 = dL/do * do/dh2 * dh2/db2
#dL/db3 = dL/do * do/db3

#example equations
#dL/db1 = 2*(o - y_actual) * w3 * (1 if w2h1 + b2 > 0 else 0) * w2  * (1 if w1x + b1 > 0 else 0)
#dL/db2 = 2*(o - y_actual) * w3 * (1 if w2h1 + b2 > 0 else 0)
#dL/db3 = 2(o - y_actual)



In [364]:
X = np.array([1,2,3,4])
layerZ = DenseLayer(len([1,2,3,4]),16)
print("weights shape",layerZ.weights.shape)
layerZ.forward(X)
A1_value = relu(layerZ.output)
print("relu shape",A1_value.shape)
layerZ2 = DenseLayer(len(A1_value),8)
print("weights shape",layerZ2.weights.shape)
layerZ2.forward(A1_value)
A2_value = relu(layerZ2.output)
print("relu shape",A2_value.shape)
output = DenseLayer(len(A2_value),1)
print("weights shape",output.weights.shape)
output.forward(A2_value)

#doing backpropagation
der_mse = dev_mse([0.5],output.output)
output_back_bias = der_mse # scalar
output_back_weights = np.outer(A2_value, der_mse)
print("back output weights", output_back_weights.shape)

dev_reluZ2_bias = np.where(A2_value > 0, 1, 0)
layerZ2_back_bias =  np.dot(der_mse, output.weights.T[0]) * dev_reluZ2_bias
layerZ2_back_weights = np.outer(A1_value, layerZ2_back_bias) 
print("back Z2 weights", layerZ2_back_weights.shape, layerZ2_back_bias.shape)

dev_reluZ1_bias = np.where(A1_value > 0, 1, 0)
layerZ1_back_bias = np.dot(layerZ2_back_bias, layerZ2.weights.T) * dev_reluZ1_bias
layerZ1_back_weight = np.outer(X, layerZ1_back_bias)
print("back Z1 weights", layerZ1_back_weight.shape, layerZ1_back_bias.shape)

# Update weights and biases
lr = 0.01  # learning rate
layerZ.update_params(layerZ1_back_weight, layerZ1_back_bias, lr)
layerZ2.update_params(layerZ2_back_weights, layerZ2_back_bias, lr)
output.update_params(output_back_weights, output_back_bias, lr)


weights shape (4, 16)
relu shape (16,)
weights shape (16, 8)
relu shape (8,)
weights shape (8, 1)
back output weights (8, 1)


ValueError: shapes (1,) and (8,) not aligned: 1 (dim 0) != 8 (dim 0)

In [378]:
#Defining layer sizes
num_inputs = 6
num_hidden = 64
num_hidden2 = 32
num_outputs = 1

# Create the layers
layer1 = DenseLayer(num_inputs, num_hidden)
layer2 = DenseLayer(num_hidden, num_hidden2)
output_layer = DenseLayer(num_hidden2, num_outputs)
num_epochs = 12
# Learning rate
lr = 0.01

# Training loop
for epoch in range(num_epochs):
    epoch_loss = 0
    for i, batch in create_batches(X_train, 32):
        #print("batch",batch.shape)
        y_batch = y_train[i:i + len(batch)]
        # Forward pass
        layer1.forward(batch)
        A1 = relu(layer1.output)
        #print("A1",A1.shape)
        
        layer2.forward(A1)
        A2 = relu(layer2.output)
        
        #print("A2",A2.shape)
        
        output_layer.forward(A2)
        y_pred = output_layer.output
        #print("output",y_pred.shape)

        # Calculate loss
        batch_loss = mse(y_batch, y_pred)
        epoch_loss += batch_loss

        # Backward pass
        
        # output_layer
        der_mse = dev_mse(y_batch, y_pred.reshape(-1))
        output_back_bias = np.sum(der_mse, axis=0)
        output_back_weights = A2.T @ der_mse
        #print("output weights",output_back_weights.shape)
        # layer2 
        #print(der_mse.shape, output_layer.weights.shape)
        dev_h2 = (A2 > 0).astype(float) # ReLU derivative
        L2_back_bias = der_mse.reshape(-1, 1) @ output_layer.weights.T * dev_h2
        #L2_back_bias = np.sum(L2_back_bias, axis=0) # Sum over batch
        #print("L2 bias",L2_back_bias.shape)
        L2_back_weights = A1.T @ L2_back_bias / len(A1)
        #print("L2 weights",layer2.weights.shape)
        # layer1
        dev_h1 = (A1 > 0).astype(float) # ReLU derivative
        L1_back_bias = L2_back_bias @ layer2.weights.T * dev_h1
        #print("L1 bias",L1_back_bias.shape)
        L1_back_weights = batch.T @ L1_back_bias / len(batch)
        #print("L1 weights",L1_back_weights.shape)
        
        # Update weights and biases
        output_layer.update_params(output_back_weights.reshape(-1, 1),output_back_bias, lr) 
        layer2.update_params(L2_back_weights,np.sum(L2_back_bias, axis=0),lr)
        layer1.update_params(L1_back_weights,np.sum(L1_back_bias, axis=0),lr)
        #print("i",i, epoch_loss)
        
            

    print(f'Epoch {epoch+1}, loss: {epoch_loss/len(X_train)}')

# Test the model
layer1.forward(X_test)
A1 = relu(layer1.output)

layer2.forward(A1)
A2 = relu(layer2.output)

output_layer.forward(A2)
y_pred = output_layer.output

test_loss = mse(y_test, y_pred)
print(f'Test loss: {test_loss}')

    

Epoch 1, loss: 0.05659965715980727
Epoch 2, loss: 0.05711412369277429
Epoch 3, loss: 0.05712086998944007
Epoch 4, loss: 0.05715956248420158
Epoch 5, loss: 0.05717114036536126
Epoch 6, loss: 0.05718808536680764
Epoch 7, loss: 0.057200583524243884
Epoch 8, loss: 0.05720714977514421
Epoch 9, loss: 0.05721397559190969
Epoch 10, loss: 0.057217722744095326
Epoch 11, loss: 0.05722727385562494
Epoch 12, loss: 0.05723042023418636
Test loss: 1.8871032097175606


In [371]:
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# create a StandardScaler to normalize the input features
scaler = StandardScaler()

# create the MLPRegressor
mlp = MLPRegressor(hidden_layer_sizes=(64, 32), activation='relu', solver='adam', 
                   learning_rate='adaptive', max_iter=100, batch_size=32, verbose=True)

# create a pipeline that first normalizes the input features and then fits the MLPRegressor
model = make_pipeline(scaler, mlp)

# train the model
model.fit(X_train, y_train)

Iteration 1, loss = 0.06889101
Iteration 2, loss = 0.06311986
Iteration 3, loss = 0.06186622
Iteration 4, loss = 0.06212780
Iteration 5, loss = 0.06092901
Iteration 6, loss = 0.06053496
Iteration 7, loss = 0.05989047
Iteration 8, loss = 0.05993571
Iteration 9, loss = 0.05937296
Iteration 10, loss = 0.05938943
Iteration 11, loss = 0.05912356
Iteration 12, loss = 0.05911808
Iteration 13, loss = 0.05882332
Iteration 14, loss = 0.05879753
Iteration 15, loss = 0.05870347
Iteration 16, loss = 0.05872644
Iteration 17, loss = 0.05851157
Iteration 18, loss = 0.05848061
Iteration 19, loss = 0.05830786
Iteration 20, loss = 0.05841894
Iteration 21, loss = 0.05838787
Iteration 22, loss = 0.05841800
Iteration 23, loss = 0.05810619
Iteration 24, loss = 0.05821441
Iteration 25, loss = 0.05800304
Iteration 26, loss = 0.05815076
Iteration 27, loss = 0.05794808
Iteration 28, loss = 0.05799040
Iteration 29, loss = 0.05792071
Iteration 30, loss = 0.05786458
Iteration 31, loss = 0.05791078
Iteration 32, los

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('mlpregressor',
                 MLPRegressor(batch_size=32, hidden_layer_sizes=(64, 32),
                              learning_rate='adaptive', max_iter=100,
                              verbose=True))])