In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the apple quality dataset; the relative path is resolved against the
# current working directory.
apple_quality = pd.read_csv('apple_quality.csv')

# Report the dataset dimensions as (rows, columns) and then the column labels,
# one per printed line.
print(apple_quality.shape, apple_quality.columns, sep='\n')

(4001, 9)
Index(['A_id', 'Size', 'Weight', 'Sweetness', 'Crunchiness', 'Juiciness',
       'Ripeness', 'Acidity', 'Quality'],
      dtype='object')


In [8]:
# Split the data into a training set and a test set
from sklearn.model_selection import train_test_split

# Feature matrix: the seven numeric descriptors of each apple (A_id is left out)
X = apple_quality.loc[
    :,
    ['Size', 'Weight', 'Sweetness', 'Crunchiness', 'Juiciness', 'Ripeness', 'Acidity'],
]
# Target: the quality label
y = apple_quality.loc[:, 'Quality']

# A fixed random_state keeps the split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

We denote the width of the neural network by $m := \min_{l \in [L]} m_l$, where $m_l$ is the width of the $l$-th layer. Assume that the weights $W_0$ are initialized from a standard normal distribution $\mathcal{N}(0, 1)$, that the last-layer activation $\sigma_{L+1}$ satisfies $|\sigma'_{L+1}(z)| \geq \rho > 0$, and that $\lambda_0 := \lambda_{\min}(K(W_0)) > 0$, where $K$ denotes the tangent kernel of the network. For any $\mu \in (0, \lambda_0 \rho^2)$, if the width of the network
satisfies:
$$m = \widetilde{\Omega}\left(\frac{nR^{6L+2}}{(\lambda_0 - \mu\rho^{-2})^2}\right)$$

then the $\mu\text{-PL}^*$ condition holds for the square loss function in the ball $B(W_0, R)$.

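With a ReLU activation, $\sigma'(z) = 1$ for $z > 0$, so $\rho = 1$ on the active region (for $z < 0$ the derivative is $0$, so the bound only holds where the unit is active).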

In [18]:
import torch.nn as nn

class NeuralNetwork(nn.Module):
    """Fully connected network with two hidden layers and ReLU activations.

    Layer widths are ``input_size -> hidden_size -> 2 * hidden_size ->
    output_size``. No activation is applied after the output layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three linear layers and the shared ReLU module.

        Args:
            input_size: Number of input features.
            hidden_size: Width of the first hidden layer; the second hidden
                layer is twice as wide.
            output_size: Number of output units.
        """
        super().__init__()
        wide_size = 2 * hidden_size
        # Registration order (hidden1, hidden2, output, relu) determines the
        # order in which parameters() and named_parameters() yield tensors.
        self.hidden1 = nn.Linear(input_size, hidden_size)
        self.hidden2 = nn.Linear(hidden_size, wide_size)
        self.output = nn.Linear(wide_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the network output for the input ``x``."""
        first_hidden = self.relu(self.hidden1(x))
        second_hidden = self.relu(self.hidden2(first_hidden))
        return self.output(second_hidden)

# Network dimensions: one input per feature column, 20 units in the first
# hidden layer, and a single output unit.
input_size = X.shape[1]
hidden_size = 20
output_size = 1

# Instantiate the network with those dimensions
model = NeuralNetwork(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

# Print the name and shape of every parameter tensor, layer by layer
for name, param in model.named_parameters():
    print(name, param.size())

# Print the total number of parameters
print(sum(tensor.numel() for tensor in model.parameters()))


hidden1.weight torch.Size([20, 7])
hidden1.bias torch.Size([20])
hidden2.weight torch.Size([40, 20])
hidden2.bias torch.Size([40])
output.weight torch.Size([1, 40])
output.bias torch.Size([1])
1041


In [16]:
def init_weights(m):
    """Initialize a linear layer with N(0, 1) weights and a zero bias.

    Intended to be passed to ``nn.Module.apply``, which calls it on every
    submodule. Modules that are not linear layers are left untouched.

    Args:
        m: Any ``nn.Module``. Only instances of ``nn.Linear`` (including
            subclasses) are modified, in place.

    Returns:
        None.
    """
    # isinstance is the idiomatic type check and also covers nn.Linear subclasses.
    if not isinstance(m, nn.Linear):
        return

    # nn.init works under no_grad, so no autograd history is recorded and the
    # deprecated-style `.data` access is not needed.
    nn.init.normal_(m.weight, mean=0.0, std=1.0)

    # A layer built with bias=False has m.bias set to None; skip it instead of
    # raising AttributeError.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# Run init_weights on every submodule of `model` (and on `model` itself),
# re-initializing the linear layers in place. apply() returns the module, so
# as the last expression of the cell it is what gets displayed.
model.apply(init_weights)

NeuralNetwork(
  (hidden1): Linear(in_features=7, out_features=20, bias=True)
  (hidden2): Linear(in_features=20, out_features=40, bias=True)
  (output): Linear(in_features=40, out_features=1, bias=True)
  (relu): ReLU()
)

In [21]:
# Scratch check: hidden2.weight has shape (40, 20), i.e. 20 * 40 = 800 entries.
20*40

800

In [17]:
def F(w, X, loss_fn, y=None, net=None):
    """Load the flat weight vector ``w`` into a network and evaluate a loss.

    The entries of ``w`` are copied, in order, into the network's parameters
    following the order of ``net.parameters()``. For the notebook's
    ``NeuralNetwork`` that order is hidden1.weight, hidden1.bias,
    hidden2.weight, hidden2.bias, output.weight, output.bias. The network is
    then evaluated on ``X`` and the predictions are handed to ``loss_fn``.

    Args:
        w: 1-D tensor holding every parameter of the network, concatenated.
            Its length must equal the total number of network parameters.
        X: Batch of inputs accepted by the network's ``forward`` (assumed to
            be a float tensor with one sample per row - confirm against the
            caller; a pandas DataFrame must be converted first).
        loss_fn: Callable applied to the predictions. Called as
            ``loss_fn(pred, y)`` when ``y`` is given, else ``loss_fn(pred)``.
        y: Optional targets forwarded to ``loss_fn``. They should have the
            same shape as the predictions, otherwise an elementwise loss may
            silently broadcast.
        net: Network to load the weights into. Defaults to the notebook-level
            ``model``.

    Returns:
        Whatever ``loss_fn`` returns for the predictions.

    Raises:
        ValueError: If ``w`` does not contain exactly one entry per network
            parameter.

    Note:
        The network's parameters are overwritten as a side effect. Because
        ``reshape`` returns a view when it can, the parameters may share
        storage with ``w`` afterwards.
    """
    if net is None:
        # Fall back to the network defined at notebook level.
        net = model

    params = list(net.parameters())
    expected = sum(p.numel() for p in params)
    if w.numel() != expected:
        raise ValueError(
            f"w has {w.numel()} entries but the network has {expected} parameters"
        )

    # Walk through w consecutively, giving each parameter the next
    # p.numel() entries reshaped to that parameter's shape.
    offset = 0
    for p in params:
        count = p.numel()
        p.data = w[offset:offset + count].reshape(p.shape)
        offset += count

    # Evaluate the whole batch in one forward pass.
    pred = net(X)

    if y is None:
        return loss_fn(pred)
    return loss_fn(pred, y)

def loss_fn(y_pred, y):
    """Return half of the mean squared error between ``y_pred`` and ``y``.

    Args:
        y_pred: Predicted values; must support subtraction, ``**`` and ``.mean()``.
        y: Target values, broadcastable against ``y_pred``.

    Returns:
        ``0.5 * mean((y_pred - y) ** 2)``.
    """
    residual = y_pred - y
    mean_squared_error = (residual ** 2).mean()
    return 0.5 * mean_squared_error

def compute_derivative_numerically(F, w, h=1e-7, args=()):
    """Approximate the derivative of ``F`` at ``w`` with a central difference.

    Computes ``(F(w + h, *args) - F(w - h, *args)) / (2 * h)``.

    Args:
        F: Callable whose first argument is the point of evaluation.
        w: Point at which to differentiate. If ``w`` is an array or tensor,
            ``h`` is broadcast to every component, so the result is the
            directional derivative along the all-ones direction (the sum of
            the partial derivatives), not the gradient.
        h: Step size. NOTE(review): the default 1e-7 is at the limit of
            float32 precision (machine epsilon is about 1.2e-7), so with
            float32 tensors ``w + h`` may not differ from ``w``; pass a
            larger step or use float64 in that case.
        args: Extra positional arguments forwarded unchanged to ``F`` after
            the evaluation point, e.g. ``args=(X, loss_fn)``.

    Returns:
        The central-difference quotient, of the same type as ``F``'s output.
    """
    forward = F(w + h, *args)
    backward = F(w - h, *args)
    return (forward - backward) / (2.0 * h)



