# Neural network for regression and classification

$\newcommand{\bw}{w}$
$\newcommand{\bW}{W}$
$\newcommand{\bhW}{\hat{W}}$
$\newcommand{\bH}{H}$
$\newcommand{\bX}{X}$
$\newcommand{\by}{y}$
$\newcommand{\bh}{h}$
$\newcommand{\bp}{p}$
$\newcommand{\hy}{\hat{y}}$
$\newcommand{\bhy}{\hat{y}}$
$\newcommand{\bx}{x}$
$\newcommand{\bbeta}{\beta}$
$\newcommand{\bepsilon}{\epsilon}$
$\newcommand{\bhbeta}{\hat{\beta}}$

### Synthetic dataset

The working directory for the dataset files is given by the function below.

In [None]:
def towdir(s):
    """Return the path of the file named ``s`` inside ``./datasets_book/``.

    The directory prefix is simply prepended to ``s``: the path is not
    normalised and nothing is created on disk.
    """
    dataset_dir = './datasets_book/'
    return dataset_dir + s

import deepglmlib.utils as utils
import numpy as np

In [None]:
import importlib
# Reload the already-imported `utils` module so that changes made to its
# source since the first import are taken into account in this session.
importlib.reload(utils)

In [None]:
import numpy as np

# Start with a large pool (500 per class, times 100 candidates) so that enough
# points survive the radius filters applied below.
n1 = n2 = 500
n = 2 * n1
beta = np.array([-0.5, 3.5, 2.0]).reshape((3, 1))  # not used in this cell

# Candidate points drawn uniformly in the square [-2, 2] x [-2, 2].
x1 = np.random.uniform(-2, 2, size=(n1 * 100, 2))
x2 = np.random.uniform(-2, 2, size=(n1 * 100, 2))

# Class 0 candidates: inside the disk of radius 0.44.
x1 = x1[np.sqrt(x1[:, 0] ** 2 + x1[:, 1] ** 2) < 0.44]
# Class 1 candidates: outside the disk of radius 0.46.
x2 = x2[np.sqrt(x2[:, 0] ** 2 + x2[:, 1] ** 2) > 0.46]

# Final sample sizes: 50 points per class.
n1 = n2 = 50
n = n1 + n2

# Keep the first n1 / n2 candidates of each class and add a small Gaussian
# jitter (standard deviation 0.01) to every coordinate.
x = np.vstack([x1[:n1], x2[:n2]]) + np.random.normal(0, 0.01, size=(n, 2))
# Labels: 0 for the inner points, 1 for the outer points, as a column.
y = np.vstack([np.zeros((n1, 1)), np.ones((n2, 1))])

# Features with a leading column of ones.
X = np.hstack([np.ones((len(x), 1)), x])

In [None]:
# Write the features x and the labels y side by side (one row per point, the
# label in the last column) to a text file in the directory given by towdir.
np.savetxt(towdir("./xy_2d_diskandnoise_reglogistic.txt"),np.hstack([x,y]))

In [None]:
import numpy as np

# Read the saved sample back: the first two columns are the features and the
# third column is the label.
xy = np.loadtxt(towdir("./xy_2d_diskandnoise_reglogistic.txt"))
n = len(xy)
x = xy[:, [0, 1]]
y = xy[:, 2].reshape((n, 1))
# Features with a leading column of ones.
X = np.hstack([np.ones((n, 1)), x])

Let us shuffle the rows to remove any ordering structure before the mini-batches are cycled.

In [None]:
# One random permutation of the row indexes, applied identically to x, y and X
# so that the rows stay aligned. Each array is overwritten in place: the
# right-hand side is a reordered copy, so no row is read after being replaced.
ids_random = np.random.permutation(len(y))
x[:] = x[ids_random]
y[:] = y[ids_random]
X[:] = X[ids_random]

x.shape, y.shape, X.shape

In [None]:
# Flatten the labels to a one-dimensional array.
y  = y.ravel()

In [None]:
# %matplotlib inline
import matplotlib.pyplot as plt
import deepglmlib.utils as utils

# True frontier from data generation: a circle of radius 0.45, sampled at
# 20 angles between 0 and 2*pi.
theta = np.linspace(0, 2*np.pi, 20)
x1_circle, x2_circle = 0.45*np.cos(theta), 0.45*np.sin(theta)

fig, ax1 = plt.subplots(1, 1, figsize=(5, 5))

# Scatter of the two classes; the first and second coordinates are passed
# separately, split by label, and the axis limits follow the data range.
utils.f_vizu2d_beta(
    ax1,
    x[y.ravel() == 0, 0], x[y.ravel() == 1, 0],
    x[y.ravel() == 0, 1], x[y.ravel() == 1, 1],
    [], [],
    xlim=[min(x[:, 0]), max(x[:, 0])],
    ylim=[min(x[:, 1]), max(x[:, 1])],
    samplename="Whole sample",
)
ax1_ = ax1.plot(x1_circle, x2_circle, color='m', label="true frontier")
plt.legend(fancybox=True, framealpha=0.2, loc="lower left")
plt.show()

## Training a neural network with a hidden layer for a nonlinear logistic model
## with PyTorch

In [None]:
import torch

The subsamples are drawn randomly without replacement: the test set is sampled first because it is smaller, and it is then subtracted from the whole sample to obtain the train set. This is equivalent to a function in **sklearn**, which offers more options, but it is written in two lines here. Note that this split shuffles the data, as the order of the indexes is not kept. The indexes are the working backbone of the approach for large datasets, because in that case it is not possible to replicate or load the full dataset in the computer memory.

In [None]:
# Split the n row indexes into a train part and a test part.
# NOTE(review): the split proportion and the returned container types are
# decided inside utils.f_splitIndex, which is not shown here — confirm there.
ids_train, ids_test, ids_all = utils.f_splitIndex(n)

print(len(ids_all), len(ids_train), len(ids_test))
# Indexes that belong to neither subset; an empty set means that train and
# test together cover every index.
print(set(ids_all)-set(ids_test)-set(ids_train)) 

In [None]:
from torch.utils.data import DataLoader, TensorDataset
# Pair each feature row of x with the matching entry of y; torch.Tensor(...)
# converts the numpy arrays to tensors of torch's default floating type.
dataset = TensorDataset( torch.Tensor(x), torch.Tensor(y) )

In [None]:
# Split the dataset into a train part and a test part (dl_train / dl_test are
# presumably DataLoader objects — confirm in utils.f_splitDataset).
# NOTE(review): batch size, split ratio and shuffling are set inside that
# helper, which is not shown here. This call also rebinds the name `n`.
dl_train, dl_test, n, n_train, n_test = utils.f_splitDataset(dataset)

In [None]:
# Display the three counts returned by the split (presumably the total, train
# and test sample sizes — confirm in utils.f_splitDataset).
n, n_train, n_test

### Definition of the neural network

In [None]:
import torch.nn as nn

class GNLMRegression(nn.Module):
    """Feed-forward network assembled from an ordered list of layers.

    Parameters
    ----------
    name
        Label stored on the instance as ``self.name``.
    layers
        Sequence of modules; they are chained in the given order.
    """

    def __init__(self, name, layers):
        super().__init__()
        self.name, self.layers = name, layers
        # The chained container is what ``forward`` evaluates; the list above
        # is kept as given, as a plain attribute.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of the chained layers applied to ``x``."""
        output = self.net(x)
        return output

In [None]:
# Layer sizes: 2 inputs, one hidden layer of 10 nodes, 1 output.
nb_nodes_in, nb_nodes_hid1, nb_nodes_out = 2, 10, 1

# Affine map with bias -> tanh -> linear map without bias.
layers = [
    nn.Linear(nb_nodes_in, nb_nodes_hid1, bias=True),
    nn.Tanh(),
    nn.Linear(nb_nodes_hid1, nb_nodes_out, bias=False),
]

### Definition of the objects required before training the nn

In [None]:
def transform_yb(yb,name_model,yhatb=None,device=None):
    """Return the batch of targets ``yb`` flattened to one dimension.

    ``name_model``, ``yhatb`` and ``device`` are accepted but not used here
    (presumably kept to match the signature expected by the training helper).
    """
    flat_targets = yb.ravel()
    return flat_targets

def transform_yhatb(yhatb,name_model):
    """Return the batch of model outputs ``yhatb`` flattened to one dimension.

    ``name_model`` is accepted but not used here.
    """
    flat_outputs = yhatb.ravel()
    return flat_outputs

The object `net`, which is required by the function `forward`, has been added to the class.

In [None]:
# Standalone container built from the layer objects of `layers`, printed to
# show the architecture. It is not used afterwards: GNLMRegression builds its
# own `net` in its constructor.
net = nn.Sequential(*layers)
print(net)

In [None]:
# Show the plain Python list of layer modules.
print(layers)

### Training

In [None]:
import copy
# Deep-copy the layers so that the model owns its own layer objects, separate
# from those held in the `layers` list.
model =  GNLMRegression("LMLP",copy.deepcopy(layers))

# Training settings. alpha_t is the learning rate given to the optimizer.
# NOTE(review): nbmax_epoqs and debug_out are consumed by utils.MyMonitorTest
# (not shown) — presumably the maximum number of epochs and the reporting
# period; confirm there.
nbmax_epoqs=6000
alpha_t= 1e-3
debug_out=100
   
# Binary cross-entropy computed from raw scores (logits), summed rather than
# averaged over the elements of a batch.
loss      = torch.nn.BCEWithLogitsLoss(reduction='sum')
# Plain stochastic gradient descent: learning rate alpha_t, no momentum.
optimizer = torch.optim.SGD(model.parameters(), lr=alpha_t, momentum=0.0)
monitor   = utils.MyMonitorTest(model,loss,dl_train,dl_test,nbmax_epoqs,debug_out)

# Run the training. The two transform callbacks flatten the targets and the
# model outputs to one dimension.
# NOTE(review): the meaning of the returned values and of `printed` is defined
# in utils.f_train_glmr (not shown) — confirm there.
loss_train_s,tmax,monistopc  = utils.f_train_glmr(dl_train,model,optimizer,loss,monitor,
                                                device=None,printed=2, 
                                                loss_yy_model = None,
                                                transform_Xb=None,
                                                transform_yb=transform_yb,
                                                transform_yhatb=transform_yhatb,)

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
# Draw the train loss against its index and the test loss against the steps
# recorded by the monitor, each divided by its sample count; only the test
# entries with a strictly positive loss are kept.
# NOTE(review): the result is bound to `plt`, which replaces the pyplot module
# under that name unless utils.f_draw_s returns that module — confirm.
plt = utils.f_draw_s([ range(len(loss_train_s)), monitor.step_test_s[monitor.loss_test_s>0].astype(int) ],
               [ loss_train_s/n_train,monitor.loss_test_s[monitor.loss_test_s>0]/n_test],
               ["b-", "r-"] ,"t",[ "loss train", "loss test"], " ",ax)

In [None]:
# Save the whole model object to one file, then only its state_dict (the
# parameters) to a second file, both in the directory given by towdir.
torch.save(model,towdir("deepmodel_diskandnoise.pth"))
torch.save(model.state_dict(),towdir("deepmodelw_diskandnoise.pth"))

In [None]:
# Evaluate the trained model on the train data and on the test data.
# NOTE(review): the meaning of the third argument (True) and the way the
# accuracy is computed are defined in utils.f_test_glmr (not shown) — confirm.
acc_train, yhat_train, y_train = utils.f_test_glmr(model,dl_train,True)
acc_test, yhat_test, y_test = utils.f_test_glmr(model,dl_test, True)

# Print both accuracies (utils.nprd(..., 4) presumably rounds to 4 decimals —
# confirm in utils).
print("acc_train=",utils.nprd(acc_train,4), " acc_test=",utils.nprd(acc_test,4))

Note that the class **ModuleList** is also available for dealing with lists of layers, but it was not considered herein.

In [None]:
import deepglmlib.utils as utils
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
# Draw the boundary learned by the model together with the data.
# NOTE(review): the last argument (300) is presumably the grid resolution —
# confirm in utils.f_plot_2d_boudary_MLP.
utils.f_plot_2d_boudary_MLP(ax,model,x,y,300)
# Overlay the true circular frontier for comparison.
ax.plot(x1_circle,x2_circle,color='m',label="true frontier")
ax.legend(fancybox=True, framealpha=0.2, loc="lower left")