# 3.6 Softmax regression from scratch

In [1]:
from IPython import display
from mxnet import autograd, gluon, np, npx
from d2l import mxnet as d2l

# Switch MXNet to its NumPy-compatible mode; this runs before any array in
# this notebook is created.
npx.set_np()

ModuleNotFoundError: No module named 'mxnet'

In [None]:
# Mini-batch size shared by the training and test iterators.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

In [None]:
# Input width of the flattened examples and number of output classes.
num_inputs, num_outputs = 784, 10

# Weights start as small Gaussian noise, the bias as zeros.
W = np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_outputs))
b = np.zeros(num_outputs)

# Allocate gradient buffers so autograd can write into them.
W.attach_grad()
b.attach_grad()

In [None]:
# Demonstrate summing along each axis while keeping the reduced dimension.
X = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])
X1 = X.sum(axis=0, keepdims=True)  # column sums
X2 = X.sum(axis=1, keepdims=True)  # row sums
X.shape, X1, X1.shape, X2, X2.shape

In [None]:
def softmax(X):
    """Apply the softmax function to every row of a 2-D array.

    Each row is exponentiated and divided by the sum of its exponentials,
    so every output row is non-negative and sums to 1.

    Args:
        X: 2-D array; the reduction runs along axis 1.

    Returns:
        Array of the same shape as ``X`` holding the row-wise softmax.
    """
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged but keeps exp() from overflowing to inf (and the ratio from
    # becoming NaN) when the inputs are large.
    shifted = X - X.max(axis=1, keepdims=True)
    X_exp = np.exp(shifted)
    partition = X_exp.sum(axis=1, keepdims=True)
    # Broadcasting divides every row by its own normalizer.
    return X_exp / partition

In [None]:
# Sanity check: every row of the softmax output should sum to 1.
X = np.random.normal(loc=0, scale=1, size=(2, 5))
X_prob = softmax(X)
X, X_prob, X_prob.sum(axis=1)

In [None]:
def net(X):
    """Softmax-regression forward pass using the global ``W`` and ``b``.

    ``X`` is reshaped to rows of ``W.shape[0]`` features, multiplied by
    ``W``, offset by ``b`` and passed through ``softmax``.
    """
    flat = X.reshape((-1, W.shape[0]))
    logits = np.dot(flat, W) + b
    return softmax(logits)

# Loss function

In [None]:
# Toy example: two samples over three classes, with their true labels.
y = np.array([0, 2])
y_hat = np.array([[0.1, 0.3, 0.6],
                  [0.3, 0.2, 0.5]])
# Pick, for each row, the predicted probability of its true class.
y_hat[[0, 1], y]

In [None]:
def cross_entropy(y_hat, y):
    """Return the per-example cross-entropy loss.

    For row ``i`` the loss is ``-log(y_hat[i, y[i]])``, i.e. the negative
    log of the value ``y_hat`` holds at the index given by ``y[i]``.
    """
    row_ids = range(len(y_hat))
    picked = y_hat[row_ids, y]
    return -np.log(picked)

# Loss of the toy predictions `y_hat` against the toy labels `y`.
cross_entropy(y_hat,y)

#### 3.6.5 Classification Accuracy

In [None]:
def accuracy(y_hat, y):
    """Return the number of correct predictions as a float.

    If ``y_hat`` has more than one column it is reduced to the arg-max
    index along axis 1 first; otherwise it is compared to ``y`` as given.
    The result is a count, not a ratio.
    """
    has_class_scores = len(y_hat.shape) > 1 and y_hat.shape[1] > 1
    if has_class_scores:
        y_hat = y_hat.argmax(axis=1)
    # Cast to the label dtype so the element-wise comparison is like-for-like.
    matches = y_hat.astype(y.dtype) == y
    n_correct = matches.astype(y.dtype).sum()
    return float(n_correct)

In [None]:
# Turn the count of correct toy predictions into a fraction.
accuracy(y_hat, y)/len(y)

In [None]:
class Accumulator:
    """Keep running sums over ``n`` variables.

    Args:
        n: Number of independent running totals to track.
    """

    def __init__(self, n):
        # One float slot per tracked variable (exactly n, not 2 * n).
        self.data = [0.0] * n

    def add(self, *args):
        """Add each positional argument to the total at the same position."""
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        """Zero every total while keeping the number of slots unchanged."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        """Return the running total stored at position ``idx``."""
        return self.data[idx]

In [None]:
def evaluate_accurary(net, data_iter):
    """Return the accuracy of ``net`` over every batch in ``data_iter``.

    The result is the total number of correct predictions divided by the
    total number of label elements seen.
    """
    totals = Accumulator(2)  # slot 0: correct predictions, slot 1: labels seen
    for features, labels in data_iter:
        n_correct = accuracy(net(features), labels)
        totals.add(n_correct, d2l.size(labels))
    return totals[0] / totals[1]

In [None]:
# Accuracy of `net` on the test iterator; in notebook order this runs before
# `train_ch3` has been called.
evaluate_accurary(net,test_iter)

In [None]:
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: Callable mapping a batch of inputs to predictions.
        train_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable ``loss(y_hat, y)`` returning per-example losses.
        updater: Either a ``gluon.Trainer`` (its ``step`` method is used)
            or a callable that takes the batch size.

    Returns:
        Tuple ``(mean training loss, training accuracy)`` for the epoch.
    """
    if isinstance(updater, gluon.Trainer):
        updater = updater.step
    # slot 0: summed loss, slot 1: correct predictions, slot 2: examples seen
    totals = Accumulator(3)
    for features, labels in train_iter:
        # Record the forward pass so gradients can be computed afterwards.
        with autograd.record():
            preds = net(features)
            batch_loss = loss(preds, labels)
        batch_loss.backward()
        updater(features.shape[0])
        totals.add(float(batch_loss.sum()), accuracy(preds, labels), labels.size)
    mean_loss = totals[0] / totals[2]
    mean_acc = totals[1] / totals[2]
    return mean_loss, mean_acc
    

In [None]:
class Animator:
    """Plot one or more curves incrementally on the first axes of a figure.

    Axis labels, limits, scales and the legend are captured at construction
    time and re-applied after every redraw.
    """

    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        legend = [] if legend is None else legend
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        # Wrap a lone axes object in a list so it can be indexed like a grid.
        if nrows * ncols == 1:
            self.axes = [self.axes]

        def _configure():
            # Deferred so the settings can be re-applied after each `cla()`.
            return d2l.set_axes(self.axes[0], xlabel, ylabel, xlim, ylim,
                                xscale, yscale, legend)

        self.config_axes = _configure
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append one point per curve and redraw the figure.

        ``y`` may be a scalar or a sequence with one value per curve; a
        scalar ``x`` is repeated for every curve. Pairs in which either
        coordinate is ``None`` are skipped.
        """
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        # Create the per-curve point lists on first use.
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is None or b is None:
                continue
            self.X[i].append(a)
            self.Y[i].append(b)
        # Clear and redraw every curve from the accumulated points.
        ax = self.axes[0]
        ax.cla()
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            ax.plot(xs, ys, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)

In [None]:
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train ``net`` for ``num_epochs`` epochs and plot progress per epoch.

    After each epoch the training loss, training accuracy and test accuracy
    are added to an ``Animator``. Once training finishes, the metrics of the
    last epoch are checked with assertions (loss below 0.5, both accuracies
    in the interval (0.7, 1]).
    """
    animator = Animator(
        xlabel='epoch',
        xlim=[1, num_epochs],
        ylim=[0.3, 0.9],
        legend=['train loss', 'train acc', 'test acc'],
    )
    for epoch in range(1, num_epochs + 1):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accurary(net, test_iter)
        animator.add(epoch, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    # Sanity checks on the metrics of the final epoch.
    assert train_loss < 0.5, train_loss
    assert 0.7 < train_acc <= 1, train_acc
    assert 0.7 < test_acc <= 1, test_acc

In [None]:
# Learning rate used by `updater`.
lr = 0.1


def updater(batch_size):
    """Call ``d2l.sgd`` on the global ``W`` and ``b`` with learning rate ``lr``."""
    params = [W, b]
    return d2l.sgd(params, lr, batch_size)

In [None]:
# Train the from-scratch model with the cross-entropy loss and SGD updater.
num_epochs = 10
train_ch3(
    net,
    train_iter,
    test_iter,
    loss=cross_entropy,
    num_epochs=num_epochs,
    updater=updater,
)

# A plot of the training curves should be displayed here...


In [None]:
def predict_ch3(net, test_iter, n=6):
    """Show the first ``n`` images of the first test batch with their labels.

    Each image title has the true label on the first line and the label
    predicted by ``net`` on the second.
    """
    # Only the first batch is needed, so stop after one iteration.
    for features, labels in test_iter:
        break
    true_names = d2l.get_fashion_mnist_labels(labels)
    pred_names = d2l.get_fashion_mnist_labels(net(features).argmax(axis=1))
    titles = ['\n'.join((true_name, pred_name))
              for true_name, pred_name in zip(true_names, pred_names)]
    images = features[:n].reshape((n, 28, 28))
    d2l.show_images(images, 1, n, titles=titles[:n])

# Visualize predictions for the first few test images (default n=6).
predict_ch3(net,test_iter)