<a href="https://colab.research.google.com/github/mafaldasalomao/pavic_treinamento_ml/blob/main/PAVIC_ML_05_Rede_Neural_Intui%C3%A7%C3%A3o.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Neste notebook, vamos codificar Redes Neurais de forma manual para tentar entender intuitivamente como elas são implementadas na prática.

# Sumário

- [Exemplo 1](#Exemplo-1)
- [Exemplo 2](#Exemplo-2)
- [O que precisamos para implementar uma Rede Neural?](#O-que-precisamos-para-implementar-uma-Rede-Neural?)
- [Referências](#Referências)

# Imports e Configurações

In [1]:
import numpy as np

# Exemplo 1

<img src='https://github.com/mafaldasalomao/pavic_treinamento_ml/blob/main/Machine_Learning/figures/backprop_example_1.png?raw=true'>

In [2]:
def sigmoid(x, derivative=False):
    """Logistic sigmoid of ``x``, or its derivative.

    Args:
        x: Scalar or NumPy array of pre-activation values.
        derivative: When True, return ``s * (1 - s)`` where ``s`` is the
            sigmoid of ``x``; otherwise return ``s`` itself.

    Returns:
        The sigmoid (or its derivative) evaluated element-wise on ``x``.
    """
    activation = 1.0 / (1.0 + np.exp(-x))
    if not derivative:
        return activation
    # d/dx sigmoid(x) expressed through the sigmoid value itself.
    return activation * (1 - activation)

In [3]:
# Single training sample (one row) and its target output.
x = np.array([[0.05, 0.10]])
y = np.array([[0.01, 0.99]])

# First layer weights; in this example a single bias value is shared by
# the two neurons of the layer.
w1 = np.array([
    [0.15, 0.20],
    [0.25, 0.30],
])
b1 = np.array([[0.35]])

# Second layer weights and its shared bias.
w2 = np.array([
    [0.40, 0.45],
    [0.50, 0.55],
])
b2 = np.array([[0.60]])

# Step size used by the gradient-descent update.
learning_rate = 0.5

## Forward

In [4]:
# Forward pass (a single iteration).
for step in range(1):
    # Layer 1: affine transform followed by the sigmoid activation.
    inp1 = np.dot(x, w1.T) + b1
    h1 = sigmoid(inp1)

    # Layer 2: same structure, fed with the hidden activations.
    inp2 = np.dot(h1, w2.T) + b2
    out = sigmoid(inp2)

    # Half of the summed squared error between target and prediction.
    cost = 0.5 * np.sum((y - out) ** 2)

# Show the hidden activations, the network output and the cost.
for value in (h1, out, cost):
    print(value)

[[0.59326999 0.59688438]]
[[0.75136507 0.77292847]]
0.2983711087600027


## BackProp

In [5]:
# Backward pass: chain rule from the cost back towards the inputs.
# The upstream gradient is no longer 1: it is the derivative of the cost
# 0.5 * sum((y - out)**2) with respect to `out`.
dout = -(y - out)

# Layer 2 (see the matrix-derivation slides for the dot-product rules).
dinp2 = sigmoid(inp2, derivative=True) * dout
dh1 = np.dot(dinp2, w2)
dw2 = np.dot(dinp2.T, h1)
# Sum over axis 0 with keepdims=True so the result stays 2-D.
db2 = 1.0 * dinp2.sum(axis=0, keepdims=True)

# Layer 1.
dinp1 = sigmoid(inp1, derivative=True) * dh1
dx = np.dot(dinp1, w1)
dw1 = np.dot(dinp1.T, x)
db1 = 1.0 * dinp1.sum(axis=0, keepdims=True)

print(dw1)
print(dw2)

# Gradient-descent update. Each parameter steps along its own gradient;
# the biases must use db2/db1 (not the bias values themselves).
# NOTE(review): db2/db1 have one column per neuron, so a bias that started
# as a single shared value broadcasts to one bias per neuron here -- confirm
# this is the intended behaviour for the shared-bias example.
w2 = w2 - learning_rate * dw2
b2 = b2 - learning_rate * db2
w1 = w1 - learning_rate * dw1
b1 = b1 - learning_rate * db1
# Note that the weights changed only slightly.
print(w1)
print(w2)

[[0.00043857 0.00087714]
 [0.00049771 0.00099543]]
[[ 0.08216704  0.08266763]
 [-0.02260254 -0.02274024]]
[[0.14978072 0.19956143]
 [0.24975114 0.29950229]]
[[0.35891648 0.40866619]
 [0.51130127 0.56137012]]


# Exemplo 2

<img src="https://miro.medium.com/v2/resize:fit:828/format:webp/1*fnU_3MGmFp0LBIzRPx42-w.png"/>


<img src="https://miro.medium.com/v2/resize:fit:828/format:webp/1*fnU_3MGmFp0LBIzRPx42-w.png"/>

In [6]:
def linear(x, derivative=False):
    """Identity activation, or its derivative.

    Args:
        x: NumPy array (or scalar) of pre-activation values.
        derivative: When True, return an array of ones shaped like ``x``.

    Returns:
        ``x`` itself (the same object) for the forward pass, or
        ``np.ones_like(x)`` for the derivative.
    """
    if derivative:
        return np.ones_like(x)
    return x

def relu(x, derivative=False):
    """Rectified linear unit, or its derivative.

    Args:
        x: NumPy array of pre-activation values.
        derivative: When True, return the 0/1 mask of the derivative
            (0 where ``x <= 0``, 1 elsewhere) instead of the activation.

    Returns:
        ``np.maximum(0, x)`` for the forward pass, or the integer 0/1
        mask for the derivative.
    """
    if derivative:
        # Return the mask directly instead of falling through to the
        # forward-pass expression, which only happened to leave it intact.
        return np.where(x <= 0, 0, 1)
    return np.maximum(0, x)

def softmax(x, y_oh=None, derivative=False):
    """Row-wise softmax of ``x``, or a partial derivative helper.

    Args:
        x: 2-D NumPy array of scores; softmax is taken along axis 1.
        y_oh: One-hot targets with the same layout as ``x``. Only used
            (and required) when ``derivative`` is True.
        derivative: When True, return the softmax output with the entries
            selected by ``y_oh`` replaced by ``p * (1 - p)``; all other
            entries keep their softmax value.

    Returns:
        A new array shaped like ``x``.
    """
    if derivative:
        y_pred = softmax(x)
        # Positions of the true class (softmax value times one-hot != 0).
        k = np.nonzero(y_pred * y_oh)
        pk = y_pred[k]
        y_pred[k] = pk * (1.0 - pk)
        return y_pred
    # Subtract each row's maximum before exponentiating: mathematically a
    # no-op for softmax, but it keeps np.exp from overflowing to inf (and
    # the result from becoming NaN) when the scores are large.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)  # normalise row by row

def neg_log_likelihood(y_oh, y_pred, derivative=False):
    """Negative log-likelihood of the true class, or a derivative helper.

    Args:
        y_oh: One-hot targets with the same layout as ``y_pred``.
        y_pred: NumPy array of predicted probabilities.
        derivative: When True, return a copy of ``y_pred`` whose entries
            selected by ``y_oh`` are replaced by ``-1 / p``; all other
            entries keep their original value.

    Returns:
        The mean of ``-log(p)`` over the selected entries, or the array
        described above when ``derivative`` is True.
    """
    # Positions of the true class (prediction times one-hot != 0).
    k = np.nonzero(y_pred * y_oh)
    pk = y_pred[k]
    if derivative:
        # Work on a copy so the caller's predictions are not overwritten.
        grad = y_pred.copy()
        grad[k] = -1.0 / pk
        return grad
    return np.mean(-np.log(pk))

def softmax_neg_log_likelihood(y_oh, y_pred, derivative=False):
    """Softmax followed by negative log-likelihood, or its gradient.

    Args:
        y_oh: One-hot targets with the same layout as ``y_pred``.
        y_pred: 2-D NumPy array of raw scores (softmax is applied here).
        derivative: When True, return the gradient of the cost with
            respect to ``y_pred`` instead of the cost.

    Returns:
        The cost from ``neg_log_likelihood`` on the softmax output, or an
        array shaped like ``y_pred`` holding the gradient divided by the
        number of rows.
    """
    y_softmax = softmax(y_pred)
    if derivative:
        # Locate the true-class entries through the softmax output rather
        # than the raw scores: a true-class score of exactly 0 would make
        # `y_pred * y_oh` vanish and the entry would be skipped.
        k = np.nonzero(y_softmax * y_oh)
        # Start from the softmax values; only the true-class entries are
        # replaced below.
        grad = y_softmax.copy()
        dlog = neg_log_likelihood(y_oh, y_softmax, derivative=True)
        dsoftmax = softmax(y_pred, y_oh, derivative=True)
        # Chain rule at the true class: d(-log p)/dp * dp/dscore.
        grad[k] = dlog[k] * dsoftmax[k]
        return grad / grad.shape[0]  # 1/N
    return neg_log_likelihood(y_oh, y_softmax)

In [7]:
# One training sample with three features and its one-hot target.
x = np.array([[0.1, 0.2, 0.7]])
y = np.array([[1, 0, 0]])

# Weights and biases of the three layers.
w1 = np.array([[0.1, 0.2, 0.3], [0.3, 0.2, 0.7], [0.4, 0.3, 0.9]])
b1 = np.ones((1, 3))
w2 = np.array([[0.2, 0.3, 0.5], [0.3, 0.5, 0.7], [0.6, 0.4, 0.8]])
b2 = np.ones((1, 3))
w3 = np.array([[0.1, 0.4, 0.8], [0.3, 0.7, 0.2], [0.5, 0.2, 0.9]])
b3 = np.ones((1, 3))

learning_rate = 0.01

for epoch in range(301):
    # ---- forward pass ----
    # layer 1: ReLU
    inp1 = np.dot(x, w1.T) + b1
    h1 = relu(inp1)
    # layer 2: sigmoid
    inp2 = np.dot(h1, w2.T) + b2
    h2 = sigmoid(inp2)
    # layer 3: linear (softmax is applied inside the cost function)
    inp3 = np.dot(h2, w3.T) + b3
    out = linear(inp3)

    # Cost of this iteration, evaluated before the parameters change.
    cost = softmax_neg_log_likelihood(y, out)

    # ---- backward pass ----
    dout = softmax_neg_log_likelihood(y, out, derivative=True)

    # layer 3
    dinp3 = linear(inp3, derivative=True) * dout
    dh2 = np.dot(dinp3, w3)
    dw3 = np.dot(dinp3.T, h2)
    db3 = dinp3.sum(axis=0, keepdims=True)

    # layer 2
    dinp2 = sigmoid(inp2, derivative=True) * dh2
    dh1 = np.dot(dinp2, w2)
    dw2 = np.dot(dinp2.T, h1)
    db2 = dinp2.sum(axis=0, keepdims=True)

    # layer 1
    dinp1 = relu(inp1, derivative=True) * dh1
    dx = np.dot(dinp1, w1)
    dw1 = np.dot(dinp1.T, x)
    db1 = dinp1.sum(axis=0, keepdims=True)

    # ---- gradient-descent update ----
    w3 = w3 - learning_rate * dw3
    b3 = b3 - learning_rate * db3
    w2 = w2 - learning_rate * dw2
    b2 = b2 - learning_rate * db2
    w1 = w1 - learning_rate * dw1
    b1 = b1 - learning_rate * db1

    # Report the cost every 30 iterations.
    if epoch % 30 == 0:
        print(cost)

# Final weights of each layer.
for weights in (w1, w2, w3):
    print(weights)

1.1674456052871238
0.6079795820435694
0.36558760028370507
0.24943451548999263
0.18547915491555542
0.14614614804167658
0.1199051550833629
0.10131084011234358
0.08751844300750028
0.07691720650783256
0.06853445083252653
[[0.10083595 0.2016719  0.30585165]
 [0.30086971 0.20173942 0.70608796]
 [0.40145052 0.30290104 0.91015363]]
[[0.20544723 0.30673159 0.50749567]
 [0.30994562 0.5123005  0.71366784]
 [0.61065514 0.41317913 0.81464085]]
[[ 0.66465527  0.98758148  1.39393956]
 [ 0.05020341  0.44006253 -0.06274803]
 [ 0.18514132 -0.12764401  0.56880846]]


# O que precisamos para implementar uma Rede Neural?

# Referências

- [Neural Network from Scratch](https://beckernick.github.io/neural-network-scratch/)
- [Backpropagation Algorithm](https://theclevermachine.wordpress.com/tag/backpropagation-algorithm/)
- [Back-Propagation is very simple. Who made it Complicated ?](https://becominghuman.ai/back-propagation-is-very-simple-who-made-it-complicated-97b794c97e5c)
- [A Step by Step Backpropagation Example](https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/)
- [Understanding softmax and the negative log-likelihood](https://ljvmiranda921.github.io/notebook/2017/08/13/softmax-and-the-negative-log-likelihood/)