## Aprendizado Profundo - UFMG

## Problemas

Como vimos acima, há muitos passos na criação e definição de uma nova rede neural.
A maior parte desses ajustes depende diretamente do problema.

Abaixo, listamos alguns problemas. Todos os problemas e datasets usados vêm do [Center for Machine Learning and Intelligent Systems](http://archive.ics.uci.edu/ml/datasets.php).


**Seu objetivo é determinar e implementar um modelo para cada problema.**

Isso inclui definir uma arquitetura (por enquanto usando somente camadas [Densas](https://mxnet.incubator.apache.org/api/python/gluon/nn.html#mxnet.gluon.nn.Dense), porém podemos variar as ativações -- [Sigmoid](https://mxnet.incubator.apache.org/api/python/symbol/symbol.html#mxnet.symbol.Symbol.sigmoid), [Tanh](https://mxnet.incubator.apache.org/api/python/symbol/symbol.html#mxnet.symbol.Symbol.tanh), [ReLU](https://mxnet.incubator.apache.org/api/python/symbol/symbol.html#mxnet.symbol.Symbol.relu), [LeakyReLU, ELU, SeLU, PReLU, RReLU](https://mxnet.incubator.apache.org/api/python/symbol/symbol.html#mxnet.symbol.LeakyReLU)), uma função de custo ([L1](https://mxnet.incubator.apache.org/api/python/gluon/loss.html#mxnet.gluon.loss.L1Loss), [L2](https://mxnet.incubator.apache.org/api/python/gluon/loss.html#mxnet.gluon.loss.L2Loss), [Huber](https://mxnet.incubator.apache.org/api/python/gluon/loss.html#mxnet.gluon.loss.HuberLoss), [*Cross-Entropy*](https://mxnet.incubator.apache.org/api/python/gluon/loss.html#mxnet.gluon.loss.SoftmaxCrossEntropyLoss), [Hinge](https://mxnet.incubator.apache.org/api/python/gluon/loss.html#mxnet.gluon.loss.HingeLoss)), e um algoritmo de otimização ([SGD](https://mxnet.incubator.apache.org/api/python/optimization/optimization.html#mxnet.optimizer.SGD), [Momentum](https://mxnet.incubator.apache.org/api/python/optimization/optimization.html#mxnet.optimizer.SGD), [RMSProp](https://mxnet.incubator.apache.org/api/python/optimization/optimization.html#mxnet.optimizer.RMSProp), [Adam](https://mxnet.incubator.apache.org/api/python/optimization/optimization.html#mxnet.optimizer.Adam)).

A leitura dos dados, assim como a função de treinamento, já estão implementadas.

Esse pequeno bloco de código abaixo é usado somente para instalar o MXNet para CUDA 10. Execute esse bloco somente uma vez e ignore possíveis erros levantados durante a instalação.

**ATENÇÃO: a alteração deste bloco pode implicar em problemas na execução dos blocos restantes!**

In [13]:
#! pip install mxnet-cu100



# Preâmbulo

In [0]:
# imports basicos

from mxnet import autograd
from mxnet import gluon
from mxnet import init
from mxnet import nd

from mxnet.gluon import data as gdata
from mxnet.gluon import loss as gloss
from mxnet.gluon import nn
from mxnet.gluon import utils as gutils

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

import mxnet as mx
import numpy as np

import os
import sys
import time

In [0]:
import matplotlib.pyplot as plt
# Turn on matplotlib's interactive mode for the rest of the session.
plt.ion()

In [16]:
# Try to find a usable GPU
def try_gpu():
    """Return the default GPU context if it is usable, otherwise the CPU.

    Usability is probed by allocating a one-element array on the GPU; an
    ``MXNetError`` raised by that probe selects the CPU context instead.
    """
    try:
        candidate = mx.gpu()
        # The allocation result is irrelevant; only whether it raises matters.
        nd.zeros((1,), ctx=candidate)
        return candidate
    except mx.base.MXNetError:
        return mx.cpu()

# Pick the compute context once; later cells read this module-level `ctx`
# when initializing parameters and when training/evaluating.
ctx = try_gpu()
# Bare expression so the notebook displays the selected context.
ctx

gpu(0)

In [0]:
# funções básicas

def load_array(features, labels, batch_size, is_train=True):
    """Wrap feature and label arrays in a Gluon ``DataLoader``.

    The arrays are paired in an ``ArrayDataset`` and served in batches of
    ``batch_size``; samples are shuffled only when ``is_train`` is true.
    """
    return gluon.data.DataLoader(
        gluon.data.ArrayDataset(features, labels),
        batch_size,
        shuffle=is_train,
    )

def _get_batch(batch, ctx):
    """Split a ``(features, labels)`` batch across the contexts in ``ctx``.

    Labels are cast to the feature dtype when the two differ.

    Returns:
        A 3-tuple ``(feature_shards, label_shards, batch_len)`` where the
        shards come from ``gutils.split_and_load`` and ``batch_len`` is the
        size of the features' first axis.
    """
    data, target = batch
    if target.dtype != data.dtype:
        target = target.astype(data.dtype)
    data_shards = gutils.split_and_load(data, ctx)
    target_shards = gutils.split_and_load(target, ctx)
    return data_shards, target_shards, data.shape[0]

# Computes accuracy and mean loss over a data set
def evaluate_accuracy(data_iter, net, loss, ctx=None):
    """Evaluate accuracy and mean loss of a model on the given data set.

    Args:
        data_iter: iterable of ``(features, labels)`` batches.
        net: callable model; ``net(X)`` returns the predictions for ``X``.
        loss: callable ``loss(y_hat, y)`` returning per-sample losses.
        ctx: a single ``mx.Context`` or a list of contexts. Defaults to
            ``[mx.cpu()]``.

    Returns:
        ``(accuracy, mean_loss)``: the fraction of samples whose argmax over
        axis 1 equals the label, and the summed loss divided by the number of
        samples.

    Raises:
        ValueError: if ``data_iter`` yields no samples.
    """
    # Build the default per call instead of sharing one mutable list object.
    if ctx is None:
        ctx = [mx.cpu()]
    elif isinstance(ctx, mx.Context):
        ctx = [ctx]
    # Both accumulators live on the CPU so partial sums coming from different
    # devices can be added together.
    acc_sum, loss_sum, n = nd.array([0]), nd.array([0]), 0
    for batch in data_iter:
        features, labels, _ = _get_batch(batch, ctx)
        for X, y in zip(features, labels):
            y = y.astype('float32')
            y_hat = net(X)
            loss_sum += loss(y_hat, y).sum().copyto(mx.cpu())
            acc_sum += (y_hat.argmax(axis=1) == y).sum().copyto(mx.cpu())
            n += y.size
        acc_sum.wait_to_read()
    if n == 0:
        raise ValueError('data_iter yielded no samples')
    return acc_sum.asscalar() / n, loss_sum.asscalar() / n
  
    
# Training and validation loop for the network
def train_validate(net, train_iter, test_iter, batch_size, trainer, loss, ctx,
                   num_epochs, type='regression'):
    """Train ``net`` for ``num_epochs`` epochs, printing metrics per epoch.

    Each epoch runs one pass over ``train_iter`` (forward, backward and
    ``trainer.step(batch_size)`` per batch) and then scores ``test_iter``
    with ``evaluate_accuracy``.

    Args:
        net: model to train.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of validation batches.
        batch_size: value forwarded to ``trainer.step``.
        trainer: optimizer wrapper exposing ``step``.
        loss: callable ``loss(y_hat, y)`` returning per-sample losses.
        ctx: single context that each training batch is moved to.
        num_epochs: number of passes over ``train_iter``.
        type: ``'regression'`` prints losses only; any other value also
            prints train/test accuracy.
    """
    print('training on', ctx)
    report_accuracy = type != 'regression'
    for epoch in range(num_epochs):
        loss_total = 0.0
        correct_total = 0.0
        n = 0
        start = time.time()
        for X, y in train_iter:
            X = X.as_in_context(ctx)
            # Labels are compared and scored as float32 throughout.
            y = y.as_in_context(ctx).astype('float32')
            with autograd.record():
                y_hat = net(X)
                batch_loss = loss(y_hat, y).sum()
            batch_loss.backward()
            trainer.step(batch_size)
            loss_total += batch_loss.asscalar()
            hits = (y_hat.argmax(axis=1) == y).sum()
            correct_total += hits.asscalar()
            n += y.size
        test_acc, test_loss = evaluate_accuracy(test_iter, net, loss, ctx)
        mean_loss = loss_total / n
        if report_accuracy:
            print('epoch %d, train loss %.4f, train acc %.3f, test loss %.4f, '
                  'test acc %.3f, time %.1f sec'
                  % (epoch + 1, mean_loss, correct_total / n, test_loss,
                     test_acc, time.time() - start))
        else:
            print('epoch %d, train loss %.4f, test loss %.4f, time %.1f sec'
                  % (epoch + 1, mean_loss, test_loss, time.time() - start))
          
        
# Inference helper used for testing
def test(net, test_iter):
    """Run ``net`` over every batch of ``test_iter`` and collect predictions.

    Each batch is moved to the module-level ``ctx`` before the forward pass.

    Args:
        net: callable model; ``net(X)`` returns the raw outputs for ``X``.
        test_iter: iterable yielding feature batches (no labels).

    Returns:
        ``(pred_logits, pred_labels)`` as numpy arrays: the raw outputs of all
        batches concatenated along axis 0, and their argmax over axis 1.

    Raises:
        ValueError: if ``test_iter`` yields no batches.
    """
    print('testing on', ctx)
    logits, labels = [], []
    for X in test_iter:
        y_hat = net(X.as_in_context(ctx))
        logits.append(y_hat.asnumpy())
        labels.append(y_hat.argmax(axis=1).asnumpy())
    if not logits:
        raise ValueError('test_iter yielded no batches')
    # One concatenation at the end instead of re-concatenating (and thus
    # re-copying) all previous predictions on every batch.
    return np.concatenate(logits, axis=0), np.concatenate(labels, axis=0)

## Problema 1

Neste problema, você receberá 7 *features* extraídas de poços de petróleo ('BRCALI', 'BRDENS', 'BRDTP', 'BRGR', 'BRNEUT', 'BRRESC', 'BRRESP') e deve predizer o tipo de rocha.

### Treino e Validação

Primeiro, vamos modelar uma rede neural e treiná-la.
Usamos os dados de treino carregados no próximo bloco para convergir o modelo e os dados de validação para avaliar quão bom ele está.

In [112]:
#!wget https://www.dropbox.com/s/ujnqxh6l43tlbdi/poco_1.prn

# Builds a batched data loader from the features and labels
def load_array(features, labels, batch_size, is_train=True):
    """Return a Gluon ``DataLoader`` over ``(features, labels)`` pairs.

    Batches have ``batch_size`` samples; shuffling follows ``is_train``.
    """
    paired = gluon.data.ArrayDataset(features, labels)
    loader = gluon.data.DataLoader(paired, batch_size, shuffle=is_train)
    return loader

# Load the data: columns 1-7 are read as features, column 8 as the label
X = np.loadtxt('poco_1.prn', skiprows=11, usecols=(1,2,3,4,5,6,7), dtype=np.float32)
# The builtin `str` replaces the `np.str` alias, which NumPy 1.24 removed.
y = np.loadtxt('poco_1.prn', skiprows=11, usecols=8, dtype=str)

# Encode the string labels as integer class indices
le = preprocessing.LabelEncoder()
y_t = le.fit_transform(y)

# Min-max normalization parameters. np.ptp() is used because the
# ndarray.ptp() method was removed in NumPy 2.0.
y_t_min, y_t_ptp = y_t.min(), np.ptp(y_t)
X_min, X_ptp = X.min(0), np.ptp(X, axis=0)
X_ptp[X_ptp == 0] = 1  # constant feature: avoid 0/0 = NaN after scaling

# NOTE(review): the label is a category, yet it is scaled to [0, 1] and
# regressed with a single output unit and L2 loss. With one output column the
# argmax-based accuracy returned by evaluate_accuracy is not meaningful.
y_t = (y_t - y_t_min) / y_t_ptp  # normalize y
X = (X - X_min) / X_ptp  # normalize X

# Split into training and validation data (seeded, like the other problems)
train_features, test_features, train_labels, test_labels = train_test_split(
    X, y_t, test_size=0.33, random_state=42)

# Training/validation data loaders
batch_size = 10
train_iter = load_array(train_features, train_labels, batch_size)
test_iter = load_array(test_features, test_labels, batch_size, False)

# Build the MLP
net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(8, activation="relu"))
net.add(gluon.nn.Dense(1))

# Weight initialization
net.collect_params().initialize(mx.init.Normal(sigma=0.1), ctx=ctx)

# Optimizer - Adam
trainer = gluon.Trainer(net.collect_params(), 'adam')

# Loss function - L2
loss = gluon.loss.L2Loss()

# Train the model
train_validate(net, train_iter, test_iter, batch_size, trainer, loss, ctx, 30)

# Evaluate the model on the validation split
evaluate_accuracy(test_iter, net, loss, ctx)

training on gpu(0)
epoch 1, train loss 0.1118, test loss 0.0861, time 0.6 sec
epoch 2, train loss 0.0707, test loss 0.0559, time 0.6 sec
epoch 3, train loss 0.0545, test loss 0.0440, time 0.6 sec
epoch 4, train loss 0.0477, test loss 0.0396, time 0.6 sec
epoch 5, train loss 0.0438, test loss 0.0364, time 0.6 sec
epoch 6, train loss 0.0411, test loss 0.0352, time 0.6 sec
epoch 7, train loss 0.0394, test loss 0.0331, time 0.6 sec
epoch 8, train loss 0.0378, test loss 0.0321, time 0.6 sec
epoch 9, train loss 0.0368, test loss 0.0314, time 0.6 sec
epoch 10, train loss 0.0362, test loss 0.0309, time 0.5 sec
epoch 11, train loss 0.0354, test loss 0.0310, time 0.6 sec
epoch 12, train loss 0.0350, test loss 0.0301, time 0.6 sec
epoch 13, train loss 0.0347, test loss 0.0303, time 0.6 sec
epoch 14, train loss 0.0345, test loss 0.0301, time 0.6 sec
epoch 15, train loss 0.0342, test loss 0.0308, time 0.6 sec
epoch 16, train loss 0.0342, test loss 0.0301, time 0.6 sec
epoch 17, train loss 0.0342, t

(0.6475849731663685, 0.029863906887648145)

## Problema 2

Neste problema, você receberá várias *features* (como altura média, inclinação, etc) descrevendo uma região e o modelo deve predizer qual o tipo da região (floresta, montanha, etc).

In [117]:
#!wget http://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz
#!gzip -d covtype.data.gz

# Builds a batched data loader from the features and labels
def load_array(features, labels, batch_size, is_train=True):
    """Pair ``features`` with ``labels`` and serve them in batches.

    Returns a Gluon ``DataLoader`` with ``batch_size`` samples per batch,
    shuffled when ``is_train`` is true.
    """
    samples = gluon.data.ArrayDataset(features, labels)
    return gluon.data.DataLoader(
        samples,
        batch_size,
        shuffle=is_train,
    )

# Load the data: the last column is the label, all others are features
data = np.genfromtxt('covtype.data', delimiter=',', dtype=np.float32)
X, y = data[:, :-1], data[:, -1]

# Min-max normalization parameters. np.ptp() is used because the
# ndarray.ptp() method was removed in NumPy 2.0.
y_min, y_ptp = y.min(), np.ptp(y)
X_min, X_ptp = X.min(0), np.ptp(X, axis=0)
X_ptp[X_ptp == 0] = 1  # constant feature: avoid 0/0 = NaN after scaling

# NOTE(review): the label is a category, yet it is scaled to [0, 1] and
# regressed with a single output unit and L2 loss. With one output column the
# argmax-based accuracy returned by evaluate_accuracy is not meaningful.
y = (y - y_min) / y_ptp  # normalize y
X = (X - X_min) / X_ptp  # normalize X

# Split into training and validation data
train_features, test_features, train_labels, test_labels = train_test_split(
    X, y, test_size=0.33, random_state=42)

# Training/validation data loaders
batch_size = 128
train_iter = load_array(train_features, train_labels, batch_size)
test_iter = load_array(test_features, test_labels, batch_size, False)

# Build the MLP
net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(32, activation="relu"))
net.add(gluon.nn.Dense(8, activation="relu"))
net.add(gluon.nn.Dense(1))

# Weight initialization
net.collect_params().initialize(mx.init.Normal(sigma=0.1), ctx=ctx)

# Optimizer - Adam
trainer = gluon.Trainer(net.collect_params(), 'adam')

# Loss function - L2
loss = gluon.loss.L2Loss()

# Train the model
train_validate(net, train_iter, test_iter, batch_size, trainer, loss, ctx, 30)

# Evaluate the model on the validation split
evaluate_accuracy(test_iter, net, loss, ctx)

training on gpu(0)
epoch 1, train loss 0.0164, test loss 0.0146, time 18.6 sec
epoch 2, train loss 0.0140, test loss 0.0135, time 18.4 sec
epoch 3, train loss 0.0132, test loss 0.0133, time 19.2 sec
epoch 4, train loss 0.0127, test loss 0.0124, time 18.5 sec
epoch 5, train loss 0.0123, test loss 0.0123, time 18.7 sec
epoch 6, train loss 0.0121, test loss 0.0121, time 18.5 sec
epoch 7, train loss 0.0119, test loss 0.0126, time 18.6 sec
epoch 8, train loss 0.0117, test loss 0.0120, time 19.1 sec
epoch 9, train loss 0.0116, test loss 0.0115, time 18.6 sec
epoch 10, train loss 0.0115, test loss 0.0114, time 18.6 sec
epoch 11, train loss 0.0114, test loss 0.0118, time 18.4 sec
epoch 12, train loss 0.0113, test loss 0.0113, time 18.6 sec
epoch 13, train loss 0.0112, test loss 0.0110, time 18.8 sec
epoch 14, train loss 0.0111, test loss 0.0111, time 18.6 sec
epoch 15, train loss 0.0111, test loss 0.0113, time 18.6 sec
epoch 16, train loss 0.0110, test loss 0.0110, time 18.7 sec
epoch 17, trai

(0.36497439160503614, 0.01064777200179513)

## Problema 3

Neste problema, você receberá 90 *features* extraídas de diversas músicas (datadas de 1922 até 2011) e deve predizer o ano de cada música.

In [123]:
#!wget http://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip
#!unzip YearPredictionMSD.txt.zip

# Builds a batched data loader from the features and labels
def load_array(features, labels, batch_size, is_train=True):
    """Create a Gluon ``DataLoader`` from feature and label arrays.

    ``is_train`` controls shuffling; ``batch_size`` sets the batch length.
    """
    return gluon.data.DataLoader(
        gluon.data.ArrayDataset(features, labels), batch_size, shuffle=is_train)

# Load the data: the first column is the target, the remaining ones are features
data = np.genfromtxt('YearPredictionMSD.txt', delimiter=',', dtype=np.float32)
X, y = data[:, 1:], data[:, 0]

# Min-max normalization parameters. np.ptp() is used because the
# ndarray.ptp() method was removed in NumPy 2.0.
y_min, y_ptp = y.min(), np.ptp(y)
X_min, X_ptp = X.min(0), np.ptp(X, axis=0)
X_ptp[X_ptp == 0] = 1  # constant feature: avoid 0/0 = NaN after scaling

y = (y - y_min) / y_ptp  # normalize y
X = (X - X_min) / X_ptp  # normalize X

# Split into training and validation data
train_features, test_features, train_labels, test_labels = train_test_split(
    X, y, test_size=0.33, random_state=42)

# Training/validation data loaders
batch_size = 128
train_iter = load_array(train_features, train_labels, batch_size)
test_iter = load_array(test_features, test_labels, batch_size, False)

# Build the MLP
net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(8, activation="relu"))
net.add(gluon.nn.Dense(1))

# Weight initialization
net.collect_params().initialize(mx.init.Normal(sigma=0.1), ctx=ctx)

# Optimizer - Adam
trainer = gluon.Trainer(net.collect_params(), 'adam')

# Loss function - L2
loss = gluon.loss.L2Loss()

# Train the model
train_validate(net, train_iter, test_iter, batch_size, trainer, loss, ctx, 30)

# Evaluate the model on the validation split (only the loss is meaningful
# here: the model has a single output column)
evaluate_accuracy(test_iter, net, loss, ctx)

training on gpu(0)
epoch 1, train loss 0.0093, test loss 0.0071, time 14.5 sec
epoch 2, train loss 0.0066, test loss 0.0062, time 14.4 sec
epoch 3, train loss 0.0061, test loss 0.0060, time 14.4 sec
epoch 4, train loss 0.0060, test loss 0.0059, time 14.8 sec
epoch 5, train loss 0.0060, test loss 0.0059, time 14.6 sec
epoch 6, train loss 0.0059, test loss 0.0059, time 14.4 sec
epoch 7, train loss 0.0059, test loss 0.0059, time 14.7 sec
epoch 8, train loss 0.0059, test loss 0.0058, time 14.4 sec
epoch 9, train loss 0.0059, test loss 0.0058, time 14.4 sec
epoch 10, train loss 0.0059, test loss 0.0058, time 14.5 sec
epoch 11, train loss 0.0058, test loss 0.0061, time 14.4 sec
epoch 12, train loss 0.0057, test loss 0.0057, time 14.3 sec
epoch 13, train loss 0.0057, test loss 0.0056, time 14.4 sec
epoch 14, train loss 0.0057, test loss 0.0058, time 14.3 sec
epoch 15, train loss 0.0057, test loss 0.0056, time 14.2 sec
epoch 16, train loss 0.0056, test loss 0.0059, time 14.5 sec
epoch 17, trai

(1.1760278483394486e-05, 0.0056996874441846155)