In [1]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
import torch
import torch.nn as nn
from torch import optim
from torch.autograd import Variable

In [2]:
import pandas as pd
import numpy as np

In [None]:
from model import EncoderRNN, AttnDecoder, train, timeSince, tensorFromArr, device

In [None]:
import time
import random
import matplotlib.pyplot as plt
# 'agg' is matplotlib's non-interactive raster backend: figures are rendered
# off-screen rather than in a GUI window.
# NOTE(review): with this backend, figures built by showPlot may not appear
# in the notebook unless they are explicitly displayed or saved — confirm
# this is intended.
plt.switch_backend('agg')
import matplotlib.ticker as ticker

In [3]:
# Load the array stored for set A. The cells below index its rows with a mask
# built from data/set_a.csv, so row i of this array is assumed to belong to
# row i of that CSV — TODO confirm how seta.npy was generated.
data = np.load(file='data/seta.npy')

In [13]:
# Read the label table for set A and one-hot encode its 'label' column.
# dummy_na=True adds a 'label_nan' indicator column marking rows without a label.
labels_raw = pd.read_csv('data/set_a.csv')
# Cast explicitly: pandas >= 2.0 returns bool dummy columns by default, while
# older releases returned uint8. The recorded output of this notebook shows
# 0/1 integers, so pin that dtype regardless of the installed pandas version.
data_y = pd.get_dummies(labels_raw[['label']], dummy_na=True).astype('uint8')
data_y.head()

Unnamed: 0,label_artifact,label_extrahls,label_murmur,label_normal,label_nan
0,1,0,0,0,0
1,1,0,0,0,0
2,1,0,0,0,0
3,1,0,0,0,0
4,1,0,0,0,0


In [14]:
# Discard every sample whose label is missing (label_nan == 1), in both the
# feature array and the one-hot label table.
# Note: data_y is rebound from a DataFrame to a plain array on the last line,
# so this cell cannot be re-run without first re-running the cell that builds
# the one-hot DataFrame.
is_labeled = data_y['label_nan'] != 1
data_x = data[is_labeled, :]
data_y = data_y.loc[is_labeled].drop(columns='label_nan').values

In [15]:
# Min-max scale every column except the first, writing the result back into
# data_x in place. Column 0 is left untouched — presumably an id/index
# column; TODO confirm.
scaled_features = MinMaxScaler().fit_transform(data_x[:, 1:])
data_x[:, 1:] = scaled_features

In [16]:
# Sanity check: features and labels must have the same number of rows.
print(len(data_x), len(data_y))

124 124


In [18]:
# Sanity check: value range and shape of the one-hot label array.
label_lo, label_hi = data_y.min(), data_y.max()
print(label_lo, label_hi, data_y.shape)

0 1 (124, 4)


In [None]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Run ``n_iters`` single-sample training steps and return the averaged loss curve.

    Every step draws one random row index into the notebook-level ``data_x`` /
    ``data_y`` arrays, hands it to ``tensorFromArr`` to obtain an
    (input, target) pair, and passes that pair to ``train`` together with both
    models, their SGD optimizers and an ``nn.NLLLoss`` criterion.

    Args:
        encoder: model whose ``parameters()`` are optimised by the first SGD optimizer.
        decoder: model whose ``parameters()`` are optimised by the second SGD optimizer.
        n_iters: number of training steps to run.
        print_every: a progress line (the string from ``timeSince``, the step
            number, percent done and the mean loss since the previous line) is
            printed whenever the step number is a multiple of this value.
        plot_every: a mean loss is recorded whenever the step number is a
            multiple of this value.
        learning_rate: learning rate used for both SGD optimizers.

    Returns:
        List of mean losses, one per completed ``plot_every`` window. Steps
        after the last complete window are not included.
    """
    started_at = time.time()

    enc_opt = optim.SGD(encoder.parameters(), lr=learning_rate)
    dec_opt = optim.SGD(decoder.parameters(), lr=learning_rate)
    loss_fn = nn.NLLLoss()

    averaged_losses = []
    running_print = 0  # loss accumulated since the last progress line
    running_plot = 0   # loss accumulated since the last recorded point

    for step in range(1, n_iters + 1):
        # Pick one training sample uniformly at random.
        sample_idx = random.randrange(data_x.shape[0])
        input_tensor, target_tensor = tensorFromArr(data_x, data_y, sample_idx)

        step_loss = train(input_tensor, target_tensor, encoder,
                          decoder, enc_opt, dec_opt, loss_fn)
        running_print += step_loss
        running_plot += step_loss

        if step % print_every == 0:
            mean_print_loss = running_print / print_every
            running_print = 0
            progress = step / n_iters
            print('%s (%d %d%%) %.4f' % (timeSince(started_at, progress),
                                         step, progress * 100, mean_print_loss))

        if step % plot_every == 0:
            averaged_losses.append(running_plot / plot_every)
            running_plot = 0

    return averaged_losses

In [None]:
def showPlot(points):
    """Plot ``points`` as a line on a fresh figure with y-axis ticks every 0.2.

    Args:
        points: sequence of values to plot (here, the averaged losses
            returned by ``trainIters``).

    The original version called ``plt.figure()`` before ``plt.subplots()``,
    which created an extra, empty figure on every call that was never used or
    closed. Only the figure that is actually drawn on is created now.
    """
    fig, ax = plt.subplots()
    # This locator puts major ticks at regular intervals of 0.2 on the y axis.
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    # Draw on the axes we just created rather than on pyplot's implicit
    # "current axes".
    ax.plot(points)

In [None]:
# Build both models and move them to the device exported by model.py.
# The same value (100) is passed to both constructors — presumably the shared
# hidden size; confirm against model.py.
# NOTE(review): the encoder's first argument is the number of rows in data_x
# (the sample count), not data_x.shape[1] — confirm this is what EncoderRNN
# expects.
# NOTE(review): the decoder's second argument is 1 while data_y has 4 label
# columns — confirm against AttnDecoder.
shared_size = 100
enc = EncoderRNN(data_x.shape[0], shared_size).to(device)
dec = AttnDecoder(shared_size, 1).to(device)

In [None]:
# Train for 10000 single-sample steps; pl holds one averaged loss per
# plot_every window.
pl = trainIters(enc, dec, n_iters=10000)

In [None]:
# Plot the averaged training losses collected above.
showPlot(points=pl)