# AutoEncoderの実装

## AutoEncoderクラスの定義

In [None]:
import torch
import torch.nn as nn

class AE(nn.Module):
  """Autoencoder made of one linear encoder and one linear decoder.

  Both halves apply a sigmoid to the output of their linear layer, so the
  latent code and the reconstruction each lie in the open interval (0, 1).

  Args:
    input_dim: Width of the input vectors; the reconstruction has the same
      width and is also exposed as ``output_dim``.
    hidden_dim: Width of the latent code produced by ``encode``.
  """

  def __init__(self, input_dim=10, hidden_dim=5):
    super().__init__()
    # The decoder maps back to the input width, so both dims share a value.
    self.input_dim = self.output_dim = input_dim
    self.hidden_dim = hidden_dim

    # Encoder is created before the decoder to keep parameter order stable.
    self.encoder = nn.Linear(in_features=input_dim, out_features=hidden_dim)
    self.decoder = nn.Linear(in_features=hidden_dim, out_features=input_dim)

  def encode(self, data):
    """Map ``data`` to its latent code (linear layer followed by sigmoid)."""
    latent_logits = self.encoder(data)
    return latent_logits.sigmoid()

  def decode(self, data):
    """Map a latent code back to input space (linear layer, then sigmoid)."""
    reconstruction_logits = self.decoder(data)
    return reconstruction_logits.sigmoid()

  def forward(self, data):
    """Return the reconstruction of ``data``: ``decode(encode(data))``."""
    return self.decode(self.encode(data))



## データの取得

scikit-learn の手書き数字データセット（`load_digits`）を使用する。

In [None]:
from sklearn.datasets import load_digits
from sklearn import svm
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Load the digits dataset. Features and images are both divided by 16,
# presumably to bring pixel values into [0, 1] so they match the sigmoid
# output range of the autoencoder -- confirm against the dataset's value range.
digits = load_digits()
y = digits.target
x = digits.data / 16.
images = digits.images / 16.

# One call splits features, labels and images with the same row assignment:
# 70% of the rows go to the training side. No random_state is given, so the
# split differs from run to run.
splits = train_test_split(x, y, images, train_size=0.7)
x_train, x_test, y_train, y_test, images_train, images_test = splits

# The feature width drives the model size; the latent code is half as wide.
n_features = x.shape[1]
hidden_dim = n_features // 2

print(n_features)
print(images[1])

In [None]:
# Display the image at index 1 using the reversed grayscale colormap.
sample_image = images[1]
plt.imshow(sample_image, cmap=plt.cm.gray_r)
plt.show()

## DataSetの作成

In [None]:
import numpy as np
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
  """Dataset that pairs rows of ``datas`` with entries of ``labels``.

  Indexing yields ``(data, label)``: ``data`` is the selected row cast to
  float32 and wrapped in a tensor, ``label`` is returned as stored.

  Args:
    datas: Indexable collection whose items provide ``astype`` (e.g. a
      NumPy array of feature rows).
    labels: Indexable collection of labels; its length defines the
      dataset length.
  """

  def __init__(self, datas, labels):
    super().__init__()
    self.datas, self.labels = datas, labels
    # The reported size comes from the labels, not from ``datas``.
    self.len = len(labels)

  def __len__(self):
    return self.len

  def __getitem__(self, index):
    row, label = self.datas[index], self.labels[index]
    # Cast on access so the stored array keeps its original dtype.
    return torch.from_numpy(row.astype(np.float32)), label




### DataLoaderの作成

In [None]:
# Wrap the training split in a dataset and serve it as shuffled mini-batches.
batch_size = 16
dataset = MyDataset(datas=x_train, labels=y_train)
dataloader = DataLoader(
  dataset,
  batch_size=batch_size,
  shuffle=True,
)

### 学習用の関数

In [None]:
def train(model, criterion, num_epoch, optimizer=None, dataloader=None):
  """Train ``model`` to reconstruct its own input.

  Each batch is passed through ``model`` and the loss is computed between
  the prediction and the batch itself; the labels in the batch are ignored.
  A progress line is printed on every 10th batch of each epoch.

  Args:
    model: Module invoked as ``model(data)``; switched to training mode.
    criterion: Loss invoked as ``criterion(pred, data)``.
    num_epoch: Number of passes over ``dataloader``.
    optimizer: Optimizer to zero and step. When omitted, the module-level
      ``optimizer`` is used, so ``train(model, criterion, num_epoch)``
      behaves as before.
    dataloader: Iterable of ``(data, labels)`` batches. When omitted, the
      module-level ``dataloader`` is used.

  Raises:
    NameError: If an argument is omitted and no module-level object of the
      same name exists.
  """
  # Fall back to the notebook-level objects this function used to read
  # implicitly; passing them explicitly lets one train any model/optimizer.
  module_globals = globals()
  if optimizer is None:
    if "optimizer" not in module_globals:
      raise NameError("name 'optimizer' is not defined")
    optimizer = module_globals["optimizer"]
  if dataloader is None:
    if "dataloader" not in module_globals:
      raise NameError("name 'dataloader' is not defined")
    dataloader = module_globals["dataloader"]

  model.train()
  for epoch in range(num_epoch):
    # Batches are counted from 1; named `step` to avoid shadowing `iter`.
    for step, (data, _labels) in enumerate(dataloader, 1):
      optimizer.zero_grad()
      pred = model(data)
      # Autoencoder objective: the input is its own target.
      loss = criterion(pred, data)
      loss.backward()  # backpropagate the error
      optimizer.step()  # update the parameters
      if step % 10 == 0:
        print("epoch[%d/%d] iter=%d: loss=%f" % (epoch+1, num_epoch, step, loss.item()))



## モデルのインスタンス化と学習

In [None]:
# Build the autoencoder sized to the dataset, pair it with an MSE
# reconstruction loss and an RMSprop optimizer (library default
# hyperparameters), then train for 10 epochs.
ae1 = AE(n_features, hidden_dim)
criterion = nn.MSELoss()
optimizer = torch.optim.RMSprop(params=ae1.parameters())

train(model=ae1, criterion=criterion, num_epoch=10)

### 再構成テスト

In [None]:
# Reconstruct the first training sample and compare it with the original.
ae1.eval()
data = dataset[0][0]

# Detach from the autograd graph before converting to NumPy for plotting.
pred = ae1.decode(ae1.encode(data)).detach().numpy()

# Show the reconstruction first, then the original input, as 8x8 images.
for image in (pred, data):
  plt.imshow(image.reshape([8,8]), cmap=plt.cm.gray_r)
  plt.show()

# 取り出した中間ベクトルの評価

## 圧縮前のベクトルでどれくらい識別できるか（10種類の数字の10クラス分類）

In [None]:
import numpy as np
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from mlxtend.plotting import plot_decision_regions

# Baseline: fit a Perceptron on the uncompressed feature vectors and report
# its performance on the held-out split.
clf = linear_model.Perceptron().fit(x_train, y_train)
pred = clf.predict(x_test)

# Printed in order: per-class report, overall accuracy, confusion matrix.
for report in (classification_report, accuracy_score, confusion_matrix):
  print(report(y_test, pred))


## 中間ベクトルでの識別結果

In [None]:
import numpy as np
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from mlxtend.plotting import plot_decision_regions

# Same classification task as the baseline, but on the autoencoder's latent
# codes instead of the raw feature vectors.
ae1.eval()

# Fit the classifier on the encoded training split.
train_input = torch.from_numpy(x_train.astype(np.float32))
encoded = ae1.encode(train_input)
clf = linear_model.Perceptron()
clf.fit(encoded.detach().numpy(), y_train)

# Encode the held-out split with the same model and predict on it.
test_input = torch.from_numpy(x_test.astype(np.float32))
encoded = ae1.encode(test_input)
pred = clf.predict(encoded.detach().numpy())

report_text = classification_report(y_test, pred)
accuracy = accuracy_score(y_test, pred)
confusion = confusion_matrix(y_test, pred)
print(report_text)
print(accuracy)
print(confusion)