[![Colabで開く](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/schwalbe1996/ds_media_intro/blob/main/chap15.ipynb)

# 15章「画像データの機械学習」

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
# Fetch the 'mnist_784' dataset from OpenML; data_home="." caches it in the current directory.
mnist = fetch_openml(name='mnist_784', data_home=".")
# Split features/labels: 60000 samples go to training, the rest to test.
# random_state=0 fixes the shuffle so the split is reproducible.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    mnist.data,
    mnist.target,
    train_size=60000,
    random_state=0,
)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# 1-nearest-neighbour classifier that compares samples by cosine distance.
model = KNeighborsClassifier(
    n_neighbors=1,
    metric='cosine',
)
# Fitting stores the training samples; the fitted estimator is the cell's displayed output.
model.fit(X=Xtrain, y=Ytrain)

In [None]:
# Predict a label for every test sample with the fitted classifier.
Ypred = model.predict(Xtest)
# Show the first 10 predictions ...
print(Ypred[0:10])
# ... followed by the first 10 ground-truth labels (Ytest must provide .to_numpy()).
print(Ytest.to_numpy()[0:10])

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Confusion matrix of true labels (rows) versus predicted labels (columns).
print(confusion_matrix(y_true=Ytest, y_pred=Ypred))
# Fraction of test samples whose predicted label equals the true label.
print(accuracy_score(y_true=Ytest, y_pred=Ypred))

In [None]:
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torch import nn

# Mini-batch size shared by the training and test loaders.
batch_size = 32

# MNIST train/test splits, downloaded into ./data if missing; ToTensor converts each image to a tensor.
training_data = datasets.MNIST(
    root='data', train=True, download=True, transform=ToTensor()
)
test_data = datasets.MNIST(
    root='data', train=False, download=True, transform=ToTensor()
)

# Training batches are reshuffled every epoch; test batches keep the dataset order.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

In [None]:
# Pull the first mini-batch (images, labels) out of the test loader.
x, y = next(iter(test_dataloader))
# Report the shapes of the image batch and the label batch.
print(x.shape, y.shape)
# Ground-truth label of the first image in the batch.
print(y[0])

In [None]:
class MyModel(nn.Module):
    """Fully connected classifier with one hidden layer.

    Each input sample is flattened to 784 features, mapped to 256 hidden
    units through a sigmoid, and then projected to 10 output scores
    (raw logits; no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Hidden layer: 784 -> 256 followed by a sigmoid non-linearity.
        hidden_layers = [nn.Linear(784, 256), nn.Sigmoid()]
        self.middle = nn.Sequential(*hidden_layers)
        # Output layer: 256 -> 10 class scores.
        self.last = nn.Linear(256, 10)

    def forward(self, x):
        """Return a (batch, 10) tensor of class scores for the batch ``x``."""
        # Keep the batch dimension and collapse everything else into one axis.
        flat = x.reshape(x.size(0), -1)
        return self.last(self.middle(flat))

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = MyModel().to(device)
loss_func = nn.CrossEntropyLoss()  # cross-entropy as the loss function
optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-4)  # Adam as the optimizer

epochs = 10  # number of epochs: the training data is iterated over `epochs` times
for epoch in range(epochs):
    train_loss = 0
    test_loss = 0
    correct = 0

    # Training phase: one optimizer step per mini-batch.
    model.train()
    for X, y in train_dataloader:
        X = X.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        y_pred = model(X)
        loss = loss_func(y_pred, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    # Average the accumulated loss once per epoch, after all batches have been
    # summed. Dividing inside the batch loop would repeatedly shrink the running
    # sum and report a meaningless value.
    train_loss /= len(train_dataloader)

    # Evaluation phase: compute loss and accuracy on the test data.
    model.eval()
    # Gradients are not needed for evaluation; no_grad avoids building autograd graphs.
    with torch.no_grad():
        for X, y in test_dataloader:
            X = X.to(device)
            y = y.to(device)

            y_pred = model(X)
            loss = loss_func(y_pred, y)
            test_loss += loss.item()
            # Count the samples whose highest-scoring class matches the label.
            correct += (y_pred.argmax(dim=1) == y).sum().item()

    test_loss /= len(test_dataloader)  # mean loss per test batch
    correct /= len(test_dataloader.dataset)  # fraction of correctly classified test samples
    print('Epoch:', epoch, 'Train Loss:', train_loss, 'Test Loss:', test_loss, 'Accuracy:', correct)

In [None]:
# Take the first mini-batch of the test loader and classify it with the trained model.
x, y = next(iter(test_dataloader))
model.eval()
x = x.to(device)
# Inference only: disable gradient tracking so no autograd graph is built.
with torch.no_grad():
    pred = model(x).argmax(dim=1).cpu()  # predicted class = index of the largest score
print(y[0:10])  # first 10 ground-truth labels
print(pred[0:10])  # first 10 predictions

In [None]:
class MyModel(nn.Module):
    """Small convolutional classifier: two conv/pool stages and three linear layers.

    The first linear layer expects 16*4*4 features, which is what the two
    conv/pool stages produce for a single-channel 28x28 input. The output is
    a (batch, 10) tensor of raw class scores.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 6 channels, 5x5 kernel, ReLU, then 2x2 max pooling.
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Stage 2: 6 -> 16 channels, 5x5 kernel, ReLU, then 2x2 max pooling.
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Classifier head: 256 -> 120 -> 84 -> 10.
        self.fc1 = nn.Sequential(nn.Linear(16 * 4 * 4, 120), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for the image batch ``x``."""
        # For a 1x28x28 input: 6x12x12 after stage 1, 16x4x4 after stage 2.
        features = self.layer2(self.layer1(x))
        # Flatten each sample's feature maps into one vector for the linear layers.
        flat = features.reshape(features.size(0), -1)
        hidden = self.fc2(self.fc1(flat))
        return self.fc3(hidden)

In [None]:
# Fashion-MNIST train/test splits, downloaded into ./data if missing; ToTensor converts each image to a tensor.
training_data = datasets.FashionMNIST(
    root='data', train=True, download=True, transform=ToTensor()
)
test_data = datasets.FashionMNIST(
    root='data', train=False, download=True, transform=ToTensor()
)

# Rebind the loaders to the new datasets; batch_size must already be defined earlier in the notebook.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)