In [28]:
import torch.nn as nn
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np
import torch.optim as optim
import mlflow # mlflow 사용을 위해

# Model 정의

In [29]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784,100)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(100,100)
        self.fc3 = nn.Linear(100,10)
    def forward(self, x):
        x1 = self.fc1(x)
        x2 = self.relu(x1)
        x3 = self.fc2(x2)
        x4 = self.relu(x3)
        x5 = self.fc3(x4)

        return x5

# Dataset 정의  
## MNIST Dataset을 사용하여 학습,검증을 합니다.

In [30]:
download_root = 'MNIST_data/'

train_dataset = datasets.MNIST(root=download_root,
                         train=True,
                         transform = transforms.ToTensor(),
                         download=True)
                         
test_dataset = datasets.MNIST(root=download_root,
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)    

# Batch_size, Train, Test Dataloader 정의

In [None]:
batch_size = 100
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# 학습률, Optimizer 정의

In [35]:
model = Net()
model.zero_grad()
loss_function = nn.CrossEntropyLoss()
learning_rate = 0.03
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Seed 고정


In [32]:
import torch.backends.cudnn as cudnn
import random
seed = 0
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
cudnn.benchmark = False
cudnn.deterministic = True
random.seed(seed)

# MLflow를 활용하여 학습진행

In [36]:
experiment_name = 'mnist' # 실험명, 실험관리를 용이하게 해줍니다. 


if not mlflow.get_experiment_by_name(experiment_name): 
  mlflow.create_experiment(name=experiment_name)
experiment = mlflow.get_experiment_by_name(experiment_name)

mlflow.set_tracking_uri('http://127.0.0.1:5000')
#mlflow.set_tag("mlflow.runName","practice")

train_loss_list = []
train_acc_list = []

val_loss_list = []
val_acc_list = []

total_batch = len(train_loader)
epochs = 20


best_accuracy = 0
with mlflow.start_run(experiment_id=experiment.experiment_id,run_name="autoever"):
  for epoch in range(epochs):
      cost=0
      model.train()
      train_accuracy = 0
      train_loss = 0
      for images, labels in train_loader:
          images = images.reshape(100,784)
          
          optimizer.zero_grad() # 변화도 매개변수 0
          
          #forward
          #pred = model.forward(images)
          #loss = loss_function(pred, labels)
          pred = model(images)
          loss = loss_function(pred,labels)
          prediction = torch.argmax(pred,1)
          correct = (prediction == labels)
          train_accuracy += correct.sum().item() / 60000
          train_loss += loss.item() / 600
          
          #backward
          loss.backward()
          
          #Update
          optimizer.step()
          
          cost += loss
      
      with torch.no_grad(): #미분하지 않겠다는 것
          total = 0
          correct=0
          for images, labels in test_loader:
              images = images.reshape(100,784)

              outputs = model(images)
              _,predict = torch.max(outputs.data, 1)

              total += labels.size(0)
              correct += (predict==labels).sum() # 예측한 값과 일치한 값의 합

      avg_cost = cost / total_batch
      accuracy = 100*correct/total
      
      val_loss_list.append(avg_cost.detach().numpy())
      val_acc_list.append(accuracy)

      if accuracy > best_accuracy:
        torch.save(model.state_dict(),'model.pt')
        best_accuracy = accuracy
        print(f"Save Model(Epoch: {epoch+1}, Accuracy: {best_accuracy:.5})")
      
      print("epoch : {} | loss : {:.6f}" .format(epoch+1, avg_cost))
      print("Accuracy : {:.2f}".format(100*correct/total))
      mlflow.log_param('learning-rate',learning_rate) # mlflow.log_param 을 사용하여 MLflow에 파라미터들을 기록할 수 있습니다.
      mlflow.log_param('epoch',epochs)
      mlflow.log_param('batch_size',batch_size)
      mlflow.log_param('seed',seed)
      mlflow.log_metric('train_accuracy',train_accuracy) # mlflow.log_metric을 사용하여 MLflow에 성능평가를 위한 metric을 기록할 수 있습니다.
      mlflow.log_metric('train_loss',train_loss)
      mlflow.log_metric('valid_accuracy',accuracy)
      mlflow.log_metric('valid_loss',avg_cost)
      mlflow.pytorch.log_model(model,'model') # pytorch.log_model 을 통해 모델을 저장할 수 있습니다.
      print("------")
mlflow.end_run()

Save Model(Epoch: 1, Accuracy: 87.95)
epoch : 1 | loss : 1.166269
Accuracy : 87.95
------
Save Model(Epoch: 2, Accuracy: 90.67)
epoch : 2 | loss : 0.384078
Accuracy : 90.67
------
Save Model(Epoch: 3, Accuracy: 91.78)
epoch : 3 | loss : 0.315243
Accuracy : 91.78
------
Save Model(Epoch: 4, Accuracy: 92.54)
epoch : 4 | loss : 0.279034
Accuracy : 92.54
------
Save Model(Epoch: 5, Accuracy: 93.05)
epoch : 5 | loss : 0.251406
Accuracy : 93.05
------
Save Model(Epoch: 6, Accuracy: 93.8)
epoch : 6 | loss : 0.228394
Accuracy : 93.80
------
Save Model(Epoch: 7, Accuracy: 94.31)
epoch : 7 | loss : 0.208147
Accuracy : 94.31
------
Save Model(Epoch: 8, Accuracy: 94.7)
epoch : 8 | loss : 0.190687
Accuracy : 94.70
------
Save Model(Epoch: 9, Accuracy: 94.93)
epoch : 9 | loss : 0.175343
Accuracy : 94.93
------
Save Model(Epoch: 10, Accuracy: 95.36)
epoch : 10 | loss : 0.162055
Accuracy : 95.36
------
Save Model(Epoch: 11, Accuracy: 95.47)
epoch : 11 | loss : 0.150416
Accuracy : 95.47




------
Save Model(Epoch: 12, Accuracy: 95.73)
epoch : 12 | loss : 0.140279
Accuracy : 95.73




------
Save Model(Epoch: 13, Accuracy: 96.05)
epoch : 13 | loss : 0.130966
Accuracy : 96.05




------
Save Model(Epoch: 14, Accuracy: 96.21)
epoch : 14 | loss : 0.122373
Accuracy : 96.21




------
Save Model(Epoch: 15, Accuracy: 96.41)
epoch : 15 | loss : 0.115121
Accuracy : 96.41




------
Save Model(Epoch: 16, Accuracy: 96.49)
epoch : 16 | loss : 0.108121
Accuracy : 96.49




------
Save Model(Epoch: 17, Accuracy: 96.61)
epoch : 17 | loss : 0.102148
Accuracy : 96.61




------
Save Model(Epoch: 18, Accuracy: 96.71)
epoch : 18 | loss : 0.096743
Accuracy : 96.71




------
epoch : 19 | loss : 0.091272
Accuracy : 96.60




------
Save Model(Epoch: 20, Accuracy: 96.92)
epoch : 20 | loss : 0.086725
Accuracy : 96.92




------
