In [None]:
! pip install mlflow # mlflow
! pip install bentoml # bentoml 
! pip install pyngrok # 로컬 개발환경을 원격으로 공유해주는 라이브러리

In [1]:
import torch.nn as nn
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np
import torch.optim as optim
import mlflow # mlflow 사용을 위해

  from .autonotebook import tqdm as notebook_tqdm


# Model 정의

In [2]:
class Net(nn.Module):
    """Three-layer fully connected network: 784 -> 100 -> 100 -> 10.

    The same ReLU module is applied after each of the first two linear
    layers; the output of the last linear layer is returned unchanged.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and creation order are kept as-is: they determine
        # the state_dict keys and the order in which layers are initialised.
        self.fc1 = nn.Linear(784, 100)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        """Run ``x`` through fc1 -> ReLU -> fc2 -> ReLU -> fc3 and return the result."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Dataset 정의  
## MNIST Dataset을 사용하여 학습, 검증을 합니다.

In [3]:
download_root = 'MNIST_data/'

# Training split (train=True); images are converted with ToTensor().
train_dataset = datasets.MNIST(
    root=download_root,
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Test split (train=False), stored under the same root and converted the same way.
test_dataset = datasets.MNIST(
    root=download_root,
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Batch_size, Train, Test Dataloader 정의

In [4]:
batch_size = 100

# Both loaders use the same batch size and are created with shuffle=True.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=True, batch_size=batch_size)


# Model, Loss, 학습률, Optimizer 정의

In [5]:
# Hyperparameter and loss used by the training loop.
learning_rate = 0.01
loss_function = nn.CrossEntropyLoss()

# Fresh network; its gradients are cleared before the optimizer is attached.
model = Net()
model.zero_grad()

# Plain SGD over all parameters of the network.
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)


# Epoch 정의 (10번)


In [6]:
# Number of passes over the training data.
epochs = 10

# Number of batches the training loader yields per epoch.
total_batch = len(train_loader)
print(total_batch)

600


# MLflow, Ngrok 환경설정

In [7]:
# Experiment name; grouping runs under a name makes them easier to manage.
experiment_name = 'mnist_practice'

# Look the experiment up by name and create it first if it does not exist yet.
experiment = mlflow.get_experiment_by_name(experiment_name)
if not experiment:
    mlflow.create_experiment(name=experiment_name)
    experiment = mlflow.get_experiment_by_name(experiment_name)

In [9]:
import os

# Tracking server address; the same value is also exported through the
# MLFLOW_TRACKING_URI environment variable.
MLFLOW_TRACKING_URI = 'http://127.0.0.1:5000'
os.environ['MLFLOW_TRACKING_URI'] = MLFLOW_TRACKING_URI

print(MLFLOW_TRACKING_URI)

# Point MLflow at the local server so that runs are recorded there.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)



http://127.0.0.1:5000


# Seed 고정

In [8]:
import random
import torch.backends.cudnn as cudnn

# One seed value shared by every random number generator used in the notebook.
seed = 0

# Python and NumPy generators.
random.seed(seed)
np.random.seed(seed)

# torch generators: the default one, the current CUDA device, and all CUDA devices.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# cuDNN: turn benchmark mode off and deterministic mode on.
cudnn.deterministic = True
cudnn.benchmark = False

# Train, Track
## 학습을 진행하고 MLflow로 학습을 기록하는 부분입니다.

In [27]:
# Per-epoch history of the training and validation metrics.
train_loss_list = []
train_acc_list = []

val_loss_list = []
val_acc_list = []

best_accuracy = 0

# Select the experiment by name here, so that it is looked up (and created if
# missing) on the tracking URI that is active when the run starts.
mlflow.set_experiment(experiment_name)

with mlflow.start_run():
    # Hyperparameters do not change during the run, so they are logged once.
    mlflow.log_param('learning-rate', learning_rate)
    mlflow.log_param('epoch', epochs)
    mlflow.log_param('batch_size', batch_size)
    mlflow.log_param('seed', seed)

    # Taken from the loader instead of being hard-coded, so the loop keeps
    # working when the dataset or batch size changes.
    num_train_samples = len(train_loader.dataset)

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        train_loss_sum = 0.0
        train_correct = 0
        for images, labels in train_loader:
            # Flatten each image to a vector; the batch dimension is read from
            # the tensor so a smaller final batch is handled as well.
            images = images.reshape(images.size(0), -1)

            optimizer.zero_grad()  # reset gradients accumulated by the previous step

            # forward
            pred = model(images)
            loss = loss_function(pred, labels)

            # backward
            loss.backward()

            # update
            optimizer.step()

            # Accumulate plain Python numbers (.item()) so no autograd graph is
            # kept alive; the batch-mean loss is weighted by the batch size.
            train_loss_sum += loss.item() * labels.size(0)
            train_correct += (torch.argmax(pred, 1) == labels).sum().item()

        train_loss = train_loss_sum / num_train_samples
        # NOTE: train_accuracy is a fraction in [0, 1], while the validation
        # accuracy below is a percentage; both scales are kept as they were.
        train_accuracy = train_correct / num_train_samples
        # Average training loss of this epoch (name kept for the print below).
        avg_cost = train_loss

        # ---- validation pass ----
        model.eval()
        val_loss_sum = 0.0
        total = 0
        correct = 0
        with torch.no_grad():  # no gradients are needed for evaluation
            for images, labels in test_loader:
                images = images.reshape(images.size(0), -1)

                outputs = model(images)
                val_loss_sum += loss_function(outputs, labels).item() * labels.size(0)
                predict = torch.argmax(outputs, 1)

                total += labels.size(0)
                correct += (predict == labels).sum().item()  # number of correct predictions

        val_loss = val_loss_sum / total
        accuracy = 100 * correct / total

        train_loss_list.append(train_loss)
        train_acc_list.append(train_accuracy)
        val_loss_list.append(val_loss)
        val_acc_list.append(accuracy)

        if accuracy > best_accuracy:
            # Keep the best weights on disk and log the same model to MLflow,
            # so the tracked model matches the saved checkpoint.
            torch.save(model.state_dict(), 'model.pt')
            mlflow.pytorch.log_model(model, 'model')
            best_accuracy = accuracy
            print(f"Save Model(Epoch: {epoch+1}, Accuracy: {best_accuracy:.5})")

        print("epoch : {} | loss : {:.6f}" .format(epoch+1, avg_cost))
        print("Accuracy : {:.2f}".format(accuracy))

        # step=epoch gives every metric one point per epoch in the MLflow UI.
        mlflow.log_metric('train_accuracy', train_accuracy, step=epoch)
        mlflow.log_metric('train_loss', train_loss, step=epoch)
        mlflow.log_metric('valid_accuracy', accuracy, step=epoch)
        mlflow.log_metric('valid_loss', val_loss, step=epoch)
        print("------")



Save Model(Epoch: 1, Accuracy: 70.65)
epoch : 1 | loss : 2.043801
Accuracy : 70.65




------




Save Model(Epoch: 2, Accuracy: 84.6)
epoch : 2 | loss : 0.884435
Accuracy : 84.60




------




Save Model(Epoch: 3, Accuracy: 88.2)
epoch : 3 | loss : 0.505807
Accuracy : 88.20




------




Save Model(Epoch: 4, Accuracy: 89.67)
epoch : 4 | loss : 0.408927
Accuracy : 89.67




------




Save Model(Epoch: 5, Accuracy: 90.39)
epoch : 5 | loss : 0.365257
Accuracy : 90.39




------




Save Model(Epoch: 6, Accuracy: 91.05)
epoch : 6 | loss : 0.338573
Accuracy : 91.05




------




Save Model(Epoch: 7, Accuracy: 91.55)
epoch : 7 | loss : 0.319484
Accuracy : 91.55




------




Save Model(Epoch: 8, Accuracy: 91.77)
epoch : 8 | loss : 0.304454
Accuracy : 91.77




------




Save Model(Epoch: 9, Accuracy: 92.12)
epoch : 9 | loss : 0.291567
Accuracy : 92.12




------




Save Model(Epoch: 10, Accuracy: 92.55)
epoch : 10 | loss : 0.280728
Accuracy : 92.55




------
