In [26]:
import torch.nn as nn
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np
import torch.optim as optim
import mlflow # mlflow 사용을 위해
import warnings

# Model 정의

In [27]:
class Net(nn.Module):
    """Fully connected classifier with two hidden layers (784 -> 100 -> 100 -> 10).

    The input is expected to be already flattened to 784 features per sample
    (784 = 28*28). The output is the raw result of the last linear layer; no
    softmax is applied inside the module.
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in the order fc1, relu, fc2, fc3; the single
        # ReLU instance is reused after both hidden layers.
        self.fc1 = nn.Linear(in_features=784, out_features=100)  # 28*28
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=100, out_features=100)
        self.fc3 = nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        """Run `x` through fc1 -> ReLU -> fc2 -> ReLU -> fc3 and return the result."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [28]:
# Instantiate the network once so the notebook displays its layer structure.
Net()

Net(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=10, bias=True)
)

# Dataset 정의  
## MNIST Dataset을 사용하여 학습과 검증을 진행합니다.

In [29]:
# Directory the MNIST files are downloaded into (and read back from).
download_root = 'MNIST_data/'

# Training split; every image is converted with ToTensor().
train_dataset = datasets.MNIST(
    root=download_root,
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Held-out split (train=False), prepared the same way.
test_dataset = datasets.MNIST(
    root=download_root,
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Batch_size, Train, Test Dataloader 정의

In [30]:
# Mini-batch size shared by the training and evaluation loaders.
batch_size = 100

# Training batches are drawn in a new random order every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation only aggregates over the whole split, so shuffling adds nothing;
# a fixed order keeps per-batch results comparable between epochs and runs.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 학습률, Optimizer 정의

In [31]:
# Step size handed to the optimizer below.
learning_rate = 0.001

# Multi-class objective applied to the network's raw outputs.
loss_function = nn.CrossEntropyLoss()

# Fresh network; gradients are cleared once right after construction.
model = Net()
model.zero_grad()

# AdamW over all parameters of the network.
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Seed 고정


In [32]:
import random
import torch.backends.cudnn as cudnn

# One seed value shared by every random number generator seeded below.
seed = 0

# NOTE(review): `model = Net()` and the DataLoaders are created in cells that
# come before this one, so on a top-to-bottom run the initial weights are drawn
# before these seeds are set. Confirm whether this cell should move up.
random.seed(seed)                 # Python's built-in RNG
np.random.seed(seed)              # NumPy global RNG
torch.manual_seed(seed)           # PyTorch RNG
torch.cuda.manual_seed(seed)      # current CUDA device
torch.cuda.manual_seed_all(seed)  # all CUDA devices

# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
cudnn.deterministic = True
cudnn.benchmark = False

# MLflow를 활용하여 학습진행

In [40]:
# Train the model for `epochs` epochs, evaluate on the test loader after every
# epoch, and record parameters, metrics and the best model in MLflow.

# Suppress all Python warnings from here on.
warnings.filterwarnings(action='ignore')
experiment_name = 'chaos_AIP'  # experiment name; groups the runs of this notebook

# The tracking URI must be set BEFORE any experiment lookup/creation; otherwise
# the experiment id is resolved against a different tracking store than the one
# the run is written to.
mlflow.set_tracking_uri('http://10.5.187.118:30767')

if not mlflow.get_experiment_by_name(experiment_name):
    mlflow.create_experiment(name=experiment_name)
experiment = mlflow.get_experiment_by_name(experiment_name)

# Per-epoch history (one float appended per epoch to each list).
train_loss_list = []
train_acc_list = []

val_loss_list = []
val_acc_list = []

total_batch = len(train_loader)  # number of mini-batches per training epoch
epochs = 20


best_accuracy = 0
with mlflow.start_run(experiment_id=experiment.experiment_id, run_name="boom"):
    # Hyperparameters do not change during the run, so they are logged once,
    # up front; they are recorded even if training is interrupted.
    mlflow.log_param('learning-rate', learning_rate)
    mlflow.log_param('epoch', epochs)
    mlflow.log_param('batch_size', batch_size)
    mlflow.log_param('seed', seed)
    mlflow.log_param('optimizer', optimizer)

    for epoch in range(epochs):
        # ---------------- training ----------------
        model.train()
        train_loss_sum = 0.0  # sum of per-sample losses
        train_correct = 0
        train_seen = 0
        for images, labels in train_loader:
            # Flatten each image; the batch dimension is taken from the data so
            # a smaller final batch does not break the reshape.
            images = images.reshape(images.size(0), -1)

            optimizer.zero_grad()  # reset accumulated gradients

            # forward
            pred = model(images)
            loss = loss_function(pred, labels)

            # backward
            loss.backward()

            # update
            optimizer.step()

            # Accumulate plain Python numbers (.item()) so no autograd graph is
            # kept alive across iterations.
            n_samples = labels.size(0)
            train_loss_sum += loss.item() * n_samples
            train_correct += (torch.argmax(pred, 1) == labels).sum().item()
            train_seen += n_samples

        train_loss = train_loss_sum / train_seen
        # Fraction in [0, 1] (unlike `accuracy` below, which is a percentage).
        train_accuracy = train_correct / train_seen

        # ---------------- validation ----------------
        model.eval()
        valid_loss_sum = 0.0
        total = 0
        correct = 0
        with torch.no_grad():  # no gradients needed for evaluation
            for images, labels in test_loader:
                images = images.reshape(images.size(0), -1)

                outputs = model(images)
                valid_loss_sum += loss_function(outputs, labels).item() * labels.size(0)
                predict = torch.argmax(outputs, 1)

                total += labels.size(0)
                correct += (predict == labels).sum().item()  # number of correct predictions

        valid_loss = valid_loss_sum / total
        accuracy = 100 * correct / total  # percentage

        train_loss_list.append(train_loss)
        train_acc_list.append(train_accuracy)
        val_loss_list.append(valid_loss)
        val_acc_list.append(accuracy)

        # Keep the weights of the best epoch so far, locally and in MLflow.
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), 'model.pt')
            mlflow.pytorch.log_model(model, 'model')
            best_accuracy = accuracy
            print(f"Save Model(Epoch: {epoch+1}, Accuracy: {best_accuracy:.5})")

        print("epoch : {} | loss : {:.6f}" .format(epoch+1, train_loss))
        print("Accuracy : {:.2f}".format(accuracy))

        # `step` gives every epoch its own x-position in the MLflow metric charts.
        mlflow.log_metric('train_accuracy', train_accuracy, step=epoch + 1)
        mlflow.log_metric('train_loss', train_loss, step=epoch + 1)
        mlflow.log_metric('valid_accuracy', accuracy, step=epoch + 1)
        mlflow.log_metric('valid_loss', valid_loss, step=epoch + 1)
        print("------")
# The `with` block ends the run on exit; no explicit mlflow.end_run() is needed.

KeyboardInterrupt: 

In [34]:
import os
import json
import bentoml
import numpy as np
import pandas as pd
from torch import cuda

In [35]:
import shutil

# Local directory the in-memory model is exported to in MLflow format.
MLFLOW_PATH = './model_mlflow'

# Always export the model that is currently in memory. If a directory from an
# earlier session were kept, the import below would pick up those old weights
# instead of the model trained in this notebook run.
if os.path.isdir(MLFLOW_PATH):
    shutil.rmtree(MLFLOW_PATH)
mlflow.pytorch.save_model(model, MLFLOW_PATH)  # save the model to the local directory in MLflow format

# Register the exported MLflow model in the BentoML model store; the returned
# model object is displayed as the cell output.
bentoml.mlflow.import_model("mnist_clf", model_uri=MLFLOW_PATH)

Model(tag="mnist_clf:y7m546s2noh6f2kn", path="/home/khkim/bentoml/models/mnist_clf/y7m546s2noh6f2kn/")

In [36]:
!bentoml models list mnist_clf # print the list of currently registered models

[1m [0m[1mTag                       [0m[1m [0m[1m [0m[1mModule        [0m[1m [0m[1m [0m[1mSize      [0m[1m [0m[1m [0m[1mCreation Time      [0m[1m [0m
 mnist_clf:y7m546s2noh6f2kn  bentoml.mlflow  355.39 KiB  2022-11-02 14:04:07 
 mnist_clf:pycdv6sy3wh6f2kn  bentoml.mlflow  355.39 KiB  2022-10-31 14:33:04 
 mnist_clf:klquv2syykh6f2kn  bentoml.mlflow  355.39 KiB  2022-10-31 11:18:35 
 mnist_clf:xkvy4zsyygh6f2kn  bentoml.mlflow  355.39 KiB  2022-10-31 11:14:20 
 mnist_clf:v65qkwsyych6f2kn  bentoml.mlflow  355.39 KiB  2022-10-31 11:06:52 


In [3]:
# Load the MNIST test samples stored as JSON, then convert them to a float32 array.
with open('./test_input.json', 'r') as f:
    test_input_raw = json.load(f)

test_input_arr = np.array(test_input_raw, dtype=np.float32)

FileNotFoundError: [Errno 2] No such file or directory: './test_input.json'

In [1]:
# Number of entries along the first axis of the loaded test input array.
print(len(test_input_arr))

NameError: name 'test_input_arr' is not defined

In [38]:
# API smoke test: run the most recently imported "mnist_clf" model in-process.
mnist_bento_model = bentoml.mlflow.get("mnist_clf:latest")
runner = mnist_bento_model.to_runner()

# Initialise the runner inside this process instead of a served runner worker.
runner.init_local()

# The prediction for the test inputs is the cell's displayed output.
runner.predict.run(test_input_arr)

'Runner.init_local' is for debugging and testing only.


array([[ -0.40538397,  -3.2523298 ,   3.0703802 ,   5.5641313 ,
         -5.7938633 ,   0.3673871 , -12.363699  ,  11.423534  ,
          1.1209784 ,   2.2345915 ],
       [  2.5325568 ,   5.260461  ,  11.839561  ,   5.9065714 ,
        -12.815116  ,   0.8554225 ,   3.3562913 ,  -7.457034  ,
          1.7260301 , -12.618624  ],
       [ -5.5542216 ,   7.381215  ,   0.96818936,  -0.879593  ,
         -1.4426495 ,  -1.2838367 ,   0.17502701,   2.1546166 ,
          1.496681  ,  -2.1817076 ]], dtype=float32)

In [39]:
# Resolve the run whose logged model should be loaded. Prefer the most recent
# run of this session so the cell keeps working after a re-run; fall back to
# the previously recorded run id when no run has been made in this session.
last_run = mlflow.last_active_run()
run_id = last_run.info.run_id if last_run is not None else 'f35718cb70f542aa8b59eca0cc7e1a70'
logged_model = f'runs:/{run_id}/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on the test inputs; the result is the cell's displayed output.
loaded_model.predict(pd.DataFrame(test_input_arr))


RestException: RESOURCE_DOES_NOT_EXIST: Run with id=f35718cb70f542aa8b59eca0cc7e1a70 not found

In [None]:
# Start the BentoML development HTTP server for `service:svc` with auto-reload.
# The command keeps running until it is interrupted (CTRL+C).
!bentoml serve service:svc --reload

2022-10-31T11:18:42+0900 [INFO] [cli] Prometheus metrics for HTTP BentoServer from "service:svc" can be accessed at http://localhost:3000/metrics.
2022-10-31T11:18:43+0900 [INFO] [cli] Starting development HTTP BentoServer from "service:svc" running on http://0.0.0.0:3000 (Press CTRL+C to quit)
2022-10-31 11:18:44 circus[2119121] [INFO] Loading the plugin...
2022-10-31 11:18:44 circus[2119121] [INFO] Endpoint: 'tcp://127.0.0.1:36727'
2022-10-31 11:18:44 circus[2119121] [INFO] Pub/sub: 'tcp://127.0.0.1:39683'
2022-10-31T11:18:44+0900 [INFO] [observer] Watching directories: ['/home/khkim/aip/AIP', '/home/khkim/bentoml/models']
^C
