
## <span style="color: red"> **Classification by RESNET18**</span> 

### <span style="color: red"> **Model ResNet18**</span> 

In [3]:
# Building the residual block (ResNet-18 stacks 8 of them)
class Residual_block(nn.Module):
    """Basic residual block built from two 3x3 convolutions.

    The main path is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. Its result is
    added to a shortcut of the input and passed through a final ReLU.

    Args:
        in_block: Number of input channels.
        out_block: Number of output channels.
        stride: Stride of the first convolution, and of the projection
            shortcut when one is created.
    """

    def __init__(self, in_block, out_block, stride=1):
        super().__init__()

        # Main path: only the first convolution may change the spatial size.
        self.conv1 = nn.Conv2d(
            in_block, out_block, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_block)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_block, out_block, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_block)

        # Shortcut: an empty Sequential (identity) when input and output
        # shapes agree, otherwise a 1x1 convolution + BN projection.
        needs_projection = stride != 1 or in_block != out_block
        if needs_projection:
            self.skip_connect = nn.Sequential(
                nn.Conv2d(in_block, out_block, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_block),
            )
        else:
            self.skip_connect = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))
        residual += self.skip_connect(x)
        return self.relu(residual)
          
        

In [2]:
# Building ResNet-18 (the complete architecture)
class ResNet18(nn.Module):
    """ResNet-18 style image classifier.

    Layout: 7x7 stride-2 convolution stem -> BN -> ReLU -> 3x3 stride-2
    max-pool -> four groups of two residual blocks (64, 128, 256, 512
    channels) -> global average pool -> dropout -> linear classifier.

    For example, a 224x224 input is 112x112 after the stem convolution and
    56x56 after the max-pool.

    Args:
        Residual_block: Block factory, called as
            ``Residual_block(in_channels, out_channels, stride)`` and
            expected to return an ``nn.Module``.
        num_classes: Size of the output layer.
        in_channels: Number of channels of the input images. Defaults to 1,
            the value that used to be hard-coded.
    """

    def __init__(self, Residual_block, num_classes=10, in_channels=1):
        super().__init__()
        # Running channel count; make_layer reads and updates it.
        self.in_block = 64

        # Stem.
        self.conv1 = nn.Conv2d(
            in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
        )
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual groups with two blocks each; every group after the
        # first halves the spatial size through its first block.
        self.layer1 = self.make_layer(Residual_block, 64, 2, stride=1)
        self.layer2 = self.make_layer(Residual_block, 128, 2, stride=2)
        self.layer3 = self.make_layer(Residual_block, 256, 2, stride=2)
        self.layer4 = self.make_layer(Residual_block, 512, 2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output is (N, 512, 1, 1)
        # Dropout is applied to the flattened (N, 512) features, so the
        # element-wise nn.Dropout is the right layer. nn.Dropout2d is meant
        # for 3-D/4-D inputs and PyTorch deprecates feeding it 2-D tensors.
        self.drop = nn.Dropout()
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, out_block, num_blocks, stride):
        """Build one group of ``num_blocks`` residual blocks.

        Only the first block uses ``stride``; the remaining ones use stride 1.
        ``self.in_block`` is updated so the next block/group receives the
        right number of input channels.

        Returns:
            An ``nn.Sequential`` holding the blocks in order.
        """
        block_strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in block_strides:
            layers.append(block(self.in_block, out_block, block_stride))
            self.in_block = out_block
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of images ``(N, in_channels, H, W)`` to logits ``(N, num_classes)``."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        out = self.avgpool(out)
        out = out.flatten(1)  # (N, 512, 1, 1) -> (N, 512)
        out = self.drop(out)
        out = self.fc(out)
        return out
    

In [4]:
def ResNet():
    """Return a new ResNet18 built from Residual_block with the default class count."""
    model = ResNet18(Residual_block)
    return model

### <span style="color: red"> **Обучение модели в MLflow**</span> 

In [None]:
# mlflow server --backend-store-uri "file:///C:/Users/Admin/ML_flow_Tracking/data_local" --default-artifact-root "file:///C:/Users/Admin/ML_flow_Tracking/artefacts" --host localhost --port 5000

In [7]:
# Set the user name under which the experiments are run
os.environ["USER"] = "Evgenii"

In [8]:
# Fix the random seed for reproducibility: it initialises the random number
# generators used by the libraries, so that someone else can repeat the
# experiment and obtain the same results.
seed = 42
for _seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed)
# NOTE: the NumPy and Python `random` generators are not seeded here.

# Make cuDNN deterministic and turn its benchmark mode off.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [9]:
# MLflow initialisation: point the client at the tracking server on
# 127.0.0.1:5000 and select the experiment used by the runs below
import mlflow.experiments

mlflow.set_tracking_uri('http://127.0.0.1:5000/')
mlflow.set_experiment('classif_model_my_MNIST')

<Experiment: artifact_location='file:C:/Users/Admin/My_project_1/Users/Admin/ML_flow_Tracking/artefact/839351144435899136', creation_time=1741689566276, experiment_id='839351144435899136', last_update_time=1741689566276, lifecycle_stage='active', name='classif_model_my_MNIST', tags={}>

In [10]:
import logging

# Turn off MLflow's warning output: raise the "mlflow" logger threshold to
# ERROR so that only errors and above are emitted
mlflow_logger = logging.getLogger("mlflow")
mlflow_logger.setLevel(logging.ERROR)

In [15]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Hyperparameters and run metadata
EPOCH = 30
LR = 1e-3
momentum = 0
weight_decay = 0
drop = "Yes"
opt = "Adam"
run_name = "Exp_RN18_12"

In [None]:
# Start of the MLflow run: train the network, evaluate it after every epoch,
# log parameters/metrics, and store the model whenever test accuracy improves.
#
# start_run() is used as a context manager, so the run is ended when the
# block exits and no explicit mlflow.end_run() call is needed afterwards.
with mlflow.start_run(run_name=run_name) as run:
    net = ResNet().to(device)
    loss_func = nn.CrossEntropyLoss()
    # Adam is constructed without a momentum argument; `momentum` is only
    # logged below together with the other hyperparameters.
    optimizer = optim.Adam(net.parameters(), lr=LR, weight_decay=weight_decay)

    mlflow.log_param("momentum", momentum)
    mlflow.log_param("weight_decay", weight_decay)
    mlflow.log_param("lr", LR)
    mlflow.log_param("optimizer", opt)
    mlflow.log_param("epochs", EPOCH)
    mlflow.log_param("dropout", drop)

    maxacc = 0  # best test accuracy (in percent) seen so far

    # Epochs are numbered from 1; the number is also used as the MLflow step.
    for epoch in range(1, EPOCH + 1):
        print(f'Началось обучение {epoch} эпохи')

        # ---------------- training pass ----------------
        net.train()
        train_loss = 0.0
        correct = 0
        train_samples = 0

        for data in train_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = net(inputs)
            loss = loss_func(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the batch loss by the batch size so that the epoch value
            # is a per-sample average even if the last batch is smaller.
            batch_size = outputs.size(0)
            train_loss += loss.item() * batch_size
            train_samples += batch_size

            # Index of the largest logit is the predicted class.
            _, predicted = torch.max(outputs.detach(), 1)
            # .item() keeps the counter a plain int, so the accuracy logged
            # to MLflow is a Python float rather than a tensor.
            correct += predicted.eq(labels).sum().item()

        train_loss /= train_samples
        train_acc = 100 * correct / train_samples

        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("train_acc", train_acc, step=epoch)
        print(f'The Epoch {epoch}:')
        print(f'Train loss - {train_loss:.3f}, Train accuracy - {train_acc:.2f} %')

        # ---------------- evaluation pass ----------------
        net.eval()
        test_loss = 0.0
        correct = 0
        test_samples = 0

        with torch.no_grad():
            for data in test_loader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                loss = loss_func(outputs, labels)

                batch_size = outputs.size(0)
                test_loss += loss.item() * batch_size
                test_samples += batch_size

                _, predicted = torch.max(outputs, 1)
                correct += predicted.eq(labels).sum().item()

        test_loss /= test_samples
        test_acc = 100 * correct / test_samples

        mlflow.log_metric("test_loss", test_loss, step=epoch)
        mlflow.log_metric("test_acc", test_acc, step=epoch)
        print(f'Test loss - {test_loss:.3f}, Test accuracy - {test_acc:.2f} %')

        # Log the model only when it beats the best test accuracy so far.
        if test_acc > maxacc:
            print('Saving model because its better')
            maxacc = test_acc
            mlflow.pytorch.log_model(net, "model_my_ResNet18")
        print('-------')

    print(f'Max accuracy - {maxacc:.2f} %')
    mlflow.log_metric("max test accuracy", maxacc)

Началось обучение 1 эпохи
