In [22]:
# необходимые импорты
import numpy as np
import torch
import torchvision.datasets as dataset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, WeightedRandomSampler
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torchvision import models

import os
import cv2
import onnxruntime
import torch.onnx as onnx

In [19]:
train_path = r"E:\Datasets\WAGONS_EXISTING_START_DATASET_RTK_TOF\frames\v3\train"
test_path  = r"E:\Datasets\WAGONS_EXISTING_START_DATASET_RTK_TOF\frames\v3\train"

In [4]:
# определим объект трансформации изображений
transform = transforms.Compose([
      transforms.Resize((320, 320)),
      transforms.Grayscale(),
      transforms.ToTensor(),
])

In [5]:
train_data = dataset.ImageFolder(train_path, transform)
test_data = dataset.ImageFolder(test_path, transform)

print(type(train_data))
print(type(test_data))

print(train_data.classes)
print(test_data.classes)

<class 'torchvision.datasets.folder.ImageFolder'>
<class 'torchvision.datasets.folder.ImageFolder'>
['empty', 'wagons']
['empty', 'wagons']


In [33]:
print(train_data[0][0])

tensor([[[0.1529, 0.1529, 0.1529,  ..., 0.1333, 0.1333, 0.1333],
         [0.1529, 0.1529, 0.1529,  ..., 0.1333, 0.1333, 0.1333],
         [0.1529, 0.1529, 0.1529,  ..., 0.1333, 0.1333, 0.1333],
         ...,
         [0.0078, 0.0000, 0.0039,  ..., 0.2196, 0.2275, 0.2275],
         [0.0039, 0.0039, 0.0039,  ..., 0.2706, 0.2627, 0.2627],
         [0.0039, 0.0039, 0.0039,  ..., 0.2824, 0.2745, 0.2627]]])


In [6]:
# определим устройство, на котором будет идти обучение
device = None
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [8]:
state_dict = torch.load("resnet18_wagons_numbers_iter_9_2024-fold-3.pth", map_location=device)
inference_model = models.resnet18(num_classes=2, pretrained=False)
inference_model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
inference_model.load_state_dict(state_dict)
inference_model.eval()
inference_model.to(device)



ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [23]:
# создаем входной тензор модели
x = torch.randn(1, 1, 320, 320, requires_grad=True).to(device)

# сгенерируем выходы модели
out = inference_model(x)

# экспортируем модель в onnx
onnx.export(inference_model, x, "resnet18_wagons_numbers_iter_09_2024-fold-3_rtk.onnx", input_names=["image"], output_names=["output"], )

In [12]:
import onnx

onnx_model = onnx.load("resnet18_wagons_numbers_iter_09_2024-fold-3_rtk.onnx")
onnx.checker.check_model(onnx_model)

In [13]:
# инициализируем сессию  ONNXRuntime
ort_session = onnxruntime.InferenceSession("resnet18_wagons_numbers_iter_09_2024-fold-3_rtk.onnx", providers=["CPUExecutionProvider" if device == "cpu" else "CUDAExecutionProvider"])

# определим функцию перевода тензора в numpy-массив
def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# определим входы и выходы 
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
ort_outs = ort_session.run(None, ort_inputs)
print(ort_outs)
print(out)

# сравним результаты
np.testing.assert_allclose(to_numpy(out[0]), ort_outs[0][0], rtol=2e-02, atol=1e-03)
print("Все хорошо!")

[array([[  6.7854376, -10.482294 ]], dtype=float32)]
tensor([[  6.7819, -10.4788]], device='cuda:0', grad_fn=<AddmmBackward0>)
Все хорошо!


In [14]:
RESNET_INPUT_WIDTH = ort_session.get_inputs()[0].shape[2]
RESNET_INPUT_HEIGHT = ort_session.get_inputs()[0].shape[3]
RESNET_INPUT_CHANNELS = ort_session.get_inputs()[0].shape[1]

In [18]:
path_to_save_images_with_errors_onnx = r"wagons_errors_rtk_tof_resnet_18_v1_onnx\train_cv_pil_preprocess"

In [23]:
# CV2 preprocess (deprecated)

import shutil

total_images_count = 0
true_classified_images_count = 0

test_listdir = os.listdir(test_path)
for class_name in test_listdir:
    images_listdir = os.listdir(os.path.join(test_path, class_name))
    for i, image in enumerate(images_listdir):
        path_to_image = os.path.join(test_path, class_name, image)
        image_array_ = cv2.imdecode(np.fromfile(path_to_image, dtype=np.uint8), cv2.IMREAD_COLOR)
        image_array = cv2.cvtColor(image_array_, cv2.COLOR_BGR2GRAY)
        image_array = cv2.resize(image_array, (RESNET_INPUT_WIDTH, RESNET_INPUT_HEIGHT), cv2.INTER_CUBIC)
        image_array = image_array / 255
        #image_array = image_array.transpose(2, 0, 1)
        image_array = np.expand_dims(image_array, axis=(0, 1))
        outputs = ort_session.run(None, {ort_session.get_inputs()[0].name: image_array.astype("float32")})
        preds_id = outputs[0].argmax()
        class_name_pred = test_data.classes[preds_id]
        #print(class_name_pred + "\n")
        if class_name == class_name_pred:
            true_classified_images_count += 1
        else:
            print("\n" + f"label {class_name}, predicted {class_name_pred}")
            shutil.copy(path_to_image, os.path.join(path_to_save_images_with_errors_onnx, f"label_{class_name}__predicted_{class_name_pred}_{i}.jpg"))
            print(outputs)
        total_images_count += 1
        
        


label empty, predicted wagons
[array([[-0.20640664,  0.3218738 ]], dtype=float32)]

label empty, predicted wagons
[array([[-3.7841067,  3.7760658]], dtype=float32)]

label empty, predicted wagons
[array([[-2.987074,  3.022423]], dtype=float32)]

label empty, predicted wagons
[array([[0.0404208 , 0.41621625]], dtype=float32)]

label empty, predicted wagons
[array([[-2.9859667,  3.0089996]], dtype=float32)]

label empty, predicted wagons
[array([[-2.954782 ,  2.9804134]], dtype=float32)]

label empty, predicted wagons
[array([[-0.28321928,  0.3752904 ]], dtype=float32)]

label empty, predicted wagons
[array([[-0.94171786,  1.0398479 ]], dtype=float32)]

label empty, predicted wagons
[array([[-1.5646654,  1.6214294]], dtype=float32)]

label empty, predicted wagons
[array([[-0.40008032,  0.4932381 ]], dtype=float32)]

label empty, predicted wagons
[array([[-0.24041286,  0.33660764]], dtype=float32)]

label empty, predicted wagons
[array([[-0.08839291,  0.57146704]], dtype=float32)]

label

In [55]:
# PIL preprocess (deprecated)
import shutil
from PIL import Image

total_images_count = 0
true_classified_images_count = 0

test_listdir = os.listdir(test_path)
for class_name in test_listdir:
    images_listdir = os.listdir(os.path.join(test_path, class_name))
    for i, image in enumerate(images_listdir):
        path_to_image = os.path.join(test_path, class_name, image)
        image_array = Image.open(path_to_image)
        image_array = transform(image_array)
        image_array = image_array.unsqueeze(0)
        outputs = ort_session.run(None, {ort_session.get_inputs()[0].name: image_array.numpy().astype("float32")})
        preds_id = outputs[0].argmax()
        class_name_pred = test_data.classes[preds_id]
        #print(class_name_pred + "\n")
        if class_name == class_name_pred:
            true_classified_images_count += 1
        else:
            print("\n" + f"label {class_name}, predicted {class_name_pred}")
            shutil.copy(path_to_image, os.path.join(path_to_save_images_with_errors_onnx, f"label_{class_name}__predicted_{class_name_pred}_{i}.jpg"))
            print(outputs)
        total_images_count += 1


label empty, predicted wagons
[array([[0.00566266, 0.11723654]], dtype=float32)]

label empty, predicted wagons
[array([[-0.39880997,  0.4514043 ]], dtype=float32)]

label empty, predicted wagons
[array([[-0.66727686,  0.706002  ]], dtype=float32)]

label empty, predicted wagons
[array([[-0.73444426,  0.77235806]], dtype=float32)]

label empty, predicted wagons
[array([[-0.7095813,  0.7494876]], dtype=float32)]

label empty, predicted wagons
[array([[-1.0522299,  1.0800035]], dtype=float32)]

label empty, predicted wagons
[array([[-0.6033508 ,  0.64631003]], dtype=float32)]

label wagons, predicted empty
[array([[ 0.2649967, -0.1214422]], dtype=float32)]

label wagons, predicted empty
[array([[ 1.2315242, -1.0737884]], dtype=float32)]

label wagons, predicted empty
[array([[ 1.411121  , -0.45326486]], dtype=float32)]

label wagons, predicted empty
[array([[ 1.1551391, -0.6091871]], dtype=float32)]

label wagons, predicted empty
[array([[ 0.6462471 , -0.54365164]], dtype=float32)]

lab

In [20]:
# mixed cv2 and PIL preprocess
import cv2
from PIL import Image
import shutil

total_images_count = 0
true_classified_images_count = 0

test_listdir = os.listdir(test_path)
for class_name in test_listdir:
    images_listdir = os.listdir(os.path.join(test_path, class_name))
    for i, image in enumerate(images_listdir):
        path_to_image = os.path.join(test_path, class_name, image)
        image_array_ = cv2.imdecode(np.fromfile(path_to_image, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
        image_array = cv2.cvtColor(image_array_, cv2.COLOR_BGR2GRAY)
        image_array = np.array(Image.fromarray(image_array).resize((RESNET_INPUT_WIDTH, RESNET_INPUT_HEIGHT), Image.BILINEAR))
        image_array = image_array / 255
        #image_array = image_array.transpose(2, 0, 1)
        image_array = np.expand_dims(image_array, axis=(0, 1))
        outputs = ort_session.run(None, {ort_session.get_inputs()[0].name: image_array.astype("float32")})
        preds_id = outputs[0].argmax()
        class_name_pred = test_data.classes[preds_id]
        #print(class_name_pred + "\n")
        if class_name == class_name_pred:
            true_classified_images_count += 1
        else:
            print("\n" + f"label {class_name}, predicted {class_name_pred}")
            shutil.copy(path_to_image, os.path.join(path_to_save_images_with_errors_onnx, f"label_{class_name}__predicted_{class_name_pred}_{i}.jpg"))
            print(outputs)
        total_images_count += 1


label empty, predicted wagons
[array([[-0.82478905,  0.02299303]], dtype=float32)]

label empty, predicted wagons
[array([[-0.82478905,  0.02299303]], dtype=float32)]

label empty, predicted wagons
[array([[-0.82478905,  0.02299303]], dtype=float32)]

label empty, predicted wagons
[array([[-0.82478905,  0.02299303]], dtype=float32)]

label empty, predicted wagons
[array([[-0.82478905,  0.02299303]], dtype=float32)]

label empty, predicted wagons
[array([[-0.82478905,  0.02299303]], dtype=float32)]

label empty, predicted wagons
[array([[-0.76456225, -0.3226755 ]], dtype=float32)]

label empty, predicted wagons
[array([[-0.76456225, -0.3226755 ]], dtype=float32)]

label empty, predicted wagons
[array([[-0.76456225, -0.3226755 ]], dtype=float32)]

label empty, predicted wagons
[array([[-0.76456225, -0.3226755 ]], dtype=float32)]

label empty, predicted wagons
[array([[-0.76456225, -0.3226755 ]], dtype=float32)]

label empty, predicted wagons
[array([[-0.76456225, -0.3226755 ]], dtype=fl

In [16]:
### TEST CELL 

import cv2
from PIL import Image
import shutil
import onnxruntime

total_images_count = 0
true_classified_images_count = 0

device = None
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

ort_session = onnxruntime.InferenceSession(r"E:\Repositories\arscis-deep-learning\src\weights\experimental\classification\wagon_presence\resnet\novorossiysk\onnx\actual\resnet50_1_3_224_224_iter_06_2024.onnx", providers=["CPUExecutionProvider" if device == "cpu" else "CUDAExecutionProvider"])
test_path  = r"E:\Datasets\WAGONS_EXISTING_START_DATASET_RTK_TOF\frames\K71_K72_frames"
path_to_save_splitted_on_classes_frames = r"E:\Datasets\WAGONS_EXISTING_START_DATASET_RTK_TOF\frames\splitted_k71_k72_frames_on_classes_novor_classifier"

class_names = {0: "empty", 1: "wagons"}

RESNET_INPUT_WIDTH = ort_session.get_inputs()[0].shape[2]
RESNET_INPUT_HEIGHT = ort_session.get_inputs()[0].shape[3]
RESNET_INPUT_CHANNELS = ort_session.get_inputs()[0].shape[1]

images_listdir = os.listdir(test_path)
for i, image in enumerate(images_listdir):
    path_to_image = os.path.join(test_path, image)
    image_array_ = cv2.imdecode(np.fromfile(path_to_image, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
    image_array = cv2.cvtColor(image_array_, cv2.COLOR_BGR2GRAY)
    image_array = np.array(Image.fromarray(image_array).resize((RESNET_INPUT_WIDTH, RESNET_INPUT_HEIGHT), Image.BILINEAR))
    image_array = image_array / 255
    #image_array = image_array.transpose(2, 0, 1)
    image_array = np.expand_dims(image_array, axis=(0, 1))
    outputs = ort_session.run(None, {ort_session.get_inputs()[0].name: image_array.astype("float32")})
    preds_id = outputs[0].argmax()
    class_name_pred = class_names[preds_id]
    if class_name_pred.lower() == "empty":
        shutil.copy(path_to_image, os.path.join(path_to_save_splitted_on_classes_frames, "empty", os.path.basename(path_to_image)))
    elif class_name_pred.lower() == "wagons":
        shutil.copy(path_to_image, os.path.join(path_to_save_splitted_on_classes_frames, "wagons", os.path.basename(path_to_image)))
        

In [42]:
true_classified_images_count / total_images_count

0.9935387673956262