In [1]:
from PIL import Image
import os 
# from sklearn.linear_model import LinearRegression
# from sklearn.metrics import mean_squared_error, r2_score
import random

import numpy as np
import matplotlib.pyplot as plt

# Pytorch library import
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# scikit-learn library import
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Pandas library import
import pandas as pd

import torchvision as torchvision
import torchvision.transforms as transforms # 데이터 전처리를 위해 사용하는 라이브러리

In [2]:
# device = torch.device("cpu")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [3]:

images = '../../dataset/ver1/random_polygon/images/'
png_files = sorted([f for f in os.listdir(images) if f.endswith('.png') and f.split('.')[0].isdigit()]) # 순서대로
sorted_files = sorted(png_files, key=lambda x: int(x.split(".")[0]))

cnt = 0
x_all = []
for f in sorted_files:
    path = os.path.join(images, f)
    x = Image.open(path)
    x_array = np.array(x)
    x_all.append(x_array)
    cnt += 1

    if cnt == 204800:
        break
    
print("random_polygon : ", cnt)

images = '../../dataset/ver1/random_ellipse/images/'
png_files = sorted([f for f in os.listdir(images) if f.endswith('.png') and f.split('.')[0].isdigit()]) # 순서대로
sorted_files = sorted(png_files, key=lambda x: int(x.split(".")[0]))

cnt = 0
for f in sorted_files:
    path = os.path.join(images, f)
    x = Image.open(path)
    x_array = np.array(x)
    x_all.append(x_array)
    cnt += 1

    # if cnt == 100000:
    #     break
    
print("random_ellipse : ", cnt)

images = '../../dataset/ver1/random_rectangular/images/'
png_files = sorted([f for f in os.listdir(images) if f.endswith('.png') and f.split('.')[0].isdigit()]) # 순서대로
sorted_files = sorted(png_files, key=lambda x: int(x.split(".")[0]))

cnt = 0
for f in sorted_files:
    path = os.path.join(images, f)
    x = Image.open(path)
    x_array = np.array(x)
    x_all.append(x_array)
    cnt += 1

    # if cnt == 100000:
    #     break

print("random_rectangular : ", cnt)

x_all = np.array(x_all)

print("x_all : ", x_all.shape)



random_polygon :  204800
random_ellipse :  204800
random_rectangular :  204800
x_all :  (614400, 100, 100)


In [4]:
def getDerivativeYaw(yaw_init, yaw_tar):
    _dyaw_all = yaw_tar - yaw_init
    _dyaw_all_len = _dyaw_all.size
    dyaw = np.zeros(_dyaw_all_len)

    for i in list(range(_dyaw_all_len)):
        _dyaw = _dyaw_all[i]

        if _dyaw >= 0:
            if _dyaw <= 180/360:
                dyaw[i] = _dyaw
            else:
                dyaw[i] = _dyaw - 360/360
        else:
            if _dyaw >= -180/360:
                dyaw[i] = _dyaw
            else:
                dyaw[i] = _dyaw + 360/360

    return dyaw


# read csv file into a pandas dataframe
y_polygon = pd.read_csv("../../dataset/ver1/random_polygon/dataset.csv", header=None)
y_ellipse = pd.read_csv("../../dataset/ver1/random_ellipse/dataset.csv", header=None)
y_rectangular = pd.read_csv("../../dataset/ver1/random_rectangular/dataset.csv", header=None)

# convert pandas dataframe to numpy array
# pos_init_world
pos_init_world_polygon = y_polygon.values[:,1:3]
pos_init_world_ellipse = y_ellipse.values[:,1:3]
pos_init_world_rectangular = y_rectangular.values[:,1:3]

# yaw_init_world
yaw_init_world_polygon = y_polygon.values[:,3] - 90 # change frame
yaw_init_world_polygon[yaw_init_world_polygon < 0] += 360 # change frame
yaw_init_world_polygon /= 360 # normalization

yaw_init_world_ellipse = y_ellipse.values[:,3] - 90 # change frame
yaw_init_world_ellipse[yaw_init_world_ellipse < 0] += 360 # change frame
yaw_init_world_ellipse /= 360 # normalization

yaw_init_world_rectangular = y_rectangular.values[:,3] - 90 # change frame
yaw_init_world_rectangular[yaw_init_world_rectangular < 0] += 360 # change frame
yaw_init_world_rectangular /= 360 # normalization

# pos_tar_world
pos_tar_world_polygon = y_polygon.values[:,4:6]
pos_tar_world_ellipse = y_ellipse.values[:,4:6]
pos_tar_world_rectangular = y_rectangular.values[:,4:6]

# yaw_tar_world
yaw_tar_world_polygon = y_polygon.values[:,6] - 90 # change frame
yaw_tar_world_polygon[yaw_tar_world_polygon < 0] += 360 # change frame
yaw_tar_world_polygon /= 360 # normalization

yaw_tar_world_ellipse = y_ellipse.values[:,6] - 90 # change frame
yaw_tar_world_ellipse[yaw_tar_world_ellipse < 0] += 360 # change frame
yaw_tar_world_ellipse /= 360 # normalization

yaw_tar_world_rectangular = y_rectangular.values[:,6] - 90 # change frame
yaw_tar_world_rectangular[yaw_tar_world_rectangular < 0] += 360 # change frame
yaw_tar_world_rectangular /= 360 # normalization

# label
isSuccess_polygon = y_polygon.values[:,7]
isSuccess_ellipse = y_ellipse.values[:,7]
isSuccess_rectangular = y_rectangular.values[:,7]

# pos_tar_local ref.world_frame
pos_tar_local_polygon = pos_tar_world_polygon - pos_init_world_polygon
pos_tar_local_polygon[:,0] = np.cos(-np.radians(yaw_init_world_polygon*360))*pos_tar_local_polygon[:,0] - np.sin(-np.radians(yaw_init_world_polygon*360))*(-pos_tar_local_polygon[:,1])  
pos_tar_local_polygon[:,1] = np.sin(-np.radians(yaw_init_world_polygon*360))*pos_tar_local_polygon[:,0] + np.cos(-np.radians(yaw_init_world_polygon*360))*(-pos_tar_local_polygon[:,1])

pos_tar_local_ellipse = pos_tar_world_ellipse - pos_init_world_ellipse
pos_tar_local_ellipse[:,0] = np.cos(-np.radians(yaw_init_world_ellipse*360))*pos_tar_local_ellipse[:,0] - np.sin(-np.radians(yaw_init_world_ellipse*360))*(-pos_tar_local_ellipse[:,1])  
pos_tar_local_ellipse[:,1] = np.sin(-np.radians(yaw_init_world_ellipse*360))*pos_tar_local_ellipse[:,0] + np.cos(-np.radians(yaw_init_world_ellipse*360))*(-pos_tar_local_ellipse[:,1])

pos_tar_local_rectangular = pos_tar_world_rectangular - pos_init_world_rectangular
pos_tar_local_rectangular[:,0] = np.cos(-np.radians(yaw_init_world_rectangular*360))*pos_tar_local_rectangular[:,0] - np.sin(-np.radians(yaw_init_world_rectangular*360))*(-pos_tar_local_rectangular[:,1])  
pos_tar_local_rectangular[:,1] = np.sin(-np.radians(yaw_init_world_rectangular*360))*pos_tar_local_rectangular[:,0] + np.cos(-np.radians(yaw_init_world_rectangular*360))*(-pos_tar_local_rectangular[:,1])

# dyaw
dyaw_polygon = getDerivativeYaw(yaw_init_world_polygon, yaw_tar_world_polygon)
dyaw_ellipse = getDerivativeYaw(yaw_init_world_ellipse, yaw_tar_world_ellipse)
dyaw_rectangular = getDerivativeYaw(yaw_init_world_rectangular, yaw_tar_world_rectangular)

# 100000
# y_polygon = y_polygon.values[:100000,1:]
# y_ellipse = y_ellipse.values[:100000,1:]
# y_rectangular = y_rectangular.values[:100000,1:]

# pos_init_world = np.concatenate((pos_init_world_polygon, pos_init_world_ellipse, pos_init_world_rectangular), axis=0)
# yaw_init_world = np.concatenate((yaw_init_world_polygon, yaw_init_world_ellipse, yaw_init_world_rectangular), axis=0)
# pos_tar_world = np.concatenate((pos_tar_world_polygon, pos_tar_world_ellipse, pos_tar_world_rectangular), axis=0)
# yaw_tar_world = np.concatenate((yaw_tar_world_polygon, yaw_tar_world_ellipse, yaw_tar_world_rectangular), axis=0)

pos_tar_local = np.concatenate((pos_tar_local_polygon, pos_tar_local_ellipse, pos_tar_local_rectangular), axis=0)
yaw_init_world = np.concatenate((yaw_init_world_polygon, yaw_init_world_ellipse, yaw_init_world_rectangular), axis=0)
dyaw = np.concatenate((dyaw_polygon, dyaw_ellipse, dyaw_rectangular), axis=0) # -1 ~ 1 (related)
isSuccess = np.concatenate((isSuccess_polygon, isSuccess_ellipse, isSuccess_rectangular), axis=0)



In [5]:
## split dataset to train, validation, test set

# x_train_all, x_test, y_train_all, y_test = train_test_split(x_all, y_all, test_size=0.2, random_state=42)
x_train, x_val, pos_train, pos_val, yaw_train, yaw_val, dyaw_train, dyaw_val, y_train, y_val = train_test_split(x_all, pos_tar_local, yaw_init_world, dyaw, isSuccess, test_size=0.2, random_state=42)

print("x_train: ",x_train.shape)
print("x_val: ",x_val.shape)
print("pos_train: ",pos_train.shape)
print("pos_val: ",pos_val.shape)
print("yaw_train: ",yaw_train.shape)
print("yaw_val: ",yaw_val.shape)
print("dyaw_train: ",dyaw_train.shape)
print("dyaw_val: ",dyaw_val.shape)
print("y_train: ",y_train.shape)
print("y_val: ",y_val.shape)


unique_train_all, count_train_all = np.unique(y_train, return_counts=True)
print("Unique values and their counts in y_train_all:", unique_train_all, count_train_all)

unique_test, count_test = np.unique(y_val, return_counts=True)
print("Unique values and their counts in y_test:", unique_test, count_test)

x_train:  (491520, 100, 100)
x_val:  (122880, 100, 100)
pos_train:  (491520, 2)
pos_val:  (122880, 2)
yaw_train:  (491520,)
yaw_val:  (122880,)
dyaw_train:  (491520,)
dyaw_val:  (122880,)
y_train:  (491520,)
y_val:  (122880,)
Unique values and their counts in y_train_all: [0. 1.] [260089 231431]
Unique values and their counts in y_test: [0. 1.] [64881 57999]


In [6]:
## image preprocessing

x_train = x_train.astype('float32')/255
x_val = x_val.astype('float32')/255
# x_test = x_test.astype('float32')/255

x_train = x_train.reshape(-1, 100, 100, 1)
x_val = x_val.reshape(-1, 100, 100, 1)
# x_test = x_test.reshape(-1, 100, 100, 1)

print(x_train.shape)
print(x_val.shape)
# print(x_test.shape)

pos_train = pos_train.reshape(-1, 2)
yaw_train = yaw_train.reshape(-1, 1)
dyaw_train = dyaw_train.reshape(-1, 1)
y_train = y_train.reshape(-1, 1)

pos_val = pos_val.reshape(-1, 2)
yaw_val = yaw_val.reshape(-1, 1)
dyaw_val = dyaw_val.reshape(-1, 1)
y_val = y_val.reshape(-1, 1)
# y_test = y_test.reshape(-1, 1)

print(pos_train.shape)
print(yaw_train.shape)
print(dyaw_train.shape)
print(y_train.shape)

print(pos_val.shape)
print(yaw_val.shape)
print(dyaw_val.shape)
print(y_val.shape)
# print(y_test.shape)

(491520, 100, 100, 1)
(122880, 100, 100, 1)
(491520, 2)
(491520, 1)
(491520, 1)
(491520, 1)
(122880, 2)
(122880, 1)
(122880, 1)
(122880, 1)


In [7]:
# 훈련 데이터 텐서 변환
x_train = torch.from_numpy(x_train).float()
pos_train = torch.from_numpy(pos_train).float()
yaw_train = torch.from_numpy(yaw_train).float()
dyaw_train = torch.from_numpy(dyaw_train).float()
y_train = torch.from_numpy(y_train).float()

# 테스트 데이터 텐서 변환
# x_test = torch.from_numpy(x_test).float()
# y_test = torch.from_numpy(y_test).float()
x_val = torch.from_numpy(x_val).float()
pos_val = torch.from_numpy(pos_val).float()
yaw_val = torch.from_numpy(yaw_val).float()
dyaw_val = torch.from_numpy(dyaw_val).float()
y_val = torch.from_numpy(y_val).float()

# 텐서로 변환한 데이터 건수 확인
print(x_train.shape)
print(pos_train.shape)
print(yaw_train.shape)
print(dyaw_train.shape)
print(y_train.shape)

print(x_val.shape)
print(pos_val.shape)
print(yaw_val.shape)
print(dyaw_val.shape)
print(y_val.shape)

# 설명변수와 목적변수의 텐서를 합침
train_dataset = TensorDataset(x_train, pos_train[:,0], pos_train[:,1], yaw_train, dyaw_train, y_train)
val_dataset = TensorDataset(x_val, pos_val[:,0], pos_val[:,1], yaw_val, dyaw_val, y_val)
# test_dataset = TensorDataset(x_test, y_test)

batch_size = 512

# dataset loader
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, drop_last=True)
# test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, drop_last=True)

torch.Size([491520, 100, 100, 1])
torch.Size([491520, 2])
torch.Size([491520, 1])
torch.Size([491520, 1])
torch.Size([491520, 1])
torch.Size([122880, 100, 100, 1])
torch.Size([122880, 2])
torch.Size([122880, 1])
torch.Size([122880, 1])
torch.Size([122880, 1])


In [8]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1)
        self.dropout = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(1156, 64) # 32 * 6 * 6 + 4
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, pos_x, pos_y, yaw, dyaw):
        x = self.conv1(x)
        x = self.relu(x)
        x = F.max_pool2d(x, kernel_size=2)

        x = self.conv2(x)
        x = self.relu(x)
        x = F.max_pool2d(x, kernel_size=2)

        x = self.conv3(x)
        x = self.relu(x)
        x = F.max_pool2d(x, kernel_size=2)

        x = self.conv4(x)
        x = self.relu(x)
        x = F.max_pool2d(x, kernel_size=2)

        x = self.dropout(x)

        x = self.flatten(x)

        x = torch.cat((x, pos_x, pos_y, yaw, dyaw), dim=1)

        x = self.fc1(x)
        x = self.relu(x)

        x = self.fc2(x)
        x = self.relu(x)

        x = self.fc3(x)
        x = self.relu(x)

        x = self.fc4(x)
        x = self.sigmoid(x)

        return x

In [9]:
# 모델 인스턴스 생성
model = Network()
model.to(device)

learning_rate = 0.001

# criterion = nn.BCEWithLogitsLoss()  # binary cross entropy (use logit, add sigmoid)
# criterion = nn.CrossEntropyLoss() # cross entropy
criterion = nn.BCELoss() # binary cross entropy (use 0~1 probability)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

num_epochs = 1000
count = 0
threshold = 0.5
loss_list = []
iteration_list = []
accuracy_list = []

predictions_list = []
labels_list = []

# Initialize variables for Early Stopping
best_val_loss = float('inf')
patience = 5  # Number of epochs to wait before early stopping
current_patience = 0


for epoch in range(num_epochs):
  for images, pos_x, pos_y, yaw, dyaw, labels in train_loader:
    images, pos_x, pos_y, yaw, dyaw, labels = images.to(device), pos_x.to(device), pos_y.to(device), yaw.to(device), dyaw.to(device), labels.to(device)
    # train = Variable(images.view(100, 1, 100, 100))
    # labels = Variable(labels)
    images = images.view(batch_size, 1, 100, 100)
    pos_x = pos_x.view(batch_size, 1)
    pos_y = pos_y.view(batch_size, 1)
    yaw = yaw.view(batch_size, 1)
    dyaw = dyaw.view(batch_size, 1)
    outputs = model(images, pos_x, pos_y, yaw, dyaw)
    loss = criterion(outputs, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    count += 1

    del images # tensor in gpu delete
    del pos_x # tensor in gpu delete
    del pos_y # tensor in gpu delete
    del yaw # tensor in gpu delete
    del dyaw # tensor in gpu delete
    del labels # tensor in gpu delete
    torch.cuda.empty_cache()
    
    if not (count % len(train_loader)): # last iteration of each epoch
      total = 0
      correct = 0
      val_loss = 0

      model.eval()
      with torch.no_grad():

        for images, pos_x, pos_y, yaw, dyaw, labels in val_loader:
          images, pos_x, pos_y, yaw, dyaw, labels = images.to(device), pos_x.to(device), pos_y.to(device), yaw.to(device), dyaw.to(device), labels.to(device)
          # labels_list.append(labels)
          # val = Variable(images.view(batch_size, 1, 100, 100))
          images = images.view(batch_size, 1, 100, 100)
          pos_x = pos_x.view(batch_size, 1)
          pos_y = pos_y.view(batch_size, 1)
          yaw = yaw.view(batch_size, 1)
          dyaw = dyaw.view(batch_size, 1)
          outputs = model(images, pos_x, pos_y, yaw, dyaw)
          # predictions = torch.max(outputs, 1)[1].to(device) # cross entropy 
          # predictions = (outputs > 0).float()
          predictions = (outputs > threshold).float()
          # predictions_list.append(predictions)
          correct += (predictions == labels).sum()
          total += len(labels)

          _loss = criterion(outputs, labels)
          val_loss += _loss.item()
        
      val_loss /= len(val_loader)
      accuracy = correct * 100 / total
      # loss_list.append(loss.data)
      # iteration_list.append(count)
      # accuracy_list.append(accuracy)

      del images # tensor in gpu delete
      del pos_x # tensor in gpu delete
      del pos_y # tensor in gpu delete
      del yaw # tensor in gpu delete
      del dyaw # tensor in gpu delete
      torch.cuda.empty_cache()
 
  print("Epoch: {}, Iteration: {}, Loss: {}, Val_Loss: {}, Val Accuracy: {}%".format(epoch, count, loss.data, _loss.data, accuracy))
  
  # Check if validation loss has improved
  if val_loss < best_val_loss:
      best_val_loss = val_loss
      current_patience = 0
      # Save the model when validation loss improves
      # torch.save(model.state_dict(), 'best_model.pth')
      torch.save(model, '../models/20231203(2).pth')
  else:
      current_patience += 1
      if current_patience >= patience:
          print("Early stopping triggered. Training stopped.")
          break
      
    # if not (count%500):
    #   print("Iteration: {}, Loss: {}, Val Accuracy: {}%".format(count, loss.data, accuracy))

Epoch: 0, Iteration: 960, Loss: 0.13318029046058655, Val_Loss: 0.1491946429014206, Val Accuracy: 93.66130065917969%
Epoch: 1, Iteration: 1920, Loss: 0.13313838839530945, Val_Loss: 0.14550834894180298, Val Accuracy: 93.61979675292969%
Epoch: 2, Iteration: 2880, Loss: 0.1329226791858673, Val_Loss: 0.14469772577285767, Val Accuracy: 93.63607025146484%
Epoch: 3, Iteration: 3840, Loss: 0.1325298696756363, Val_Loss: 0.14597737789154053, Val Accuracy: 93.64502716064453%
Epoch: 4, Iteration: 4800, Loss: 0.13217654824256897, Val_Loss: 0.14435020089149475, Val Accuracy: 93.66048431396484%
Epoch: 5, Iteration: 5760, Loss: 0.133061483502388, Val_Loss: 0.14390000700950623, Val Accuracy: 93.70442962646484%
Epoch: 6, Iteration: 6720, Loss: 0.13118663430213928, Val_Loss: 0.1424216330051422, Val Accuracy: 93.96159362792969%
Epoch: 7, Iteration: 7680, Loss: 0.12989719212055206, Val_Loss: 0.13895121216773987, Val Accuracy: 94.07878112792969%
Epoch: 8, Iteration: 8640, Loss: 0.12849703431129456, Val_Loss: