# 재구축 데이터셋 Scratch



In [1]:
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets, models
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset
from PIL import Image
import numpy as np
from tqdm import tqdm

In [2]:
# 구축된 .npy파일을 Pytorch DataLoader을 사용할 수 있도록 CUSTOM DATASET을 만듬.
import numpy as np
from google.colab import drive
from sklearn.model_selection import train_test_split

default_path = "/content/drive/My Drive/ai_data/final/"

CUB200_TYPE_TRAIN = 1
CUB200_TYPE_TEST = 2
CUB200_TYPE_SUBMIT = 3

drive.mount('/content/drive')
class CUB200(data.Dataset):

    def __init__(self, type, transform = None):
        super(CUB200, self).__init__()
        """
        type : int = 1, 2, 3
        """

        original_train_data = np.load(default_path + 'train_image.npy')
        original_train_label = np.load(default_path + 'train_label.npy')

        train_data, test_data, train_label, test_label = train_test_split(
            original_train_data,
            original_train_label,
            test_size = 0.3,
            random_state = 1)
        
        if type == CUB200_TYPE_TRAIN:
          self.image = train_data
          self.label = train_label
        elif type == CUB200_TYPE_TEST:
          self.image = test_data
          self.label = test_label
        elif type == CUB200_TYPE_SUBMIT:
          self.image = np.load(default_path + 'test_image.npy')
          self.label = np.zeros(500)
        
        self.transform = transform

    def __getitem__(self, index):
        img, target = self.image[index], self.label[index]
        img = Image.fromarray(img)

        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        return len(self.image)

Mounted at /content/drive


In [3]:
trainCUB = CUB200(CUB200_TYPE_TRAIN)
print(trainCUB.image.shape)
print(trainCUB.label.shape)

testCUB = CUB200(CUB200_TYPE_TEST)
print(testCUB.image.shape)
print(testCUB.label.shape)
print(np.max(testCUB.label), np.min(testCUB.label))

submitCUB = CUB200(CUB200_TYPE_SUBMIT)
print(submitCUB.image.shape)

(626, 256, 256, 3)
(626,)
(269, 256, 256, 3)
(269,)
49 0
(500, 256, 256, 3)


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [5]:
# train_data에만 data augmentaion을 적용
transform_train = transforms.Compose([
        transforms.RandomCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

In [41]:
# CUSTOM DATASET을 이용하여 train_loader, test_loader을 구축

batch_size = 32

train_loader = torch.utils.data.DataLoader(
    dataset = CUB200(CUB200_TYPE_TRAIN, transform = transform_train),
    batch_size = batch_size,
    shuffle = True
)

test_loader = torch.utils.data.DataLoader(
    dataset = CUB200(CUB200_TYPE_TEST, transform = transform_test),
    batch_size = batch_size,
    shuffle = False
)

submit_loader = torch.utils.data.DataLoader(
    dataset = CUB200(CUB200_TYPE_SUBMIT, transform = transform_test),
    batch_size = batch_size,
    shuffle = False
)

In [7]:
def training_model(model, criterion, optimizer, scheduler, num_epochs = 25):

    for epoch in range(num_epochs):
        scheduler.step()

        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if i % 60 == 59:
                print('[%d, %5d] loss: %.7f' %
                    (epoch + 1, (i + 1), running_loss / 20))
                running_loss = 0.0
        
        train_correct = 0
        train_total = 0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs = inputs.squeeze()
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            _, predicted = torch.max(outputs.data, 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

        print('[%d epoch] Accuracy of the network on the train images: %d %%' %
              (epoch + 1, 100 * train_correct / train_total))
        
    print("End Training do it eval_accuracy")
    return model

In [21]:
def eval_accuracy(model):
    class_correct = list(0. for i in range(50))
    class_total = list(0. for i in range(50))

    correct = 0
    total = 0
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(test_loader, 0):
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            print("#", i, outputs)
            c = (predicted == labels).squeeze()
                    
            for i in range(labels.shape[0]):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
                
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

    print('Accuracy of the network on test images: %d %%' % (
        100 * correct / total))            
                
    return 

In [42]:
model_ft = models.resnet152(pretrained=True)
for param in model_ft.parameters():
  param.requires_grad = False
model_ft.fc = nn.Sequential(
        nn.Linear(model_ft.fc.in_features, 50),
#        nn.Linear(512,256),
#        nn.Linear(256, 50),
        # nn.Softmax(),
    )
# model_ft

In [43]:
num_epochs = 32
model_ft.to(device)
criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model_ft.parameters(), lr = 0.005)
optimizer = optim.SGD(model_ft.parameters(), lr=0.0075, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 16, gamma = 0.1)

model_ft = training_model(model_ft, criterion, optimizer, lr_scheduler, num_epochs)



[1 epoch] Accuracy of the network on the train images: 32 %
[2 epoch] Accuracy of the network on the train images: 60 %
[3 epoch] Accuracy of the network on the train images: 74 %
[4 epoch] Accuracy of the network on the train images: 84 %
[5 epoch] Accuracy of the network on the train images: 92 %
[6 epoch] Accuracy of the network on the train images: 93 %
[7 epoch] Accuracy of the network on the train images: 94 %
[8 epoch] Accuracy of the network on the train images: 94 %
[9 epoch] Accuracy of the network on the train images: 94 %
[10 epoch] Accuracy of the network on the train images: 96 %
[11 epoch] Accuracy of the network on the train images: 98 %
[12 epoch] Accuracy of the network on the train images: 99 %
[13 epoch] Accuracy of the network on the train images: 99 %
[14 epoch] Accuracy of the network on the train images: 99 %
[15 epoch] Accuracy of the network on the train images: 99 %
[16 epoch] Accuracy of the network on the train images: 99 %
[17 epoch] Accuracy of the networ

In [44]:
eval_accuracy(model_ft)

# 0 tensor([[-2.9456e+00, -2.4177e+00, -1.2986e+00,  ...,  2.2963e+00,
          1.7275e+00,  1.8937e+00],
        [ 3.2103e+00,  4.3900e+00,  1.4850e+00,  ..., -5.7209e-01,
         -2.0961e+00, -1.6914e+00],
        [-1.2293e+00,  7.0151e-02,  7.3736e-01,  ..., -1.2961e+00,
          5.9152e-04, -1.3629e+00],
        ...,
        [ 1.8596e+00, -1.7134e-01, -2.2384e-01,  ...,  6.9424e-01,
         -9.9314e-01,  8.4216e-01],
        [ 1.9322e+00,  1.2319e+00, -8.3993e-01,  ..., -9.0468e-01,
         -1.9321e+00,  1.6123e-01],
        [-2.6517e+00, -2.3488e+00, -1.8799e+00,  ..., -1.5802e+00,
          1.2412e-02,  1.9279e+00]], device='cuda:0')
# 1 tensor([[-3.8309, -2.7546, -1.6361,  ..., -1.2270,  1.3564,  0.0103],
        [ 2.3239,  2.0119, -0.0557,  ...,  0.3120, -2.2176, -1.2236],
        [-2.1594, -1.5712,  0.2066,  ..., -2.2694,  1.4698, -1.1219],
        ...,
        [-1.3658, -1.8868, -0.8866,  ...,  2.7514, -1.6436,  0.7377],
        [-0.1095,  0.2058,  1.5054,  ..., -0.3579,

In [None]:
import itertools

def get_result(model):
  result=[]
  model.eval()
  with torch.no_grad():
    for i, data in enumerate(submit_loader, 0):
      images, _ = data
      images = images.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs, 1)
      result.append(predicted.cpu().numpy())
  return list(itertools.chain(*result))

In [None]:
submit_result = get_result(model_ft)

In [None]:
print(submit_result)

[16, 12, 23, 30, 47, 32, 38, 24, 16, 30, 48, 47, 41, 20, 35, 26, 1, 8, 37, 7, 4, 22, 22, 35, 38, 2, 42, 23, 30, 34, 5, 37, 39, 33, 44, 28, 5, 7, 40, 49, 32, 37, 32, 7, 8, 43, 22, 11, 39, 16, 12, 25, 42, 6, 32, 48, 43, 39, 8, 45, 0, 5, 43, 12, 14, 26, 40, 5, 3, 9, 48, 11, 9, 35, 12, 20, 40, 36, 42, 35, 39, 41, 11, 26, 22, 32, 4, 48, 8, 48, 33, 5, 19, 14, 34, 9, 40, 21, 28, 27, 19, 35, 27, 23, 45, 4, 9, 31, 18, 49, 10, 47, 1, 48, 25, 11, 42, 31, 42, 35, 3, 1, 21, 29, 40, 10, 38, 18, 33, 1, 32, 6, 44, 1, 9, 33, 43, 37, 16, 2, 40, 38, 1, 38, 3, 33, 26, 12, 34, 41, 11, 6, 39, 46, 38, 0, 22, 42, 3, 38, 1, 13, 41, 3, 21, 41, 14, 10, 43, 33, 5, 44, 41, 19, 7, 27, 46, 31, 11, 13, 46, 20, 34, 42, 24, 25, 15, 42, 6, 42, 36, 39, 6, 12, 48, 25, 29, 5, 6, 26, 2, 13, 16, 29, 2, 18, 9, 14, 6, 4, 44, 38, 21, 26, 24, 21, 29, 46, 2, 9, 16, 22, 39, 12, 31, 2, 18, 16, 13, 24, 35, 8, 27, 37, 27, 15, 24, 20, 20, 8, 42, 24, 40, 13, 19, 38, 42, 6, 6, 4, 31, 39, 46, 48, 1, 42, 10, 33, 26, 10, 29, 39, 30, 47, 0,

In [None]:
pip install pycryptodomex --no-binary :all:



In [None]:
import json
from base64 import b64encode
from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import pad

def read_txt(fileName):
    with open(fileName, 'rt') as f:
        list_data = [a.strip('\n\r') for a in f.readlines()]
    return list_data

def write_json(fileName, data):
    with open(fileName, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

def load_key(key_path):
    with open(key_path, "rb") as f:
        key = f.read()
    return key

def encrypt_data(key_path, ans_list, encrypt_store_path='ans.json'):
    key = load_key(key_path)
    print(key)
    data = " ".join([str(i) for i in ans_list])
    encode_data = data.encode()
    cipher = AES.new(key, AES.MODE_CBC)
    ct_bytes = cipher.encrypt(pad(encode_data, AES.block_size))
    iv = b64encode(cipher.iv).decode('utf-8')
    ct = b64encode(ct_bytes).decode('utf-8')
    write_json(encrypt_store_path, {'iv':iv, 'ciphertext':ct})

if __name__=="__main__":
    # 1.이메일을 통해서 전달 받은 키 파일의 경로 입력
    key_path = default_path + "team9.pem"
    # key_path = "/content/drive/My Drive/ColabNotebooks/aiproject/team9.pem"
    # 2. 예측한 결과를 텍스트 파일로 저장했을 경우 리스트로 다시 불러오기
    # 본인이 원하는 방식으로 리스트 형태로 예측 값을 불러오기만 하면 됨(순서를 지킬것)
    #raw_ans_path = "ans.txt"
    #ans = read_txt(raw_ans_path)
    #ans에 result 저장한 리스트 넣기
    ans = submit_result
    # 3. 암호화된 파일을 저장할 위치
    encrypt_ans_path = default_path + "ai_answer.json"
    # 4. 암호화!(pycrytodome 설치)
    encrypt_data(key_path, ans, encrypt_ans_path)
    print("finished!")

b'\xfa@F\x83\xc3~\xb2\xff\x93*-\xff\xac%\xf7#'
finished!


0 0 0 0 ... 50
...
500

np.torch([0 0 0 0 0])

