In [1]:
import csv
from turtle import forward
from tqdm import tqdm
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
from torchsummary   import summary
import json
import os
import pandas as pd
import cv2
from PIL import Image 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def parse_file_number(col):
    return int(col.split(".")[0])

class MyDataset(Dataset) :

    def __init__(self,meta_path,root_dir,transform=None) :
        super().__init__()
        #===============meta data===============
        with open(meta_path, 'r') as file:
            temp_meta_data = json.load(file)
        self.meta = pd.json_normalize(temp_meta_data['annotations'])
        self.meta['file_name'] = self.meta['file_name'].apply(parse_file_number)
        self.meta = self.meta.sort_values("file_name").reset_index(drop=True)
        self.root_dir = root_dir
        self.transform = transform
        
    def __len__(self) :
        return len(self.meta)
    
    def __getitem__(self,idx) :
        img_path = os.path.join(self.root_dir, str(idx)) + ".jpg"
        img = Image.open(img_path).convert("RGB")
        y_label = (int(self.meta.loc[idx, 'category']))
#         if self.transform:
        img = self.transform(img)
        return (img, y_label)


In [3]:
mode = 'train' 
train_data_dir = "./train_data"
meta_path = "./answer.json"

# Create training and validation datasets
# test_datasets = MyDataset(meta_path, data_dir, data_transforms['train'])
transformer = transforms.Compose([
    transforms.Resize((600,600)),
    transforms.ToTensor()
])
train_data = MyDataset(meta_path, train_data_dir, transform=transformer)

In [4]:
# test_tf = transforms.ToPILImage()

# test_tf(train_data[11365][0])

In [5]:
print(train_data[0][1])

8


In [6]:
class MyModel(nn.Module) :
    def __init__(self,in_channels,out_channels):
        super().__init__()
        #TODO: Make your own model
        self.in_channels = in_channels
        self.out_channles = out_channels
        self.layer = nn.Sequential(
            #batch = 1, channel 3, height 400, width = 600 
            #Conv [1, 3, 600, 600] -> [1, 16, 598, 598]
            nn.Conv2d(in_channels = 3,  out_channels=16, kernel_size=3),
            nn.ReLU(),
              
            #Conv [1, 16, 598, 598] -> [1, 32, 596, 596]
            nn.Conv2d(in_channels = 16,  out_channels=32, kernel_size=3),
            nn.ReLU(),

            #Pool [1, 32, 596, 596] -> [1, 32, 298, 298]
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            #Conv [1, 32, 298, 298] -> [1, 64, 296, 296]
            nn.Conv2d(in_channels = 32,  out_channels=64, kernel_size=3),
            nn.ReLU(),

            #Pool [1, 64, 296, 296] -> [1, 64, 148, 148] 
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            #Conv [1, 64, 148, 148] -> [1, 128, 146, 146]
            nn.Conv2d(in_channels = 64,  out_channels=128, kernel_size=3),
            nn.ReLU(),
            
            #Conv [1, 128, 146, 146] -> [1, 256, 144, 144]
            nn.Conv2d(in_channels = 128,  out_channels=256, kernel_size=3),
            nn.ReLU(),
            
            #Pool [1, 256, 144, 144]-> [1, 256, 72, 72] 
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            #Conv [1, 256, 72, 72] -> [1, 512, 70, 70]
            nn.Conv2d(in_channels = 256,  out_channels=512, kernel_size=3),
            nn.ReLU(),

            #Conv [1, 512, 70, 70] -> [1, 1024, 68, 68]
            nn.Conv2d(in_channels = 512,  out_channels=1024, kernel_size=3),
            nn.ReLU(),
            
            #Pool [1, 1024, 68, 68] -> [1, 1024, 34, 34]
            nn.MaxPool2d(kernel_size=2, stride=2),
            
        )
        self.fc_layer = nn.Sequential(
            #linear [1, 512 * 36 * 36] -> [100, 100]
            nn.Linear(1024*34*34, 1000),
            nn.ReLU(),
            nn.Linear(1000,80)
        )
        
    def forward(self,x) :
        #TODO:
#         print("x.shape: ", x.shape)
        out = self.layer(x)
#         print("out.shape: ", out.shape)
        out = out.view(1, -1)
#         print("out.size(0): ", out.size(0))
        out = self.fc_layer(out)
        return out 
    

In [7]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device="cpu"

In [8]:
# torch.cuda.empty_cache()

In [9]:
batch = 1
learning_rate = 0.001
model  = MyModel(3,128).to(device)

In [13]:
summary(model, input_size=(3,600,600),device=device)

Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Conv2d: 2-1                       448
|    └─ReLU: 2-2                         --
|    └─Conv2d: 2-3                       4,640
|    └─ReLU: 2-4                         --
|    └─MaxPool2d: 2-5                    --
|    └─Conv2d: 2-6                       18,496
|    └─ReLU: 2-7                         --
|    └─MaxPool2d: 2-8                    --
|    └─Conv2d: 2-9                       73,856
|    └─ReLU: 2-10                        --
|    └─Conv2d: 2-11                      295,168
|    └─ReLU: 2-12                        --
|    └─MaxPool2d: 2-13                   --
|    └─Conv2d: 2-14                      1,180,160
|    └─ReLU: 2-15                        --
|    └─Conv2d: 2-16                      4,719,616
|    └─ReLU: 2-17                        --
|    └─MaxPool2d: 2-18                   --
├─Sequential: 1-2                        --
|    └─Linear: 2-19                     

Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Conv2d: 2-1                       448
|    └─ReLU: 2-2                         --
|    └─Conv2d: 2-3                       4,640
|    └─ReLU: 2-4                         --
|    └─MaxPool2d: 2-5                    --
|    └─Conv2d: 2-6                       18,496
|    └─ReLU: 2-7                         --
|    └─MaxPool2d: 2-8                    --
|    └─Conv2d: 2-9                       73,856
|    └─ReLU: 2-10                        --
|    └─Conv2d: 2-11                      295,168
|    └─ReLU: 2-12                        --
|    └─MaxPool2d: 2-13                   --
|    └─Conv2d: 2-14                      1,180,160
|    └─ReLU: 2-15                        --
|    └─Conv2d: 2-16                      4,719,616
|    └─ReLU: 2-17                        --
|    └─MaxPool2d: 2-18                   --
├─Sequential: 1-2                        --
|    └─Linear: 2-19                     

In [10]:
loss_func = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
train_loader = DataLoader(
    train_data, batch_size=batch)

In [12]:
# for i,(img, label) in enumerate(train_loader):
#     print(i)
#     print(img)    
#     print(label)        

In [13]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x21034b94940>

In [14]:
# loss_arr =[]
# num_epoch = 1
# for i in range(num_epoch):
#     print("i: ", i)
#     for j,(image,label) in (train_loader):
#         print("1")
#         x = image.to(device)
#         print("2")
#         y= label.to(device)
#         print("3")
#         optimizer.zero_grad()
        
#         output = model.forward(x)
#         print("j: ", j)
        
#         loss = loss_func(output,y)
#         loss.backward()
#         optimizer.step()
        
#         if j % 1000 == 0:
#             print(loss)
#             loss_arr.append(loss.cpu().detach().numpy())

In [15]:
loss_arr =[]
num_epoch = 1
for i in range(num_epoch):
    loop = tqdm(train_loader, total = len(train_loader), leave=True)
    for j, (image, label) in enumerate(loop):
#         print("asdasd")
        x = image.to(device)
        y = label.to(device)
        optimizer.zero_grad()
        
        output = model.forward(x)
        
        loss = loss_func(output,y)
        loss.backward()
        optimizer.step()
        
        if( j % 10 == 0):
            print(loss)
#             loss_arr.append(loss.cpu().detach().numpy())

  0%|                                                                             | 1/40000 [00:05<57:39:03,  5.19s/it]

tensor(4.3903, grad_fn=<NllLossBackward0>)


  0%|                                                                            | 11/40000 [00:56<58:20:19,  5.25s/it]

tensor(4.3676, grad_fn=<NllLossBackward0>)


  0%|                                                                            | 21/40000 [01:51<66:11:39,  5.96s/it]

tensor(4.3873, grad_fn=<NllLossBackward0>)


  0%|                                                                            | 31/40000 [02:43<57:35:34,  5.19s/it]

tensor(4.3640, grad_fn=<NllLossBackward0>)


  0%|                                                                            | 41/40000 [03:35<57:40:22,  5.20s/it]

tensor(4.3766, grad_fn=<NllLossBackward0>)


  0%|                                                                            | 51/40000 [04:30<61:14:32,  5.52s/it]

tensor(4.4014, grad_fn=<NllLossBackward0>)


  0%|                                                                            | 61/40000 [05:26<63:14:03,  5.70s/it]

tensor(4.3826, grad_fn=<NllLossBackward0>)


  0%|▏                                                                           | 71/40000 [06:18<58:58:24,  5.32s/it]

tensor(4.3306, grad_fn=<NllLossBackward0>)


  0%|▏                                                                           | 81/40000 [07:18<73:33:48,  6.63s/it]

tensor(4.3547, grad_fn=<NllLossBackward0>)


  0%|▏                                                                           | 91/40000 [08:09<56:24:44,  5.09s/it]

tensor(4.4159, grad_fn=<NllLossBackward0>)


  0%|▏                                                                          | 101/40000 [09:00<57:56:32,  5.23s/it]

tensor(4.3368, grad_fn=<NllLossBackward0>)


  0%|▏                                                                          | 111/40000 [09:50<55:59:19,  5.05s/it]

tensor(4.3462, grad_fn=<NllLossBackward0>)


  0%|▏                                                                          | 121/40000 [10:40<55:36:15,  5.02s/it]

tensor(4.3303, grad_fn=<NllLossBackward0>)


  0%|▏                                                                          | 131/40000 [11:35<59:48:40,  5.40s/it]

tensor(4.3529, grad_fn=<NllLossBackward0>)


  0%|▎                                                                          | 141/40000 [12:30<66:26:02,  6.00s/it]

tensor(4.3999, grad_fn=<NllLossBackward0>)


  0%|▎                                                                          | 151/40000 [13:21<57:59:53,  5.24s/it]

tensor(4.3347, grad_fn=<NllLossBackward0>)


  0%|▎                                                                          | 161/40000 [14:14<58:04:24,  5.25s/it]

tensor(4.3510, grad_fn=<NllLossBackward0>)


  0%|▎                                                                          | 171/40000 [15:05<56:30:25,  5.11s/it]

tensor(4.3196, grad_fn=<NllLossBackward0>)


  0%|▎                                                                          | 181/40000 [15:54<55:26:13,  5.01s/it]

tensor(4.5144, grad_fn=<NllLossBackward0>)


  0%|▎                                                                          | 191/40000 [16:45<55:15:03,  5.00s/it]

tensor(4.4146, grad_fn=<NllLossBackward0>)


  0%|▎                                                                          | 193/40000 [16:57<58:17:59,  5.27s/it]


KeyboardInterrupt: 

In [75]:
train_data.meta.sort_values('height',ascending=False)

Unnamed: 0,file_name,category,height,width
31777,31777,61,1080,1080
11365,11365,38,1080,1920
36847,36847,64,1080,1080
28481,28481,61,980,736
8021,8021,68,829,550
...,...,...,...,...
35928,35928,71,130,197
30577,30577,46,0,0
26310,26310,46,0,0
22393,22393,57,0,0


In [76]:
train_data.meta.sort_values('width',ascending=False)

Unnamed: 0,file_name,category,height,width
11365,11365,38,1080,1920
18036,18036,52,720,1280
32657,32657,59,720,1280
38353,38353,61,720,1280
38198,38198,40,720,1280
...,...,...,...,...
35831,35831,46,130,130
26310,26310,46,0,0
38006,38006,53,0,0
30577,30577,46,0,0


In [None]:
cv2.imread()

In [76]:
train_data[1][0].shape

torch.Size([3, 400, 600])

In [66]:
# to_image = transforms.ToPILImage()

# test = to_image(train_data[1][0])
# test.show()

[[[255 248 241]
  [255 248 241]
  [255 248 241]
  ...
  [254 245 232]
  [254 245 232]
  [254 245 232]]

 [[255 248 241]
  [255 248 241]
  [255 248 241]
  ...
  [254 245 232]
  [254 245 232]
  [254 245 232]]

 [[255 248 241]
  [255 248 241]
  [255 248 241]
  ...
  [254 245 232]
  [254 245 232]
  [254 245 232]]

 ...

 [[219 213 202]
  [219 213 202]
  [218 212 201]
  ...
  [184 180 161]
  [185 181 163]
  [185 181 162]]

 [[219 213 202]
  [219 213 202]
  [218 212 201]
  ...
  [182 179 158]
  [183 179 160]
  [183 180 159]]

 [[219 213 202]
  [219 213 202]
  [218 212 201]
  ...
  [181 178 157]
  [182 179 158]
  [182 179 158]]]
./train_data\1.jpg


In [13]:
test_datasets.meta.loc[1,'category']

'43'

In [48]:
len(test_datasets)

40000

In [52]:
test_datasets.meta[test_datasets.meta['category']=="0"]

NameError: name 'test_datasets' is not defined

In [1]:
def train() :
    #TODO: Make your own training code

    # You SHOULD save your model by
    # torch.save(model.state_dict(), './checkpoint.pth') 
    # You SHOULD not modify the save path
    pass


def get_model(model_name, checkpoint_path):
    
    model = model_name()
    model.load_state_dict(torch.load(checkpoint_path))
    
    return model


def test():
    
    model_name = MyModel
    checkpoint_path = './model.pth' 
    mode = 'test' 
    data_dir = "./test_data"
    meta_path = "./answer.json"
    model = get_model(model_name,checkpoint_path)

    data_transforms = {
        'train' :"YOUR_DATA_TRANSFORM_FUNCTION" , 
        'test': "YOUR_DATA_TRANSFORM_FUNCTION"
    }

    # Create training and validation datasets
    test_datasets = MyDataset(meta_path, data_dir, data_transforms['mode'])

    batch_size = 1
    # Create training and validation dataloaders
    test_dataloader = torch.utils.data.DataLoader(test_datasets, batch_size=batch_size, shuffle=False, num_workers=4)

    # Detect if we have a GPU available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Send the model to GPU
    model = model.to(device)

    # Set model as evaluation mode
    for param in model.parameters():
        param.requires_grad = False
    model.eval()
    
    # Inference
    result = []
    for images, filename in tqdm(test_dataloader):
        num_image = images.shape[0]
        images = images.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        for i in range(num_image):
            result.append({
                'filename': filename[i],
                'class': preds[i].item()
            })

    result = sorted(result,key=lambda x : int(x['filename'].split('.')[0]))
    
    # Save to csv
    with open('./result.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['filename','class'])
        for res in result:
            writer.writerow([res['filename'], result['class']])


def main() :
    pass


if __name__ == '__main__':
    main()

  from .autonotebook import tqdm as notebook_tqdm
