# Import Modules

## Standard modules

In [1]:
import os
import json
import pickle as pkl

from collections import Counter

## External modules

In [2]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.metrics import precision_score, \
                            recall_score, \
                            f1_score, \
                            roc_auc_score \
            
from tqdm import tqdm, trange
from pylab import rcParams



tqdm.pandas()
%matplotlib inline
warnings.filterwarnings('ignore')
rcParams['figure.figsize'] = 10, 10

  from pandas import Panel


## Internal modules

In [3]:
import utils_scripts as utlis

# Constants

In [3]:
RANDOM_SEED = 17
np.random.seed(RANDOM_SEED)

ABS_PATH = '/kaggle/input/herbarium-2020-fgvc7/nybg2020/'

# Data EDA

In [4]:
def get_result_df(path, set_value):
    with open(os.path.join(ABS_PATH, set_value, 'metadata.json'), "r", encoding="ISO-8859-1") as file:
        metadata = json.load(file)
    
    for column_name, column in metadata.items():
        print(f'{column_name} - {len(column)} values')
        
    img_info = pd.DataFrame(metadata['images'])
    
    if set_value == 'train':
        annotation_info = pd.DataFrame(metadata['annotations']).drop(columns=['image_id'])
        img_info = img_info.merge(annotation_info, on='id')
    
    img_info['file_name'] = img_info['file_name'].progress_apply(lambda x : os.path.join(path, set_value, x))
    return img_info

In [5]:
submission_example = pd.read_csv('/kaggle/input/herbarium-2020-fgvc7/sample_submission.csv')
submission_example.tail()

Unnamed: 0,Id,Predicted
138287,138287,0
138288,138288,0
138289,138289,0
138290,138290,0
138291,138291,0


In [6]:
metadata_train = get_result_df(path=ABS_PATH, set_value='train')
metadata_train.head()

annotations - 1030747 values
categories - 32094 values
images - 1030747 values
info - 6 values
licenses - 1 values
regions - 4 values


100%|██████████| 1030747/1030747 [00:04<00:00, 251398.45it/s]


Unnamed: 0,file_name,height,id,license,width,category_id,region_id
0,/kaggle/input/herbarium-2020-fgvc7/nybg2020/tr...,1000,354106,1,661,15672,1
1,/kaggle/input/herbarium-2020-fgvc7/nybg2020/tr...,1000,818566,1,661,11524,1
2,/kaggle/input/herbarium-2020-fgvc7/nybg2020/tr...,1000,750704,1,661,11524,1
3,/kaggle/input/herbarium-2020-fgvc7/nybg2020/tr...,1000,722381,1,661,11467,1
4,/kaggle/input/herbarium-2020-fgvc7/nybg2020/tr...,1000,382783,1,661,15660,1


In [7]:
metadata_test = get_result_df(path=ABS_PATH, set_value='test')
metadata_test.head()

images - 138292 values
info - 6 values
licenses - 1 values


100%|██████████| 138292/138292 [00:00<00:00, 289892.23it/s]


Unnamed: 0,file_name,height,id,license,width
0,/kaggle/input/herbarium-2020-fgvc7/nybg2020/te...,1000,104891,1,661
1,/kaggle/input/herbarium-2020-fgvc7/nybg2020/te...,1000,18029,1,661
2,/kaggle/input/herbarium-2020-fgvc7/nybg2020/te...,1000,35151,1,661
3,/kaggle/input/herbarium-2020-fgvc7/nybg2020/te...,1000,124144,1,682
4,/kaggle/input/herbarium-2020-fgvc7/nybg2020/te...,1000,24649,1,682


In [8]:
classes = sorted(list(metadata_train['category_id'].unique()))
classes == list(range(min(classes), len(classes) + 1))

False

In [None]:
metadata_train['category_id'].value_counts()

## Label preprocessing

In [9]:
le_preprocessor = LabelEncoder()
le_preprocessor.fit(metadata_train['category_id'])

LabelEncoder()

In [10]:
metadata_train['category_id_le_preprocessed'] = le_preprocessor.transform(metadata_train['category_id'])

In [11]:
classes = sorted(list(metadata_train['category_id_le_preprocessed'].unique()))
classes == list(range(min(classes), len(classes)))

True

# Train Test Split

In [41]:
# train_indices, test_indices, _, _ = train_test_split(metadata_train.index, 
#                                                      metadata_train['category_id_le_preprocessed'],
#                                                      train_size=0.75, 
#                                                      random_state=RANDOM_SEED,                                                     
#                                                      shuffle=True, 
#                                                      stratify=metadata_train['category_id_le_preprocessed'])

In [12]:
grouped = metadata_train.groupby('category_id_le_preprocessed', as_index=False).count()

In [13]:
little_classes = grouped[grouped['id'] < 3]['category_id_le_preprocessed']

In [14]:
little_classes

2            2
21          21
24          24
28          28
38          38
         ...  
32061    32061
32068    32068
32073    32073
32080    32080
32082    32082
Name: category_id_le_preprocessed, Length: 3729, dtype: int64

In [15]:
train_indices, test_indices, _, _ = train_test_split(metadata_train.index, 
                                                     metadata_train['category_id_le_preprocessed'],
                                                     train_size=0.75, 
                                                     random_state=RANDOM_SEED,                                                     
                                                     shuffle=True)

In [16]:
train_data = metadata_train.loc[train_indices, :]
train_data.shape

train_data.reset_index(inplace=True)

In [17]:
test_data = metadata_train.loc[test_indices, :]
test_data.shape

test_data.reset_index(inplace=True)

In [18]:
test_indices, val_indices, _, _ = train_test_split(test_data.index, 
                                                   test_data['category_id_le_preprocessed'],
                                                   train_size=0.80, 
                                                   random_state=RANDOM_SEED,                                                     
                                                   shuffle=True)

In [19]:
val_data = test_data.loc[val_indices, :]
val_data.shape
val_data.reset_index(inplace=True)

In [20]:
test_data = test_data.loc[test_indices, :]
test_data.shape
test_data.reset_index(inplace=True)

## Class weights

In [22]:
class_weights = Counter(train_data['category_id_le_preprocessed'])
class_weights = [item[1] for item in sorted(list(class_weights.items()), key=lambda x : x[0])]

# Model Development

In [21]:
import torch

In [22]:
from torch import Tensor
from torch.utils.data import DataLoader
from utils_scripts import Specimen_Dataset, \
                          Data_Pipeline, \
                          Resizer, \
                          Normalizer, \
                          ToTensor

In [23]:
data_pipe_obj = Data_Pipeline(
    Resizer(output_size=(256,256)),
    Normalizer(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]),
    ToTensor()
)

In [24]:
train_dataset = Specimen_Dataset(dataset=train_data, set_value='train', transform=data_pipe_obj)
test_dataset = Specimen_Dataset(dataset=test_data, set_value='test', transform=data_pipe_obj)
val_dataset = Specimen_Dataset(dataset=val_data, set_value='val', transform=data_pipe_obj)
test_subm_dataset = Specimen_Dataset(dataset=metadata_test, set_value='test_submission', transform=data_pipe_obj)

In [26]:
print(f'train dataset : {len(train_dataset)}')
print(f'test dataset : {len(test_dataset)}')
print(f'val dataset : {len(val_dataset)}')
print(f'subm dataset : {len(test_subm_dataset)}')

train dataset : 773060
test dataset : 206149
val dataset : 51538
subm dataset : 138292


In [27]:
BATCH_SIZE = 256
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)

In [44]:
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE, num_workers = 8)
test_dataloader = DataLoader(dataset=test_dataset, shuffle=True, batch_size=BATCH_SIZE, num_workers = 8)
val_dataloader = DataLoader(dataset=val_dataset, shuffle=True, batch_size=BATCH_SIZE, num_workers = 8)
test_subm_dataloader = DataLoader(dataset=test_subm_dataset, shuffle=False, batch_size=BATCH_SIZE, num_workers = 8)

In [None]:
for batch_index, batch in enumerate(train_dataloader, 0):
    images, categories = batch['img'], batch['category_id']
    
    print(images.shape)
    print(categories.shape)
    
    break
    

In [29]:
metadata_train.columns

Index(['file_name', 'height', 'id', 'license', 'width', 'category_id',
       'region_id', 'category_id_le_preprocessed'],
      dtype='object')

In [30]:
metadata_train['height'].value_counts()

1000    1030649
667          57
670           5
665           4
683           3
675           2
682           2
977           1
598           1
615           1
649           1
661           1
662           1
666           1
669           1
673           1
684           1
696           1
700           1
718           1
720           1
721           1
731           1
742           1
788           1
802           1
818           1
858           1
386           1
928           1
975           1
533           1
Name: height, dtype: int64

In [31]:
metadata_train['width'].value_counts()

682    219673
667    212347
676    190476
681     86647
678     76882
        ...  
964         1
655         1
648         1
645         1
595         1
Name: width, Length: 181, dtype: int64

## ResNet-18

In [32]:
from collections import namedtuple

In [33]:
from torch.optim import SGD, lr_scheduler
from torch.nn import Linear, CrossEntropyLoss
from torchvision.models import resnet18

In [34]:
NUM_OF_CLASSES = len(classes)
NUM_OF_EPOCHES = 1

In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [36]:
device

device(type='cuda')

In [45]:
resnet18_model = resnet18(pretrained=True)
resnet18_model.fc = Linear(resnet18_model.fc.in_features, NUM_OF_CLASSES)

In [46]:
resnet18_model = resnet18_model.to(device)

loss_func = CrossEntropyLoss()
optimizer_sgd = SGD(resnet18_model.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_sgd, step_size=7, gamma=0.1)

In [None]:
torch.cuda.empty_cache()

In [47]:
num_of_batches_train = len(train_dataset) // BATCH_SIZE
num_of_batches_val = len(val_dataset) // BATCH_SIZE
num_of_batches_test = len(test_dataset) // BATCH_SIZE
num_of_batches_subm = len(test_subm_dataset) // BATCH_SIZE

In [48]:
print(f'Train : {num_of_batches_train}')
print(f'Val : {num_of_batches_val}')
print(f'Test : {num_of_batches_test}')
print(f'Subm : {num_of_batches_subm}')

Train : 3019
Val : 201
Test : 805
Subm : 540


In [49]:
for epoch_i in trange(1,NUM_OF_EPOCHES+1, desc='Epoches'):
    y_true, y_pred = torch.Tensor([]), torch.Tensor([])
    y_true_val, y_pred_val = torch.Tensor([]), torch.Tensor([])
    
    y_true, y_pred = y_true.to(device), y_pred.to(device)
    y_true_val, y_pred_val = y_true_val.to(device), y_pred_val.to(device)
    
    epoch_loss = 0.0
    resnet18_model.train()
    
    print('TRAIN Batches processing...')

    for batch_index, batch in enumerate(train_dataloader, 0):
                
        images, categories = batch['img'], batch['category_id']
        images, categories = images.to(device), categories.to(device)
        
        optimizer_sgd.zero_grad()
        
        outputs = resnet18_model(images)
        
        _, preds = torch.max(outputs, 1)
        
#         print(preds.shape)
        
        y_pred = torch.cat((y_pred, preds))
        y_true = torch.cat((y_true, categories))
        
        loss = loss_func(outputs, categories)
        epoch_loss += loss.item()

        loss.backward()
        optimizer_sgd.step()
        exp_lr_scheduler.step()
        
        print(f'{batch_index + 1}/{num_of_batches_train} total batches processed')
        
#         break

    print()
        
    resnet18_model.eval()
    
    print('Val Batches processing...')
    
    with torch.no_grad():
        for batch_index, batch in enumerate(val_dataloader, 0):
            
            images, categories = batch['img'], batch['category_id']
            images, categories = images.to(device), categories.to(device)
            
            outputs = resnet18_model(images)
            
            _, preds = torch.max(outputs,1)
            
            y_pred_val = torch.cat((y_pred_val, preds))
            y_true_val = torch.cat((y_true_val, categories))
            
            print(f'{batch_index + 1}/{num_of_batches_val} total batches processed')
            
#             break
            
    print(f'Epoch : {epoch_i}')
    print(f'Loss : {epoch_loss / (batch_index+1)}')
    print()
    print('Train : ')
    print(f"PRECISION : {precision_score(y_true.cpu().numpy().astype(int), y_pred.cpu().numpy().astype(int), average='macro')}")
    print(f"RECALL : {recall_score(y_true.cpu().numpy().astype(int), y_pred.cpu().numpy().astype(int), average='macro')}")
    print(f"F1-SCORE : {f1_score(y_true.cpu().numpy().astype(int), y_pred.cpu().numpy().astype(int), average='macro')}")
    
    print()
    print('Val : ')
    print(f"PRECISION : {precision_score(y_true_val.cpu().numpy().astype(int), y_pred_val.cpu().numpy().astype(int), average='macro')}")
    print(f"RECALL : {recall_score(y_true_val.cpu().numpy().astype(int), y_pred_val.cpu().numpy().astype(int), average='macro')}")
    print(f"F1-SCORE : {f1_score(y_true_val.cpu().numpy().astype(int), y_pred_val.cpu().numpy().astype(int), average='macro')}")
    print()
        
        
torch.save(resnet18_model.state_dict(), '/kaggle/working/resnet18_V2.pth')

Epoches:   0%|          | 0/1 [00:00<?, ?it/s]

TRAIN Batches processing...
1/3019 total batches processed
2/3019 total batches processed
3/3019 total batches processed
4/3019 total batches processed
5/3019 total batches processed
6/3019 total batches processed
7/3019 total batches processed
8/3019 total batches processed
9/3019 total batches processed
10/3019 total batches processed
11/3019 total batches processed
12/3019 total batches processed
13/3019 total batches processed
14/3019 total batches processed
15/3019 total batches processed
16/3019 total batches processed
17/3019 total batches processed
18/3019 total batches processed
19/3019 total batches processed
20/3019 total batches processed
21/3019 total batches processed
22/3019 total batches processed
23/3019 total batches processed
24/3019 total batches processed
25/3019 total batches processed
26/3019 total batches processed
27/3019 total batches processed
28/3019 total batches processed
29/3019 total batches processed
30/3019 total batches processed
31/3019 total batches

252/3019 total batches processed
253/3019 total batches processed
254/3019 total batches processed
255/3019 total batches processed
256/3019 total batches processed
257/3019 total batches processed
258/3019 total batches processed
259/3019 total batches processed
260/3019 total batches processed
261/3019 total batches processed
262/3019 total batches processed
263/3019 total batches processed
264/3019 total batches processed
265/3019 total batches processed
266/3019 total batches processed
267/3019 total batches processed
268/3019 total batches processed
269/3019 total batches processed
270/3019 total batches processed
271/3019 total batches processed
272/3019 total batches processed
273/3019 total batches processed
274/3019 total batches processed
275/3019 total batches processed
276/3019 total batches processed
277/3019 total batches processed
278/3019 total batches processed
279/3019 total batches processed
280/3019 total batches processed
281/3019 total batches processed
282/3019 t

501/3019 total batches processed
502/3019 total batches processed
503/3019 total batches processed
504/3019 total batches processed
505/3019 total batches processed
506/3019 total batches processed
507/3019 total batches processed
508/3019 total batches processed
509/3019 total batches processed
510/3019 total batches processed
511/3019 total batches processed
512/3019 total batches processed
513/3019 total batches processed
514/3019 total batches processed
515/3019 total batches processed
516/3019 total batches processed
517/3019 total batches processed
518/3019 total batches processed
519/3019 total batches processed
520/3019 total batches processed
521/3019 total batches processed
522/3019 total batches processed
523/3019 total batches processed
524/3019 total batches processed
525/3019 total batches processed
526/3019 total batches processed
527/3019 total batches processed
528/3019 total batches processed
529/3019 total batches processed
530/3019 total batches processed
531/3019 t

750/3019 total batches processed
751/3019 total batches processed
752/3019 total batches processed
753/3019 total batches processed
754/3019 total batches processed
755/3019 total batches processed
756/3019 total batches processed
757/3019 total batches processed
758/3019 total batches processed
759/3019 total batches processed
760/3019 total batches processed
761/3019 total batches processed
762/3019 total batches processed
763/3019 total batches processed
764/3019 total batches processed
765/3019 total batches processed
766/3019 total batches processed
767/3019 total batches processed
768/3019 total batches processed
769/3019 total batches processed
770/3019 total batches processed
771/3019 total batches processed
772/3019 total batches processed
773/3019 total batches processed
774/3019 total batches processed
775/3019 total batches processed
776/3019 total batches processed
777/3019 total batches processed
778/3019 total batches processed
779/3019 total batches processed
780/3019 t

999/3019 total batches processed
1000/3019 total batches processed
1001/3019 total batches processed
1002/3019 total batches processed
1003/3019 total batches processed
1004/3019 total batches processed
1005/3019 total batches processed
1006/3019 total batches processed
1007/3019 total batches processed
1008/3019 total batches processed
1009/3019 total batches processed
1010/3019 total batches processed
1011/3019 total batches processed
1012/3019 total batches processed
1013/3019 total batches processed
1014/3019 total batches processed
1015/3019 total batches processed
1016/3019 total batches processed
1017/3019 total batches processed
1018/3019 total batches processed
1019/3019 total batches processed
1020/3019 total batches processed
1021/3019 total batches processed
1022/3019 total batches processed
1023/3019 total batches processed
1024/3019 total batches processed
1025/3019 total batches processed
1026/3019 total batches processed
1027/3019 total batches processed
1028/3019 total

1240/3019 total batches processed
1241/3019 total batches processed
1242/3019 total batches processed
1243/3019 total batches processed
1244/3019 total batches processed
1245/3019 total batches processed
1246/3019 total batches processed
1247/3019 total batches processed
1248/3019 total batches processed
1249/3019 total batches processed
1250/3019 total batches processed
1251/3019 total batches processed
1252/3019 total batches processed
1253/3019 total batches processed
1254/3019 total batches processed
1255/3019 total batches processed
1256/3019 total batches processed
1257/3019 total batches processed
1258/3019 total batches processed
1259/3019 total batches processed
1260/3019 total batches processed
1261/3019 total batches processed
1262/3019 total batches processed
1263/3019 total batches processed
1264/3019 total batches processed
1265/3019 total batches processed
1266/3019 total batches processed
1267/3019 total batches processed
1268/3019 total batches processed
1269/3019 tota

1481/3019 total batches processed
1482/3019 total batches processed
1483/3019 total batches processed
1484/3019 total batches processed
1485/3019 total batches processed
1486/3019 total batches processed
1487/3019 total batches processed
1488/3019 total batches processed
1489/3019 total batches processed
1490/3019 total batches processed
1491/3019 total batches processed
1492/3019 total batches processed
1493/3019 total batches processed
1494/3019 total batches processed
1495/3019 total batches processed
1496/3019 total batches processed
1497/3019 total batches processed
1498/3019 total batches processed
1499/3019 total batches processed
1500/3019 total batches processed
1501/3019 total batches processed
1502/3019 total batches processed
1503/3019 total batches processed
1504/3019 total batches processed
1505/3019 total batches processed
1506/3019 total batches processed
1507/3019 total batches processed
1508/3019 total batches processed
1509/3019 total batches processed
1510/3019 tota

1722/3019 total batches processed
1723/3019 total batches processed
1724/3019 total batches processed
1725/3019 total batches processed
1726/3019 total batches processed
1727/3019 total batches processed
1728/3019 total batches processed
1729/3019 total batches processed
1730/3019 total batches processed
1731/3019 total batches processed
1732/3019 total batches processed
1733/3019 total batches processed
1734/3019 total batches processed
1735/3019 total batches processed
1736/3019 total batches processed
1737/3019 total batches processed
1738/3019 total batches processed
1739/3019 total batches processed
1740/3019 total batches processed
1741/3019 total batches processed
1742/3019 total batches processed
1743/3019 total batches processed
1744/3019 total batches processed
1745/3019 total batches processed
1746/3019 total batches processed
1747/3019 total batches processed
1748/3019 total batches processed
1749/3019 total batches processed
1750/3019 total batches processed
1751/3019 tota

1963/3019 total batches processed
1964/3019 total batches processed
1965/3019 total batches processed
1966/3019 total batches processed
1967/3019 total batches processed
1968/3019 total batches processed
1969/3019 total batches processed
1970/3019 total batches processed
1971/3019 total batches processed
1972/3019 total batches processed
1973/3019 total batches processed
1974/3019 total batches processed
1975/3019 total batches processed
1976/3019 total batches processed
1977/3019 total batches processed
1978/3019 total batches processed
1979/3019 total batches processed
1980/3019 total batches processed
1981/3019 total batches processed
1982/3019 total batches processed
1983/3019 total batches processed
1984/3019 total batches processed
1985/3019 total batches processed
1986/3019 total batches processed
1987/3019 total batches processed
1988/3019 total batches processed
1989/3019 total batches processed
1990/3019 total batches processed
1991/3019 total batches processed
1992/3019 tota

2204/3019 total batches processed
2205/3019 total batches processed
2206/3019 total batches processed
2207/3019 total batches processed
2208/3019 total batches processed
2209/3019 total batches processed
2210/3019 total batches processed
2211/3019 total batches processed
2212/3019 total batches processed
2213/3019 total batches processed
2214/3019 total batches processed
2215/3019 total batches processed
2216/3019 total batches processed
2217/3019 total batches processed
2218/3019 total batches processed
2219/3019 total batches processed
2220/3019 total batches processed
2221/3019 total batches processed
2222/3019 total batches processed
2223/3019 total batches processed
2224/3019 total batches processed
2225/3019 total batches processed
2226/3019 total batches processed
2227/3019 total batches processed
2228/3019 total batches processed
2229/3019 total batches processed
2230/3019 total batches processed
2231/3019 total batches processed
2232/3019 total batches processed
2233/3019 tota

2445/3019 total batches processed
2446/3019 total batches processed
2447/3019 total batches processed
2448/3019 total batches processed
2449/3019 total batches processed
2450/3019 total batches processed
2451/3019 total batches processed
2452/3019 total batches processed
2453/3019 total batches processed
2454/3019 total batches processed
2455/3019 total batches processed
2456/3019 total batches processed
2457/3019 total batches processed
2458/3019 total batches processed
2459/3019 total batches processed
2460/3019 total batches processed
2461/3019 total batches processed
2462/3019 total batches processed
2463/3019 total batches processed
2464/3019 total batches processed
2465/3019 total batches processed
2466/3019 total batches processed
2467/3019 total batches processed
2468/3019 total batches processed
2469/3019 total batches processed
2470/3019 total batches processed
2471/3019 total batches processed
2472/3019 total batches processed
2473/3019 total batches processed
2474/3019 tota

2686/3019 total batches processed
2687/3019 total batches processed
2688/3019 total batches processed
2689/3019 total batches processed
2690/3019 total batches processed
2691/3019 total batches processed
2692/3019 total batches processed
2693/3019 total batches processed
2694/3019 total batches processed
2695/3019 total batches processed
2696/3019 total batches processed
2697/3019 total batches processed
2698/3019 total batches processed
2699/3019 total batches processed
2700/3019 total batches processed
2701/3019 total batches processed
2702/3019 total batches processed
2703/3019 total batches processed
2704/3019 total batches processed
2705/3019 total batches processed
2706/3019 total batches processed
2707/3019 total batches processed
2708/3019 total batches processed
2709/3019 total batches processed
2710/3019 total batches processed
2711/3019 total batches processed
2712/3019 total batches processed
2713/3019 total batches processed
2714/3019 total batches processed
2715/3019 tota

2927/3019 total batches processed
2928/3019 total batches processed
2929/3019 total batches processed
2930/3019 total batches processed
2931/3019 total batches processed
2932/3019 total batches processed
2933/3019 total batches processed
2934/3019 total batches processed
2935/3019 total batches processed
2936/3019 total batches processed
2937/3019 total batches processed
2938/3019 total batches processed
2939/3019 total batches processed
2940/3019 total batches processed
2941/3019 total batches processed
2942/3019 total batches processed
2943/3019 total batches processed
2944/3019 total batches processed
2945/3019 total batches processed
2946/3019 total batches processed
2947/3019 total batches processed
2948/3019 total batches processed
2949/3019 total batches processed
2950/3019 total batches processed
2951/3019 total batches processed
2952/3019 total batches processed
2953/3019 total batches processed
2954/3019 total batches processed
2955/3019 total batches processed
2956/3019 tota

160/201 total batches processed
161/201 total batches processed
162/201 total batches processed
163/201 total batches processed
164/201 total batches processed
165/201 total batches processed
166/201 total batches processed
167/201 total batches processed
168/201 total batches processed
169/201 total batches processed
170/201 total batches processed
171/201 total batches processed
172/201 total batches processed
173/201 total batches processed
174/201 total batches processed
175/201 total batches processed
176/201 total batches processed
177/201 total batches processed
178/201 total batches processed
179/201 total batches processed
180/201 total batches processed
181/201 total batches processed
182/201 total batches processed
183/201 total batches processed
184/201 total batches processed
185/201 total batches processed
186/201 total batches processed
187/201 total batches processed
188/201 total batches processed
189/201 total batches processed
190/201 total batches processed
191/201 

Epoches: 100%|██████████| 1/1 [2:05:10<00:00, 7510.37s/it]

F1-SCORE : 4.1601595318151356e-08

Val : 
PRECISION : 7.49388349229359e-08
RECALL : 9.335237721828585e-06
F1-SCORE : 1.4868410478452813e-07






In [50]:
resnet18_model.eval()

y_true_test, y_pred_test = torch.Tensor([]), torch.Tensor([])
y_true_test, y_pred_test = y_true_test.to(device), y_pred_test.to(device)

print('Test Batches processing...')

with torch.no_grad():
    for batch_index, batch in enumerate(test_dataloader, 0):

        images, categories = batch['img'], batch['category_id']
        images, categories = images.to(device), categories.to(device)

        outputs = resnet18_model(images)

        _, preds = torch.max(outputs,1)

        y_pred_test = torch.cat((y_pred_test, preds))
        y_true_test = torch.cat((y_true_test, categories))
        
        print(f'{batch_index + 1}/{num_of_batches_test} total batches processed')
        
#         break
        

        
print('Test : ')
print(f"PRECISION : {precision_score(y_true_test.cpu().numpy().astype(int), y_pred_test.cpu().numpy().astype(int), average='macro')}")
print(f"RECALL : {recall_score(y_true_test.cpu().numpy().astype(int), y_pred_test.cpu().numpy().astype(int), average='macro')}")
print(f"F1-SCORE : {f1_score(y_true_test.cpu().numpy().astype(int), y_pred_test.cpu().numpy().astype(int), average='macro')}")
print()

Test Batches processing...
1/805 total batches processed
2/805 total batches processed
3/805 total batches processed
4/805 total batches processed
5/805 total batches processed
6/805 total batches processed
7/805 total batches processed
8/805 total batches processed
9/805 total batches processed
10/805 total batches processed
11/805 total batches processed
12/805 total batches processed
13/805 total batches processed
14/805 total batches processed
15/805 total batches processed
16/805 total batches processed
17/805 total batches processed
18/805 total batches processed
19/805 total batches processed
20/805 total batches processed
21/805 total batches processed
22/805 total batches processed
23/805 total batches processed
24/805 total batches processed
25/805 total batches processed
26/805 total batches processed
27/805 total batches processed
28/805 total batches processed
29/805 total batches processed
30/805 total batches processed
31/805 total batches processed
32/805 total batches 

260/805 total batches processed
261/805 total batches processed
262/805 total batches processed
263/805 total batches processed
264/805 total batches processed
265/805 total batches processed
266/805 total batches processed
267/805 total batches processed
268/805 total batches processed
269/805 total batches processed
270/805 total batches processed
271/805 total batches processed
272/805 total batches processed
273/805 total batches processed
274/805 total batches processed
275/805 total batches processed
276/805 total batches processed
277/805 total batches processed
278/805 total batches processed
279/805 total batches processed
280/805 total batches processed
281/805 total batches processed
282/805 total batches processed
283/805 total batches processed
284/805 total batches processed
285/805 total batches processed
286/805 total batches processed
287/805 total batches processed
288/805 total batches processed
289/805 total batches processed
290/805 total batches processed
291/805 

517/805 total batches processed
518/805 total batches processed
519/805 total batches processed
520/805 total batches processed
521/805 total batches processed
522/805 total batches processed
523/805 total batches processed
524/805 total batches processed
525/805 total batches processed
526/805 total batches processed
527/805 total batches processed
528/805 total batches processed
529/805 total batches processed
530/805 total batches processed
531/805 total batches processed
532/805 total batches processed
533/805 total batches processed
534/805 total batches processed
535/805 total batches processed
536/805 total batches processed
537/805 total batches processed
538/805 total batches processed
539/805 total batches processed
540/805 total batches processed
541/805 total batches processed
542/805 total batches processed
543/805 total batches processed
544/805 total batches processed
545/805 total batches processed
546/805 total batches processed
547/805 total batches processed
548/805 

774/805 total batches processed
775/805 total batches processed
776/805 total batches processed
777/805 total batches processed
778/805 total batches processed
779/805 total batches processed
780/805 total batches processed
781/805 total batches processed
782/805 total batches processed
783/805 total batches processed
784/805 total batches processed
785/805 total batches processed
786/805 total batches processed
787/805 total batches processed
788/805 total batches processed
789/805 total batches processed
790/805 total batches processed
791/805 total batches processed
792/805 total batches processed
793/805 total batches processed
794/805 total batches processed
795/805 total batches processed
796/805 total batches processed
797/805 total batches processed
798/805 total batches processed
799/805 total batches processed
800/805 total batches processed
801/805 total batches processed
802/805 total batches processed
803/805 total batches processed
804/805 total batches processed
805/805 

# Submission

In [51]:
resnet18_model.eval()

y_subm = torch.Tensor([]).to(device)
ids = torch.Tensor([]).to(device)

print('Submission Batches processing...')


with torch.no_grad():
    for batch_index, batch in enumerate(test_subm_dataloader, 0):

        images, ids_batch = batch['img'], batch['id']
        images, ids_batch = images.to(device), ids_batch.to(device)

        outputs = resnet18_model(images)

        _, preds = torch.max(outputs,1)

        y_subm = torch.cat((y_subm, preds))
        ids = torch.cat((ids, ids_batch))
        
        print(f'{batch_index + 1}/{num_of_batches_subm} total batches processed')
        
#         break
        
        
y_subm_deprocessed = le_preprocessor.inverse_transform(y_subm.cpu().numpy().astype(int))


submission_df = pd.DataFrame({'Id' : ids.cpu().numpy(), 'Predicted' : y_subm_deprocessed})
submission_df.to_csv('/kaggle/working/resnet18_results_V2.csv')

Submission Batches processing...
1/540 total batches processed
2/540 total batches processed
3/540 total batches processed
4/540 total batches processed
5/540 total batches processed
6/540 total batches processed
7/540 total batches processed
8/540 total batches processed
9/540 total batches processed
10/540 total batches processed
11/540 total batches processed
12/540 total batches processed
13/540 total batches processed
14/540 total batches processed
15/540 total batches processed
16/540 total batches processed
17/540 total batches processed
18/540 total batches processed
19/540 total batches processed
20/540 total batches processed
21/540 total batches processed
22/540 total batches processed
23/540 total batches processed
24/540 total batches processed
25/540 total batches processed
26/540 total batches processed
27/540 total batches processed
28/540 total batches processed
29/540 total batches processed
30/540 total batches processed
31/540 total batches processed
32/540 total ba

260/540 total batches processed
261/540 total batches processed
262/540 total batches processed
263/540 total batches processed
264/540 total batches processed
265/540 total batches processed
266/540 total batches processed
267/540 total batches processed
268/540 total batches processed
269/540 total batches processed
270/540 total batches processed
271/540 total batches processed
272/540 total batches processed
273/540 total batches processed
274/540 total batches processed
275/540 total batches processed
276/540 total batches processed
277/540 total batches processed
278/540 total batches processed
279/540 total batches processed
280/540 total batches processed
281/540 total batches processed
282/540 total batches processed
283/540 total batches processed
284/540 total batches processed
285/540 total batches processed
286/540 total batches processed
287/540 total batches processed
288/540 total batches processed
289/540 total batches processed
290/540 total batches processed
291/540 

517/540 total batches processed
518/540 total batches processed
519/540 total batches processed
520/540 total batches processed
521/540 total batches processed
522/540 total batches processed
523/540 total batches processed
524/540 total batches processed
525/540 total batches processed
526/540 total batches processed
527/540 total batches processed
528/540 total batches processed
529/540 total batches processed
530/540 total batches processed
531/540 total batches processed
532/540 total batches processed
533/540 total batches processed
534/540 total batches processed
535/540 total batches processed
536/540 total batches processed
537/540 total batches processed
538/540 total batches processed
539/540 total batches processed
540/540 total batches processed
541/540 total batches processed


In [52]:
len(submission_df)

138292

In [58]:
submission_df = pd.DataFrame({'Id' : ids.cpu().numpy().astype(int), 'Predicted' : y_subm_deprocessed})
submission_df.to_csv('/kaggle/working/resnet18_results_V2.csv', header=True, index=False)

In [55]:
submission_df

Unnamed: 0,Id,Predicted
0,104891,16399
1,18029,24264
2,35151,16399
3,124144,14503
4,24649,16251
...,...,...
138287,32738,16399
138288,16804,14506
138289,113662,16399
138290,86100,9960


In [57]:
submission_example

Unnamed: 0,Id,Predicted
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
...,...,...
138287,138287,0
138288,138288,0
138289,138289,0
138290,138290,0


In [59]:
submission_df['Predicted'].value_counts()

16399    42688
9960     31344
3460      4413
27448     3601
1361      3353
         ...  
23471        1
31705        1
16478        1
16477        1
26637        1
Name: Predicted, Length: 1050, dtype: int64