In [None]:
# Extra source directories that are put on sys.path before the imports below.
external_packages = [
    "../input/pytorch-image-models/pytorch-image-models-master/",
]

In [None]:
import sys

# Make the directories listed in external_packages importable. Entries that
# are already on sys.path are skipped so that re-running this cell does not
# keep appending the same directory again.
for pth in external_packages:
    if pth not in sys.path:
        sys.path.append(pth)

# torch and model
import timm
import torch
from torch import nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

from sklearn.model_selection import GroupKFold, StratifiedKFold
from sklearn.metrics import confusion_matrix

# basic
import pandas as pd
import numpy as np
import cv2 as cv
import os
import time
import random
from tqdm import tqdm
import json
import imagehash
import matplotlib.pyplot as plt
import seaborn as sn

# data augmentation
from albumentations import (RandomResizedCrop, Transpose, HorizontalFlip, 
                            VerticalFlip, ShiftScaleRotate, Normalize,
                            Resize, Compose)
from albumentations.pytorch import ToTensorV2

%matplotlib inline

In [None]:
# Directory that holds the competition files referenced below.
BASE_DIR = "../input/cassava-leaf-disease-classification/"

# Settings shared by the rest of the notebook; looked up as config[<key>].
config = {
    "num_folds": 5,
    "seed": 2020,
    "model_arch": "tf_efficientnet_b4_ns",
    "img_size": 512,
    "epochs": 5,
    "train_batchSize": 16,
    "valid_batchSize": 32,
    "T_0": 10,
    "lr": 1e-4,
    "min_lr": 1e-6,
    "weight_decay": 1e-6,
    "num_workers": 4,
    # Support batch accumulation for backprop, giving an effectively larger
    # batch size.
    "accum_iter": 2,
    "verbose_step": 1,
    "device": "cuda:0",
    # File and directory locations.
    "trainFileInfo": os.path.join(BASE_DIR, "train.csv"),
    "trainImgDir": os.path.join(BASE_DIR, "train_images"),
    "trainPrevImgDir": "../input/cassavadiseasepreviousdata/train/train/",
    "pseudoLabelDataDir": "../input/cassavadiseasepseudolabelingdata/pseudo_labeling_data_99.csv",
    "testImgDir": os.path.join(BASE_DIR, "test_images"),
    "classLableDir": os.path.join(BASE_DIR, "label_num_to_disease_map.json"),
    "submSampleDir": os.path.join(BASE_DIR, "sample_submission.csv"),
}

# set seed function
def seed_everything(seed):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and every CUDA
    device), exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic
    mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; this is silently
    # ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different convolution algorithms from run
    # to run, so it has to be switched off for reproducible results.
    torch.backends.cudnn.benchmark = False

In [None]:
# Read the CSV referenced by config['pseudoLabelDataDir'] and preview its
# first rows.
useful_data = pd.read_csv(filepath_or_buffer=config['pseudoLabelDataDir'])
useful_data.head(n=5)

In [None]:
# Show the id, label and image_dir columns for the first ten rows.
useful_data.loc[:, ['image_id', 'label', 'image_dir']].head(n=10)

In [None]:
# utility functions

def get_image(pth):
    """Read an image file and return it in RGB channel order.

    Args:
        pth: Path of the image file, passed to ``cv.imread``.

    Returns:
        The image returned by ``cv.imread`` after conversion with
        ``cv.COLOR_BGR2RGB``.

    Raises:
        FileNotFoundError: If ``cv.imread`` could not read the file. It
            returns ``None`` instead of raising when the file is missing or
            cannot be decoded.
    """
    img_bgr = cv.imread(pth)
    if img_bgr is None:
        # Fail here with the offending path instead of letting cv.cvtColor
        # raise an opaque error about an empty input.
        raise FileNotFoundError(f"could not read image: {pth}")
    return cv.cvtColor(img_bgr, cv.COLOR_BGR2RGB)

def get_duplicate_data():
    duplicates_dict = [('1002088496.jpg', 'train-cbsd-614.jpg'), ('1024367055.jpg', 'train-cgm-677.jpg'), ('1046486747.jpg', 'train-cbsd-1272.jpg'), ('1050134400.jpg', 'train-cmd-1049.jpg'), ('1059986462.jpg', 'train-cgm-416.jpg'), ('1069816211.jpg', 'train-cgm-456.jpg'), ('1072259548.jpg', 'train-cbsd-355.jpg'), ('1080179563.jpg', 'train-cmd-10.jpg'), ('1080281713.jpg', 'train-cmd-1009.jpg'), ('1081937072.jpg', 'train-cmd-809.jpg'), ('1089661926.jpg', 'train-cbsd-1248.jpg'), ('1099882902.jpg', 'train-cbsd-50.jpg'), ('1102683272.jpg', 'train-cmd-2099.jpg'), ('1118493919.jpg', 'train-cmd-1643.jpg'), ('112557905.jpg', 'train-cmd-207.jpg'), ('1128747807.jpg', 'train-cmd-2601.jpg'), ('1133309905.jpg', 'train-cgm-714.jpg'), ('114251805.jpg', 'train-cgm-482.jpg'), ('1143548479.jpg', 'train-cmd-1456.jpg'), ('1144657211.jpg', 'train-cmd-646.jpg'), ('11690064.jpg', 'train-cbb-345.jpg'), ('1172621803.jpg', 'train-cmd-2371.jpg'), ('1180559828.jpg', 'train-cmd-986.jpg'), ('1194116552.jpg', 'train-cmd-2630.jpg'), ('1197262681.jpg', 'train-cmd-2222.jpg'), ('1199797130.jpg', 'train-cgm-303.jpg'), ('1209827516.jpg', 'train-cgm-438.jpg'), ('1215008377.jpg', 'train-cmd-42.jpg'), ('1227903119.jpg', 'train-cmd-528.jpg'), ('1234931385.jpg', 'train-cgm-515.jpg'), ('1239998285.jpg', 'train-cbsd-1213.jpg'), ('1242546458.jpg', 'train-cmd-411.jpg'), ('125037460.jpg', 'train-cmd-363.jpg'), ('1252538311.jpg', 'train-cmd-1558.jpg'), ('1261540961.jpg', 'train-cmd-1698.jpg'), ('1264219928.jpg', 'train-cgm-426.jpg'), ('1275608644.jpg', 'train-cmd-1278.jpg'), ('1276802461.jpg', 'train-cbb-261.jpg'), ('1277936443.jpg', 'train-cmd-1970.jpg'), ('1285287595.jpg', 'train-cbsd-293.jpg'), ('1285588852.jpg', 'train-cbsd-446.jpg'), ('128820265.jpg', 'train-cmd-2535.jpg'), ('1290734565.jpg', 'train-cmd-2507.jpg'), ('1302078468.jpg', 'train-cbsd-1203.jpg'), ('1303876802.jpg', 'train-cbb-429.jpg'), ('1324257289.jpg', 'train-cmd-2124.jpg'), ('1335452154.jpg', 'train-cmd-2260.jpg'), ('1335531094.jpg', 
'train-cbsd-1390.jpg'), ('1339257499.jpg', 'train-cbb-18.jpg'), ('1348941923.jpg', 'train-cmd-1194.jpg'), ('1359438859.jpg', 'train-cbb-448.jpg'), ('1362645481.jpg', 'train-cmd-2602.jpg'), ('1368162685.jpg', 'train-cgm-213.jpg'), ('1381222280.jpg', 'train-cmd-2481.jpg'), ('1385160350.jpg', 'train-cmd-2316.jpg'), ('1386157120.jpg', 'train-cmd-1724.jpg'), ('1387354222.jpg', 'train-cmd-279.jpg'), ('13975743.jpg', 'train-cgm-291.jpg'), ('1415940915.jpg', 'train-cmd-1659.jpg'), ('1418463220.jpg', 'train-cgm-405.jpg'), ('1424930187.jpg', 'train-cmd-1454.jpg'), ('1432249645.jpg', 'train-cbsd-157.jpg'), ('1438970432.jpg', 'train-cmd-489.jpg'), ('1442451975.jpg', 'train-cmd-1806.jpg'), ('1443356256.jpg', 'train-cgm-109.jpg'), ('144873912.jpg', 'train-cmd-500.jpg'), ('1451862945.jpg', 'train-cmd-100.jpg'), ('1454074626.jpg', 'train-cmd-200.jpg'), ('1455906876.jpg', 'train-cbsd-1056.jpg'), ('145911417.jpg', 'train-cmd-625.jpg'), ('1460538853.jpg', 'train-cmd-839.jpg'), ('1467601507.jpg', 'train-cmd-893.jpg'), ('1472183727.jpg', 'train-cmd-1460.jpg'), ('1476112995.jpg', 'train-cbsd-227.jpg'), ('1478375260.jpg', 'train-cmd-137.jpg'), ('1479168350.jpg', 'train-cmd-1189.jpg'), ('1483777554.jpg', 'train-cmd-1235.jpg'), ('1492079032.jpg', 'train-cbsd-729.jpg'), ('15177566.jpg', 'train-cmd-374.jpg'), ('1531006450.jpg', 'train-cmd-1513.jpg'), ('1561961236.jpg', 'train-cgm-670.jpg'), ('1562043567.jpg', '3551135685.jpg'), ('1571832603.jpg', 'train-cbsd-1185.jpg'), ('1575013487.jpg', 'train-cmd-2016.jpg'), ('1575866220.jpg', 'train-cbsd-39.jpg'), ('1578897742.jpg', 'train-cmd-273.jpg'), ('1578977008.jpg', 'train-cmd-960.jpg'), ('1587009529.jpg', 'train-cmd-1264.jpg'), ('1595577438.jpg', 'train-cgm-125.jpg'), ('1595866872.jpg', 'train-cbsd-34.jpg'), ('1598187662.jpg', 'train-cmd-1332.jpg'), ('1612059624.jpg', 'train-cgm-111.jpg'), ('1612177465.jpg', 'train-cgm-432.jpg'), ('1622568975.jpg', 'train-cbsd-691.jpg'), ('162460466.jpg', 'train-cmd-1748.jpg'), ('162740748.jpg', 
'train-cbsd-479.jpg'), ('1635358503.jpg', 'train-cmd-1551.jpg'), ('164781857.jpg', 'train-cmd-804.jpg'), ('1648944957.jpg', 'train-cmd-1978.jpg'), ('1650399375.jpg', 'train-cmd-1841.jpg'), ('1652157522.jpg', 'train-cbsd-137.jpg'), ('1657763940.jpg', 'train-cmd-74.jpg'), ('1666313319.jpg', 'train-cmd-996.jpg'), ('1667719250.jpg', 'train-cmd-1471.jpg'), ('1675424931.jpg', 'train-cgm-592.jpg'), ('1675758805.jpg', 'train-cmd-2358.jpg'), ('1681477511.jpg', 'train-cmd-2583.jpg'), ('1700921498.jpg', 'train-cmd-656.jpg'), ('1706377266.jpg', 'train-cbsd-671.jpg'), ('170814157.jpg', 'train-cgm-167.jpg'), ('1713444587.jpg', 'train-cgm-757.jpg'), ('171546937.jpg', 'train-cbb-193.jpg'), ('1722033032.jpg', 'train-cbb-410.jpg'), ('1724843600.jpg', 'train-cbsd-1034.jpg'), ('1726864717.jpg', 'train-cmd-298.jpg'), ('1727205281.jpg', 'train-cbsd-1016.jpg'), ('1732173388.jpg', 'train-cgm-247.jpg'), ('173849826.jpg', 'train-cgm-404.jpg'), ('1738923752.jpg', 'train-cbsd-321.jpg'), ('1747122718.jpg', 'train-cbsd-795.jpg'), ('1759147122.jpg', 'train-cmd-967.jpg'), ('177187875.jpg', 'train-cmd-1400.jpg'), ('1775105594.jpg', 'train-cmd-2191.jpg'), ('1786164172.jpg', 'train-cgm-542.jpg'), ('178880482.jpg', 'train-cmd-1067.jpg'), ('1789470358.jpg', 'train-cgm-345.jpg'), ('1792425947.jpg', 'train-cgm-764.jpg'), ('179693098.jpg', 'train-cbsd-907.jpg'), ('1814144394.jpg', 'train-cbsd-48.jpg'), ('1815147513.jpg', 'train-cbb-143.jpg'), ('1817648564.jpg', 'train-cmd-1225.jpg'), ('1818510196.jpg', 'train-cbsd-1220.jpg'), ('181857749.jpg', 'train-cgm-680.jpg'), ('1822682006.jpg', 'train-cmd-524.jpg'), ('182701414.jpg', 'train-cbsd-922.jpg'), ('1829924843.jpg', 'train-cgm-84.jpg'), ('1830481275.jpg', 'train-cbb-335.jpg'), ('183535060.jpg', 'train-cmd-1529.jpg'), ('1836918462.jpg', 'train-cbsd-1210.jpg'), ('1845811420.jpg', 'train-cgm-599.jpg'), ('1846052077.jpg', 'train-cmd-1465.jpg'), ('1859265664.jpg', 'train-cmd-1534.jpg'), ('1865586599.jpg', 'train-cbsd-112.jpg'), ('1867590389.jpg', 
'train-cgm-196.jpg'), ('1875533805.jpg', 'train-cbsd-1028.jpg'), ('1876922129.jpg', 'train-cbb-365.jpg'), ('1886905036.jpg', 'train-cbb-303.jpg'), ('1889215655.jpg', 'train-cbb-299.jpg'), ('1890187078.jpg', 'train-cbsd-943.jpg'), ('189339781.jpg', 'train-cmd-2112.jpg'), ('1897585920.jpg', 'train-cmd-2420.jpg'), ('190449795.jpg', 'train-cbsd-47.jpg'), ('1907786974.jpg', 'train-cbb-272.jpg'), ('1909170813.jpg', 'train-cmd-1056.jpg'), ('1909629301.jpg', 'train-cbsd-1322.jpg'), ('1923898414.jpg', 'train-cbb-334.jpg'), ('1924122147.jpg', 'train-cmd-1580.jpg'), ('193330948.jpg', 'train-cbsd-1110.jpg'), ('1937240444.jpg', 'train-cgm-522.jpg'), ('1941569739.jpg', 'train-cbsd-593.jpg'), ('1948579246.jpg', 'train-cmd-590.jpg'), ('1958784721.jpg', 'train-cmd-955.jpg'), ('1960632681.jpg', 'train-cbsd-29.jpg'), ('1961893968.jpg', 'train-cgm-690.jpg'), ('1968165864.jpg', 'train-cmd-581.jpg'), ('1969270538.jpg', 'train-cgm-686.jpg'), ('1972732995.jpg', 'train-cmd-1675.jpg'), ('197432034.jpg', 'train-cgm-108.jpg'), ('1978570472.jpg', 'train-cmd-2638.jpg'), ('1981291103.jpg', 'train-cgm-726.jpg'), ('1983266061.jpg', 'train-cmd-1714.jpg'), ('1983607439.jpg', 'train-cmd-1048.jpg'), ('1993434265.jpg', 'train-cmd-2634.jpg'), ('1993626674.jpg', 'train-cmd-1747.jpg'), ('1995608609.jpg', 'train-cgm-115.jpg'), ('1998802568.jpg', 'train-cmd-1395.jpg'), ('2005545104.jpg', 'train-cbb-83.jpg'), ('2011856234.jpg', 'train-cmd-2057.jpg'), ('2012411582.jpg', 'train-cbsd-1391.jpg'), ('2016929365.jpg', 'train-cmd-1261.jpg'), ('2021239499.jpg', 'train-cgm-623.jpg'), ('2025224084.jpg', 'train-cmd-1231.jpg'), ('203075136.jpg', 'train-cgm-671.jpg'), ('20361463.jpg', 'train-cbsd-1032.jpg'), ('20518383.jpg', 'train-cgm-742.jpg'), ('2058684347.jpg', 'train-cgm-463.jpg'), ('2059449835.jpg', 'train-cmd-1543.jpg'), ('2066754199.jpg', 'train-cbsd-755.jpg'), ('2069885945.jpg', 'train-cbsd-921.jpg'), ('2084909272.jpg', 'train-cmd-1442.jpg'), ('2105589928.jpg', 'train-cgm-189.jpg'), ('2109494038.jpg', 
'train-cmd-712.jpg'), ('2110727627.jpg', 'train-cgm-625.jpg'), ('2115508050.jpg', 'train-cmd-2262.jpg'), ('2129562481.jpg', 'train-cmd-1008.jpg'), ('2133889430.jpg', 'train-cmd-1327.jpg'), ('2139342889.jpg', 'train-cgm-314.jpg'), ('2148757008.jpg', 'train-cbsd-1285.jpg'), ('2149970580.jpg', 'train-cbb-73.jpg'), ('2161608186.jpg', 'train-cmd-1834.jpg'), ('2164751945.jpg', 'train-cmd-1259.jpg'), ('2173229407.jpg', 'train-cgm-697.jpg'), ('2177675284.jpg', 'train-cbb-361.jpg'), ('2184718270.jpg', 'train-cgm-664.jpg'), ('2186683331.jpg', 'train-cmd-686.jpg'), ('2203204324.jpg', 'train-cbb-386.jpg'), ('220707732.jpg', 'train-cgm-673.jpg'), ('2207440318.jpg', 'train-cmd-1136.jpg'), ('2208909764.jpg', 'train-cmd-2505.jpg'), ('2216899710.jpg', 'train-cmd-2482.jpg'), ('2221731773.jpg', 'train-cmd-128.jpg'), ('2221889127.jpg', 'train-cbb-19.jpg'), ('2231704125.jpg', 'train-cgm-688.jpg'), ('2232959596.jpg', 'train-cmd-2652.jpg'), ('2241394681.jpg', 'train-cgm-343.jpg'), ('2252529694.jpg', '911861181.jpg'), ('2252678193.jpg', 'train-cgm-410.jpg'), ('2260330058.jpg', 'train-cmd-112.jpg'), ('2260521441.jpg', 'train-cgm-157.jpg'), ('2264163141.jpg', 'train-cbb-355.jpg'), ('2265725202.jpg', 'train-cmd-1315.jpg'), ('226962956.jpg', 'train-cbsd-623.jpg'), ('227401382.jpg', 'train-cmd-353.jpg'), ('2308142554.jpg', 'train-cmd-782.jpg'), ('2314598518.jpg', 'train-cmd-2046.jpg'), ('2316499026.jpg', 'train-cmd-577.jpg'), ('2317623527.jpg', 'train-cmd-1853.jpg'), ('2318645335.jpg', 'train-cbb-136.jpg'), ('2328599167.jpg', 'train-cmd-1084.jpg'), ('2329316953.jpg', 'train-cmd-1229.jpg'), ('2330851012.jpg', 'train-cbsd-754.jpg'), ('2331038619.jpg', 'train-cmd-1282.jpg'), ('2332024831.jpg', 'train-cbsd-1395.jpg'), ('2338213285.jpg', 'train-cgm-543.jpg'), ('2339596137.jpg', 'train-cmd-275.jpg'), ('2344308543.jpg', 'train-cmd-1349.jpg'), ('2351508179.jpg', 'train-cmd-816.jpg'), ('2355374074.jpg', 'train-cgm-590.jpg'), ('2356810303.jpg', 'train-cmd-121.jpg'), ('2357741257.jpg', 
'train-cmd-746.jpg'), ('2358762057.jpg', 'train-cmd-1138.jpg'), ('2360494692.jpg', 'train-cmd-2657.jpg'), ('2371633225.jpg', 'train-cmd-1433.jpg'), ('2380764597.jpg', 'train-cbb-41.jpg'), ('238289094.jpg', 'train-cmd-850.jpg'), ('2386253796.jpg', 'train-cmd-735.jpg'), ('2389613525.jpg', 'train-cmd-826.jpg'), ('241455389.jpg', 'train-cbsd-625.jpg'), ('2417890720.jpg', 'train-cgm-549.jpg'), ('241826799.jpg', 'train-cbb-321.jpg'), ('2418298019.jpg', 'train-cmd-842.jpg'), ('2418594163.jpg', 'train-cmd-1691.jpg'), ('2427442391.jpg', 'train-cmd-1451.jpg'), ('2430831157.jpg', 'train-cmd-1141.jpg'), ('2438313768.jpg', 'train-cgm-143.jpg'), ('2450978537.jpg', 'train-cmd-2596.jpg'), ('2467324210.jpg', 'train-cmd-539.jpg'), ('2476584583.jpg', 'train-cgm-103.jpg'), ('2481954386.jpg', 'train-cbsd-337.jpg'), ('2494764703.jpg', 'train-cgm-31.jpg'), ('2501436016.jpg', 'train-cmd-1861.jpg'), ('250907427.jpg', 'train-cbsd-329.jpg'), ('2512949736.jpg', 'train-cmd-2512.jpg'), ('2518535006.jpg', 'train-cgm-375.jpg'), ('2520781924.jpg', 'train-cmd-1167.jpg'), ('2522202499.jpg', 'train-cbsd-823.jpg'), ('2535406918.jpg', 'train-cgm-42.jpg'), ('2536424998.jpg', 'train-cbsd-285.jpg'), ('2548461545.jpg', 'train-cbb-254.jpg'), ('2549375363.jpg', 'train-cgm-226.jpg'), ('2552592093.jpg', 'train-cbb-90.jpg'), ('255823836.jpg', 'train-cbsd-1327.jpg'), ('2560468045.jpg', 'train-cmd-492.jpg'), ('2570672557.jpg', 'train-cbsd-1337.jpg'), ('2571585254.jpg', 'train-cbb-117.jpg'), ('2571818236.jpg', 'train-cmd-694.jpg'), ('2577910904.jpg', 'train-cmd-2194.jpg'), ('2582937466.jpg', 'train-cbsd-1074.jpg'), ('2588775979.jpg', 'train-cmd-2572.jpg'), ('2593976226.jpg', 'train-cbsd-868.jpg'), ('2594655853.jpg', 'train-cmd-428.jpg'), ('2595067395.jpg', 'train-cmd-2353.jpg'), ('2596761882.jpg', 'train-cmd-1924.jpg'), ('2601582697.jpg', 'train-cmd-276.jpg'), ('2606643559.jpg', 'train-cmd-792.jpg'), ('261186049.jpg', 'train-cmd-478.jpg'), ('2620149411.jpg', 'train-cbsd-742.jpg'), ('2621100310.jpg', 
'train-cmd-2317.jpg'), ('2621947862.jpg', 'train-cbsd-708.jpg'), ('2627358594.jpg', 'train-cmd-1350.jpg'), ('2647070246.jpg', 'train-cgm-180.jpg'), ('2651363651.jpg', 'train-cmd-900.jpg'), ('265704323.jpg', 'train-cgm-328.jpg'), ('2657864406.jpg', 'train-cmd-631.jpg'), ('2674468156.jpg', 'train-cmd-1037.jpg'), ('2684352665.jpg', 'train-cbsd-780.jpg'), ('2689546990.jpg', 'train-cgm-54.jpg'), ('2700366371.jpg', 'train-cgm-434.jpg'), ('2711532702.jpg', 'train-cmd-2398.jpg'), ('271724485.jpg', 'train-cmd-991.jpg'), ('2722584545.jpg', 'train-cmd-1959.jpg'), ('2741057702.jpg', 'train-cbsd-930.jpg'), ('2754939037.jpg', 'train-cgm-552.jpg'), ('2759452874.jpg', 'train-cmd-2058.jpg'), ('2759500008.jpg', 'train-cmd-1851.jpg'), ('2759737963.jpg', 'train-cbsd-1070.jpg'), ('2760869322.jpg', 'train-cbsd-181.jpg'), ('2763304605.jpg', 'train-cbsd-1254.jpg'), ('2764505542.jpg', 'train-cbsd-796.jpg'), ('2764717089.jpg', 'train-cbsd-1002.jpg'), ('2766931963.jpg', 'train-cbb-307.jpg'), ('2768992642.jpg', 'train-cmd-1293.jpg'), ('278340134.jpg', 'train-cmd-537.jpg'), ('2792568356.jpg', 'train-cmd-771.jpg'), ('2813455055.jpg', 'train-cmd-1289.jpg'), ('2813897976.jpg', 'train-cbsd-1318.jpg'), ('2820203229.jpg', 'train-cmd-983.jpg'), ('2830245641.jpg', 'train-cgm-76.jpg'), ('2832996503.jpg', 'train-cbb-444.jpg'), ('2845701741.jpg', 'train-cmd-1102.jpg'), ('2847670157.jpg', 'train-cmd-1732.jpg'), ('2853035567.jpg', 'train-cmd-2175.jpg'), ('2861545981.jpg', 'train-cgm-51.jpg'), ('286515278.jpg', 'train-cmd-2571.jpg'), ('2870742473.jpg', 'train-cmd-1129.jpg'), ('2879061263.jpg', 'train-cgm-329.jpg'), ('2888640560.jpg', 'train-cgm-121.jpg'), ('289082000.jpg', 'train-cmd-1426.jpg'), ('2891895109.jpg', 'train-cmd-312.jpg'), ('2892238301.jpg', 'train-cmd-1432.jpg'), ('2902102449.jpg', 'train-cmd-299.jpg'), ('2907262343.jpg', 'train-cbb-402.jpg'), ('290867441.jpg', 'train-cbsd-1183.jpg'), ('2909347864.jpg', 'train-cmd-465.jpg'), ('2919116944.jpg', 'train-cbb-113.jpg'), ('293090840.jpg', 
'train-cbsd-830.jpg'), ('2940017595.jpg', 'train-cmd-2309.jpg'), ('2941037051.jpg', 'train-cmd-2625.jpg'), ('2947092934.jpg', 'train-cbsd-636.jpg'), ('2948559947.jpg', 'train-cmd-1527.jpg'), ('2948942886.jpg', 'train-cbsd-1281.jpg'), ('2958100896.jpg', 'train-cmd-1725.jpg'), ('2963358758.jpg', 'train-cbsd-549.jpg'), ('2964066000.jpg', 'train-cmd-1624.jpg'), ('2966195606.jpg', 'train-cgm-256.jpg'), ('2967206024.jpg', 'train-cgm-458.jpg'), ('2970234083.jpg', 'train-cgm-761.jpg'), ('2972617511.jpg', 'train-cgm-703.jpg'), ('2978135052.jpg', 'train-cgm-412.jpg'), ('2983246696.jpg', 'train-cmd-87.jpg'), ('298424266.jpg', 'train-cmd-1326.jpg'), ('2985118735.jpg', 'train-cmd-2195.jpg'), ('3001382345.jpg', 'train-cbsd-251.jpg'), ('3004362957.jpg', 'train-cmd-644.jpg'), ('3006499770.jpg', 'train-cbsd-195.jpg'), ('3013471955.jpg', 'train-cmd-131.jpg'), ('3024747455.jpg', 'train-cmd-134.jpg'), ('3027691323.jpg', 'train-cgm-744.jpg'), ('3049440979.jpg', 'train-cmd-1679.jpg'), ('3054390977.jpg', 'train-cmd-1548.jpg'), ('3058038323.jpg', 'train-cmd-1443.jpg'), ('3058256804.jpg', 'train-cmd-2526.jpg'), ('306133807.jpg', 'train-cmd-293.jpg'), ('3073409707.jpg', 'train-cmd-2589.jpg'), ('3082163001.jpg', 'train-cmd-1025.jpg'), ('3097440014.jpg', 'train-cbb-225.jpg'), ('3100925014.jpg', 'train-cgm-170.jpg'), ('3114522519.jpg', 'train-cmd-1230.jpg'), ('3116498490.jpg', 'train-cbb-38.jpg'), ('3126447146.jpg', 'train-cmd-2588.jpg'), ('3132206324.jpg', 'train-cgm-257.jpg'), ('3135130073.jpg', 'train-cmd-1895.jpg'), ('3141049473.jpg', 'train-cmd-2377.jpg'), ('3146983924.jpg', 'train-cmd-1636.jpg'), ('3151955638.jpg', 'train-cgm-402.jpg'), ('3153618395.jpg', 'train-cmd-865.jpg'), ('3164836946.jpg', 'train-cbsd-10.jpg'), ('3175363969.jpg', 'train-cbb-220.jpg'), ('3181600056.jpg', 'train-cgm-718.jpg'), ('3182736637.jpg', 'train-cgm-73.jpg'), ('3185750323.jpg', 'train-cgm-566.jpg'), ('3188097509.jpg', 'train-cgm-230.jpg'), ('3188727997.jpg', 'train-cmd-542.jpg'), ('318991539.jpg', 
'train-cmd-1749.jpg'), ('3194661838.jpg', 'train-cmd-2394.jpg'), ('3199430859.jpg', 'train-cmd-1696.jpg'), ('3201556287.jpg', 'train-cmd-1019.jpg'), ('3214730186.jpg', 'train-cbb-279.jpg'), ('3216675047.jpg', 'train-cmd-1610.jpg'), ('3228239886.jpg', 'train-cbb-79.jpg'), ('3229179609.jpg', 'train-cbsd-1094.jpg'), ('3237922501.jpg', 'train-cgm-295.jpg'), ('3252706161.jpg', 'train-cmd-2143.jpg'), ('3259397427.jpg', 'train-cmd-1920.jpg'), ('3262221593.jpg', 'train-cbb-122.jpg'), ('3267041557.jpg', 'train-cbsd-1029.jpg'), ('3268898958.jpg', 'train-cgm-187.jpg'), ('3269286573.jpg', 'train-cmd-408.jpg'), ('3279881598.jpg', 'train-cbb-36.jpg'), ('328959114.jpg', 'train-cgm-205.jpg'), ('3289998998.jpg', 'train-cmd-2594.jpg'), ('3290949725.jpg', 'train-cbb-346.jpg'), ('3293589198.jpg', 'train-cbsd-183.jpg'), ('3294433487.jpg', 'train-cbsd-1173.jpg'), ('3296434764.jpg', 'train-cmd-1940.jpg'), ('3314463308.jpg', 'train-cmd-1519.jpg'), ('3315868108.jpg', 'train-cgm-50.jpg'), ('3315979770.jpg', 'train-cmd-1765.jpg'), ('3325565280.jpg', 'train-cmd-84.jpg'), ('3331347285.jpg', 'train-cgm-0.jpg'), ('3335254269.jpg', 'train-cbsd-349.jpg'), ('3341146922.jpg', 'train-cmd-237.jpg'), ('3341233301.jpg', 'train-cmd-877.jpg'), ('3343675403.jpg', 'train-cmd-1812.jpg'), ('3345498406.jpg', 'train-cmd-2236.jpg'), ('3347545196.jpg', 'train-cbsd-634.jpg'), ('3350687852.jpg', 'train-cmd-290.jpg'), ('3358660933.jpg', 'train-cmd-1511.jpg'), ('3370367169.jpg', 'train-cgm-162.jpg'), ('3384415464.jpg', 'train-cgm-119.jpg'), ('3385144102.jpg', 'train-cgm-370.jpg'), ('3385309388.jpg', 'train-cbsd-62.jpg'), ('3390516449.jpg', 'train-cmd-333.jpg'), ('3395914437.jpg', 'train-cgm-309.jpg'), ('3397650599.jpg', 'train-cbb-221.jpg'), ('3408364070.jpg', 'train-cmd-903.jpg'), ('3408460296.jpg', 'train-cmd-1911.jpg'), ('3411831481.jpg', 'train-cmd-691.jpg'), ('3413715358.jpg', 'train-cgm-528.jpg'), ('3424307098.jpg', 'train-cmd-1907.jpg'), ('3424618786.jpg', 'train-cmd-380.jpg'), ('342796483.jpg', 
'train-cmd-217.jpg'), ('343990809.jpg', 'train-cbsd-336.jpg'), ('3441163294.jpg', 'train-cmd-2155.jpg'), ('3451340587.jpg', 'train-cmd-2489.jpg'), ('3457494065.jpg', 'train-cgm-264.jpg'), ('3458199144.jpg', 'train-cmd-652.jpg'), ('3462014192.jpg', 'train-cgm-748.jpg'), ('3474522679.jpg', 'train-cbsd-1011.jpg'), ('3476081387.jpg', 'train-cbb-282.jpg'), ('3489790909.jpg', 'train-cbsd-1286.jpg'), ('3492066669.jpg', 'train-cmd-1330.jpg'), ('3505403837.jpg', 'train-cmd-1567.jpg'), ('3506364620.jpg', 'train-cbsd-275.jpg'), ('3511671285.jpg', 'train-cbsd-1240.jpg'), ('3521763231.jpg', 'train-cbsd-1421.jpg'), ('3523363514.jpg', 'train-cmd-2140.jpg'), ('3538242745.jpg', 'train-cbsd-595.jpg'), ('3540055410.jpg', 'train-cmd-871.jpg'), ('3542768898.jpg', 'train-cmd-624.jpg'), ('3546777867.jpg', 'train-cgm-113.jpg'), ('3552298032.jpg', 'train-cmd-1448.jpg'), ('3561701886.jpg', 'train-cgm-505.jpg'), ('3566226674.jpg', 'train-cmd-2523.jpg'), ('3576823132.jpg', 'train-cgm-333.jpg'), ('3579018611.jpg', 'train-cgm-445.jpg'), ('3580388018.jpg', 'train-cmd-674.jpg'), ('35875939.jpg', 'train-cmd-2055.jpg'), ('358823158.jpg', 'train-cmd-1427.jpg'), ('3594689734.jpg', 'train-cbsd-1087.jpg'), ('3606759049.jpg', 'train-cbsd-66.jpg'), ('3608078575.jpg', 'train-cmd-2401.jpg'), ('3610820560.jpg', 'train-cgm-722.jpg'), ('3626400961.jpg', 'train-cmd-1757.jpg'), ('3630015138.jpg', 'train-cbb-72.jpg'), ('3632711020.jpg', 'train-cmd-2050.jpg'), ('3637416250.jpg', 'train-cbsd-107.jpg'), ('3638122648.jpg', 'train-cmd-211.jpg'), ('3645245816.jpg', 'train-cgm-363.jpg'), ('3649285117.jpg', 'train-cbsd-437.jpg'), ('3651958252.jpg', 'train-cmd-2491.jpg'), ('366195058.jpg', 'train-cmd-1296.jpg'), ('3667237103.jpg', 'train-cmd-545.jpg'), ('368553798.jpg', 'train-cmd-1908.jpg'), ('3688529022.jpg', 'train-cmd-2204.jpg'), ('369175053.jpg', 'train-cbsd-1316.jpg'), ('3691870719.jpg', 'train-cmd-2390.jpg'), ('369451134.jpg', 'train-cmd-1062.jpg'), ('3696011777.jpg', 'train-cmd-2502.jpg'), ('3698876668.jpg', 
'train-cmd-846.jpg'), ('3708168447.jpg', 'train-cmd-283.jpg'), ('3708517022.jpg', 'train-cmd-1870.jpg'), ('3718347785.jpg', 'train-cmd-1919.jpg'), ('3722626623.jpg', 'train-cmd-1200.jpg'), ('3727359090.jpg', 'train-cgm-268.jpg'), ('3731008076.jpg', 'train-cbb-60.jpg'), ('3731052059.jpg', 'train-cbsd-1398.jpg'), ('3731328789.jpg', 'train-cmd-522.jpg'), ('3741620114.jpg', 'train-cmd-1875.jpg'), ('3752575686.jpg', 'train-cmd-1196.jpg'), ('3755456249.jpg', 'train-cmd-2128.jpg'), ('3770952591.jpg', 'train-cbsd-697.jpg'), ('3772064912.jpg', 'train-cgm-519.jpg'), ('3775082960.jpg', 'train-cbb-310.jpg'), ('3781670038.jpg', 'train-cgm-78.jpg'), ('3782909126.jpg', 'train-cbsd-837.jpg'), ('3785055674.jpg', 'train-cmd-622.jpg'), ('378894822.jpg', 'train-cmd-360.jpg'), ('379373523.jpg', 'train-cgm-323.jpg'), ('37954651.jpg', 'train-cmd-1705.jpg'), ('3809163419.jpg', 'train-cbb-382.jpg'), ('3810740135.jpg', 'train-cmd-2034.jpg'), ('3813835902.jpg', 'train-cgm-154.jpg'), ('3816048744.jpg', 'train-cbsd-1221.jpg'), ('3821611662.jpg', 'train-cbsd-353.jpg'), ('3826775864.jpg', 'train-cgm-503.jpg'), ('3829488807.jpg', 'train-cmd-105.jpg'), ('3833092976.jpg', 'train-cmd-1013.jpg'), ('3837689204.jpg', 'train-cmd-447.jpg'), ('3838205917.jpg', 'train-cmd-1796.jpg'), ('383932080.jpg', 'train-cbb-97.jpg'), ('384122684.jpg', 'train-cmd-1729.jpg'), ('3847172492.jpg', 'train-cmd-2460.jpg'), ('3848558113.jpg', 'train-cmd-517.jpg'), ('3848622850.jpg', 'train-cmd-1947.jpg'), ('3856769685.jpg', 'train-cmd-2474.jpg'), ('385685508.jpg', 'train-cbsd-305.jpg'), ('3859028489.jpg', 'train-cgm-8.jpg'), ('3876777651.jpg', 'train-cmd-1912.jpg'), ('3892133252.jpg', 'train-cgm-675.jpg'), ('3894262995.jpg', 'train-cmd-1958.jpg'), ('3903538298.jpg', 'train-cmd-2170.jpg'), ('3905302037.jpg', 'train-cbb-308.jpg'), ('391259058.jpg', 'train-cbsd-287.jpg'), ('3912926258.jpg', 'train-cgm-487.jpg'), ('3921328805.jpg', 'train-cbsd-872.jpg'), ('3924602971.jpg', 'train-cbsd-1106.jpg'), ('392503327.jpg', 
'train-cbsd-1325.jpg'), ('3930711994.jpg', 'train-cmd-2214.jpg'), ('3938349285.jpg', 'train-cbb-275.jpg'), ('3943325497.jpg', 'train-cmd-296.jpg'), ('3946178245.jpg', 'train-cbsd-1099.jpg'), ('3948333262.jpg', 'train-cmd-1246.jpg'), ('3951364046.jpg', 'train-cmd-195.jpg'), ('3951384519.jpg', 'train-cmd-502.jpg'), ('3953222407.jpg', 'train-cgm-608.jpg'), ('3958304403.jpg', 'train-cbsd-679.jpg'), ('3963572251.jpg', 'train-cgm-622.jpg'), ('3966256467.jpg', 'train-cbb-437.jpg'), ('3968384941.jpg', 'train-cgm-589.jpg'), ('3974310104.jpg', 'train-cbb-259.jpg'), ('397477697.jpg', 'train-cgm-546.jpg'), ('3978633568.jpg', 'train-cmd-1017.jpg'), ('3995159118.jpg', 'train-cmd-473.jpg'), ('4014877464.jpg', 'train-cmd-1142.jpg'), ('4024391744.jpg', 'train-cgm-5.jpg'), ('4031122706.jpg', 'train-cmd-1154.jpg'), ('4031188863.jpg', 'train-cmd-2259.jpg'), ('403372857.jpg', 'train-cbb-255.jpg'), ('403458333.jpg', 'train-cbsd-147.jpg'), ('4038344014.jpg', 'train-cmd-2590.jpg'), ('4048486399.jpg', 'train-cmd-1360.jpg'), ('4048519217.jpg', 'train-cbsd-238.jpg'), ('405720625.jpg', 'train-cmd-2.jpg'), ('4059451569.jpg', 'train-cbsd-840.jpg'), ('4071582691.jpg', 'train-cmd-1244.jpg'), ('4079242692.jpg', 'train-cgm-275.jpg'), ('4083768019.jpg', 'train-cbb-155.jpg'), ('4088249542.jpg', 'train-cbb-100.jpg'), ('4091663475.jpg', 'train-cbb-414.jpg'), ('409474529.jpg', 'train-cmd-429.jpg'), ('4098473118.jpg', 'train-cmd-2519.jpg'), ('4103177818.jpg', 'train-cmd-676.jpg'), ('4103428960.jpg', 'train-cgm-535.jpg'), ('4110644267.jpg', 'train-cgm-563.jpg'), ('4111161962.jpg', 'train-cbsd-681.jpg'), ('4121231239.jpg', 'train-cgm-497.jpg'), ('4141439714.jpg', 'train-cmd-253.jpg'), ('4141594059.jpg', 'train-cmd-1032.jpg'), ('414320641.jpg', 'train-cbsd-187.jpg'), ('4145051602.jpg', 'train-cmd-1014.jpg'), ('4146091086.jpg', 'train-cbsd-622.jpg'), ('4161605185.jpg', 'train-cmd-1577.jpg'), ('4166762.jpg', 'train-cmd-382.jpg'), ('4170665280.jpg', 'train-cmd-482.jpg'), ('417083161.jpg', 'train-cgm-169.jpg'), 
('4183077936.jpg', 'train-cmd-2604.jpg'), ('4183078751.jpg', 'train-cmd-1352.jpg'), ('4186901068.jpg', 'train-cmd-70.jpg'), ('4211138249.jpg', 'train-cmd-822.jpg'), ('4222515459.jpg', 'train-cmd-458.jpg'), ('4230605387.jpg', 'train-cgm-71.jpg'), ('423288187.jpg', 'train-cbb-161.jpg'), ('4243169950.jpg', 'train-cgm-91.jpg'), ('4248343921.jpg', 'train-cbsd-1123.jpg'), ('4250538490.jpg', 'train-cbsd-1358.jpg'), ('4254213032.jpg', 'train-cgm-48.jpg'), ('4255100884.jpg', 'train-cmd-1453.jpg'), ('4287369745.jpg', 'train-cmd-67.jpg'), ('4290607578.jpg', 'train-cmd-1446.jpg'), ('4292224219.jpg', 'train-cmd-2123.jpg'), ('431411749.jpg', 'train-cbsd-1442.jpg'), ('437958298.jpg', 'train-cbsd-292.jpg'), ('446546740.jpg', 'train-cmd-1918.jpg'), ('456001532.jpg', 'train-cmd-167.jpg'), ('469487.jpg', 'train-cbsd-323.jpg'), ('483398598.jpg', 'train-cmd-96.jpg'), ('51063556.jpg', 'train-cmd-1560.jpg'), ('514376645.jpg', 'train-cmd-2047.jpg'), ('519050764.jpg', 'train-cbb-129.jpg'), ('537771989.jpg', 'train-cmd-1430.jpg'), ('546931175.jpg', 'train-cgm-199.jpg'), ('554118057.jpg', 'train-cmd-1686.jpg'), ('560592460.jpg', 'train-cgm-716.jpg'), ('561647799.jpg', 'train-cbsd-80.jpg'), ('562027159.jpg', 'train-cgm-248.jpg'), ('579336785.jpg', 'train-cmd-1738.jpg'), ('586116935.jpg', 'train-cgm-770.jpg'), ('594027454.jpg', 'train-cbsd-1319.jpg'), ('598311175.jpg', 'train-cmd-864.jpg'), ('599325048.jpg', 'train-cbb-418.jpg'), ('610094717.jpg', 'train-cmd-504.jpg'), ('61492497.jpg', 'train-cmd-639.jpg'), ('620126996.jpg', 'train-cgm-368.jpg'), ('630240307.jpg', 'train-cmd-2514.jpg'), ('632236115.jpg', 'train-cbsd-1222.jpg'), ('643956994.jpg', 'train-cgm-399.jpg'), ('64732457.jpg', 'train-cmd-2553.jpg'), ('659495074.jpg', 'train-cbsd-381.jpg'), ('660715584.jpg', 'train-cmd-425.jpg'), ('66458318.jpg', 'train-cbsd-426.jpg'), ('664583876.jpg', 'train-cgm-712.jpg'), ('667282886.jpg', 'train-cmd-1288.jpg'), ('674360414.jpg', 'train-cbsd-23.jpg'), ('680272982.jpg', 'train-cmd-1814.jpg'), 
('700113045.jpg', 'train-cmd-2432.jpg'), ('702121018.jpg', 'train-cbsd-22.jpg'), ('704115558.jpg', 'train-cbsd-156.jpg'), ('705481569.jpg', 'train-cmd-1276.jpg'), ('707255804.jpg', 'train-cmd-1944.jpg'), ('724059503.jpg', 'train-cmd-567.jpg'), ('728569501.jpg', 'train-cmd-60.jpg'), ('745135323.jpg', 'train-cbsd-1216.jpg'), ('746879421.jpg', 'train-cbb-142.jpg'), ('755437879.jpg', 'train-cmd-2014.jpg'), ('762558370.jpg', 'train-cmd-1579.jpg'), ('773130398.jpg', 'train-cmd-483.jpg'), ('774801310.jpg', 'train-cmd-832.jpg'), ('777180004.jpg', 'train-cgm-352.jpg'), ('787756871.jpg', 'train-cmd-1412.jpg'), ('797094434.jpg', 'train-cmd-725.jpg'), ('807190211.jpg', 'train-cmd-1795.jpg'), ('816689138.jpg', 'train-cmd-2413.jpg'), ('823298994.jpg', 'train-cbsd-519.jpg'), ('843483418.jpg', 'train-cbsd-1299.jpg'), ('867127390.jpg', 'train-cbsd-1150.jpg'), ('868228069.jpg', 'train-cmd-2364.jpg'), ('872498616.jpg', 'train-cbb-393.jpg'), ('881299929.jpg', 'train-cmd-1238.jpg'), ('888266861.jpg', 'train-cbsd-451.jpg'), ('891320496.jpg', 'train-cbsd-1383.jpg'), ('897745173.jpg', 'train-cmd-91.jpg'), ('899398361.jpg', 'train-cmd-2520.jpg'), ('918605153.jpg', 'train-cgm-446.jpg'), ('930097123.jpg', 'train-cmd-749.jpg'), ('939861475.jpg', 'train-cmd-115.jpg'), ('945680317.jpg', 'train-cmd-55.jpg'), ('96041444.jpg', 'train-cbsd-1120.jpg'), ('963176335.jpg', 'train-cmd-944.jpg'), ('980448273.jpg', 'train-cbsd-758.jpg'), ('980911264.jpg', 'train-cmd-617.jpg'), ('981946821.jpg', 'train-cmd-2547.jpg'), ('988174802.jpg', 'train-cbb-149.jpg'), ('996534381.jpg', 'train-cbsd-331.jpg'), ('train-cbb-115.jpg', 'train-cbsd-656.jpg'), ('train-cbb-213.jpg', 'train-cgm-321.jpg'), ('train-cbb-216.jpg', 'train-cgm-687.jpg'), ('train-cbb-244.jpg', 'train-cbsd-632.jpg'), ('train-cbb-247.jpg', 'train-cbsd-328.jpg'), ('train-cbb-306.jpg', 'train-cbsd-931.jpg'), ('train-cbb-317.jpg', 'train-cbsd-240.jpg'), ('train-cbb-371.jpg', 'train-cbb-427.jpg'), ('train-cbb-371.jpg', 'train-cbb-94.jpg'), 
('train-cbb-374.jpg', 'train-cgm-698.jpg'), ('train-cbb-381.jpg', 'train-cbsd-216.jpg'), ('train-cbb-405.jpg', 'train-cgm-246.jpg'), ('train-cbb-427.jpg', 'train-cbb-94.jpg'), ('train-cbsd-1033.jpg', 'train-cbsd-352.jpg'), ('train-cbsd-1046.jpg', 'train-cgm-253.jpg'), ('train-cbsd-1051.jpg', 'train-cbsd-1101.jpg'), ('train-cbsd-1172.jpg', 'train-cbsd-358.jpg'), ('train-cbsd-12.jpg', 'train-cbsd-1297.jpg'), ('train-cbsd-1246.jpg', 'train-cbsd-138.jpg'), ('train-cbsd-1246.jpg', 'train-cbsd-654.jpg'), ('train-cbsd-1354.jpg', 'train-cgm-455.jpg'), ('train-cbsd-138.jpg', 'train-cbsd-654.jpg'), ('train-cbsd-1419.jpg', 'train-cgm-279.jpg'), ('train-cbsd-143.jpg', 'train-cbsd-661.jpg'), ('train-cbsd-17.jpg', 'train-cbsd-713.jpg'), ('train-cbsd-173.jpg', 'train-cbsd-978.jpg'), ('train-cbsd-174.jpg', 'train-cbsd-243.jpg'), ('train-cbsd-193.jpg', 'train-cbsd-304.jpg'), ('train-cbsd-233.jpg', 'train-cbsd-270.jpg'), ('train-cbsd-257.jpg', 'train-cbsd-650.jpg'), ('train-cbsd-277.jpg', 'train-cbsd-610.jpg'), ('train-cbsd-421.jpg', 'train-cbsd-985.jpg'), ('train-cbsd-481.jpg', 'train-cmd-477.jpg'), ('train-cbsd-513.jpg', 'train-cbsd-530.jpg'), ('train-cbsd-533.jpg', 'train-cbsd-785.jpg'), ('train-cbsd-660.jpg', 'train-healthy-50.jpg'), ('train-cbsd-661.jpg', 'train-cbsd-717.jpg'), ('train-cbsd-666.jpg', 'train-cgm-166.jpg'), ('train-cbsd-791.jpg', 'train-cgm-90.jpg'), ('train-cgm-304.jpg', 'train-cgm-760.jpg'), ('train-cgm-396.jpg', 'train-cgm-477.jpg'), ('train-cgm-396.jpg', 'train-cgm-565.jpg'), ('train-cgm-437.jpg', 'train-cgm-652.jpg'), ('train-cgm-477.jpg', 'train-cgm-565.jpg'), ('train-cmd-1150.jpg', 'train-cmd-2167.jpg'), ('train-cmd-1165.jpg', 'train-cmd-186.jpg'), ('train-cmd-1165.jpg', 'train-cmd-450.jpg'), ('train-cmd-1248.jpg', 'train-cmd-444.jpg'), ('train-cmd-149.jpg', 'train-cmd-582.jpg'), ('train-cmd-153.jpg', 'train-cmd-1648.jpg'), ('train-cmd-1600.jpg', 'train-cmd-1771.jpg'), ('train-cmd-1600.jpg', 'train-cmd-1894.jpg'), ('train-cmd-1771.jpg', 
'train-cmd-1894.jpg'), ('train-cmd-2003.jpg', 'train-cmd-966.jpg'), ('train-cmd-220.jpg', 'train-cmd-2431.jpg'), ('train-cmd-2302.jpg', 'train-cmd-2399.jpg'), ('train-cmd-2451.jpg', 'train-healthy-149.jpg'), ('train-healthy-136.jpg', 'train-healthy-77.jpg')]
    print(f"number of duplicates: {len(duplicates_dict)}")
    
    not_load = []
    for (_, sim) in duplicates_dict:
        not_load.append(sim)
    
    not_load = set(not_load)
    
    del(duplicates_dict)
    
    return not_load

def process_train_file(train_info_pth, label_pth, img_pth):
    """Build the combined training table from current, previous and pseudo-labelled data.

    The CSV at ``train_info_pth`` is read first; images found in the class
    sub-folders of ``config['trainPrevImgDir']`` (minus the known duplicates
    returned by ``get_duplicate_data``) and the rows of the CSV at
    ``config['pseudoLabelDataDir']`` are then appended to it.

    Args:
        train_info_pth: path of the CSV with ``image_id`` and ``label`` columns.
        label_pth: path of a JSON file mapping class id to class name.
        img_pth: directory joined with each ``image_id`` of the current CSV
            to form the ``image_dir`` column.

    Returns:
        pandas.DataFrame with columns ``image_id``, ``label`` (int64),
        ``image_dir`` and ``class_name``.
    """
    trainFile = pd.read_csv(train_info_pth).reset_index(drop=True)
    # basic information; info() prints by itself and returns None, so it is not wrapped in print()
    trainFile.info()

    # statistics of the current CSV only - they are taken before any merging
    num_records = len(trainFile)
    class_label = trainFile.label.unique()
    num_classes = len(class_label)
    records_per_class = trainFile.label.value_counts().sort_index()

    # load previous data
    duplicates = get_duplicate_data()

    # sub-folder of the previous data set -> class id
    prev_classes = {'cmd': 3, 'cbb': 0, 'cbsd': 1, 'cgm': 2, 'healthy': 4}
    prev_records = {}
    for class_dir, class_id in prev_classes.items():
        folder = os.path.join(config['trainPrevImgDir'], class_dir)
        prev_records[class_dir] = [
            (file.split('.')[0], class_id, os.path.join(folder, file))
            for file in os.listdir(folder)
            if file not in duplicates
        ]

    print("number of previous data loaded: ", sum(len(rows) for rows in prev_records.values()))

    # NOTE(review): 'cmd' is scanned and counted above but is not merged into the
    # training table, exactly as before this rewrite - confirm that is still intended.
    # Plain list merging (instead of np.concatenate) also works when a folder yields no rows.
    prev_rows = [row for name in ('cbb', 'cbsd', 'cgm', 'healthy') for row in prev_records[name]]
    prev_trainFile = pd.DataFrame.from_records(prev_rows, columns=["image_id", "label", "image_dir"])

    # get image path
    trainFile['image_dir'] = trainFile['image_id'].apply(lambda _id : os.path.join(img_pth, _id))

    # get pseudo labelling data
    pseudoLabelFile = pd.read_csv(config['pseudoLabelDataDir'])

    # merge current, previous and pseudo-labelled data
    # (DataFrame.append no longer exists in pandas >= 2.0, pd.concat is the replacement)
    trainFile = pd.concat(
        [trainFile, prev_trainFile, pseudoLabelFile[['image_id', 'label', 'image_dir']]],
        ignore_index=True,
    )

    # convert the label to int type for mapping the class label
    trainFile = trainFile.astype({'label': 'int64'})

    # get the corresponding class label name to the class id (JSON keys are strings)
    with open(label_pth) as file:
        classLabelDict = {int(k) : v for k, v in json.load(file).items()}

    trainFile['class_name'] = trainFile['label'].map(classLabelDict)

    print(f"Number of Records: {num_records}")
    print(f"Number of Classes: {num_classes}")
    print(f"Class Labels: {class_label}")
    print(f"Records per Class: \n {records_per_class}")
    print(f"Total Number of Files: {len(trainFile)}")

    records_per_class.plot.barh()

    print(trainFile.head())

    return trainFile

In [None]:
# dataset
class CassavaDataset(Dataset):
    """Dataset serving the images (and optionally the labels) listed in a DataFrame.

    Args:
        df: DataFrame with an ``image_dir`` column and, when ``output_label``
            is truthy, a ``label`` column. A re-indexed copy is stored, so the
            caller's frame is not modified.
        transforms: optional callable invoked as ``transforms(image=img)``;
            the ``'image'`` entry of its result replaces the loaded image.
        output_label: when truthy, ``__getitem__`` returns ``(img, target)``,
            otherwise only ``img``.
    """

    def __init__(self, df, transforms = None, output_label = True):
        super().__init__()
        self.df = df.reset_index(drop=True).copy()
        self.transforms = transforms
        # whether to output label from the dataset
        self.output_label = output_label

        # same truthiness test as in __getitem__, so any truthy flag gets its labels
        if self.output_label:
            # get the label series, and convert it to numpy array
            self.labels = self.df['label'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index: int):
        # scalar lookup: avoids building a full row Series for every sample
        img = get_image(self.df.at[index, 'image_dir'])

        if self.transforms:
            img = self.transforms(image=img)['image']

        if self.output_label:
            return img, self.labels[index]
        return img

# data augmentation for train images
def get_train_transforms():
    """Return the albumentations pipeline applied to training images.

    Random crop/flip/rotate steps are followed by normalisation with fixed
    per-channel mean/std and conversion to a tensor. The output side length
    is read from ``config['img_size']``.
    """
    side = config['img_size']
    steps = [
        RandomResizedCrop(side, side),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(p=0.5),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

# preprocessing for validation images (resize + normalize only, no random augmentation)
def get_valid_transforms():
    """Return the albumentations pipeline applied to validation images.

    Only resizes to ``config['img_size']``, normalises with fixed per-channel
    mean/std and converts to a tensor.
    """
    side = config['img_size']
    steps = [
        Resize(side, side),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

In [None]:
# define model
class CassavaImgClassifier(nn.Module):
    """timm model whose ``classifier`` layer is replaced by an ``n_class``-way linear head.

    Args:
        model_arch: architecture name passed to ``timm.create_model``.
        n_class: number of output units of the new head.
        pretrained: forwarded to ``timm.create_model``.
    """

    def __init__(self, model_arch, n_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_arch, pretrained=pretrained)
        # keep the input width of the stock head, only the output size changes
        head_width = backbone.classifier.in_features
        backbone.classifier = nn.Linear(head_width, n_class)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped model's output for batch ``x``."""
        return self.model(x)

In [None]:
# training util functions
def create_data_loader(df, train_idx, valid_idx):
    """Create the training and validation loaders for one fold.

    Args:
        df: DataFrame holding all records (must provide what ``CassavaDataset`` reads).
        train_idx: positional row indices of the training split.
        valid_idx: positional row indices of the validation split.

    Returns:
        ``(train_loader, valid_loader)``; the training loader shuffles, the
        validation loader keeps the row order. Batch sizes and worker count
        come from ``config``.
    """
    # the splitter yields row positions, so select by position (.iloc); label-based
    # .loc only gives the same rows when df happens to carry a default RangeIndex
    _train = df.iloc[train_idx].reset_index(drop=True)
    _valid = df.iloc[valid_idx].reset_index(drop=True)

    # create train and test(valid) dataset
    train_dataset = CassavaDataset(_train, transforms=get_train_transforms(), output_label=True)
    valid_dataset = CassavaDataset(_valid, transforms=get_valid_transforms(), output_label=True)

    # create data loader
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=config['train_batchSize'],
                                               shuffle=True,
                                               num_workers=config['num_workers'])

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=config['valid_batchSize'],
                                               shuffle=False,
                                               num_workers=config['num_workers'])

    return train_loader, valid_loader

def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device, scaler=None, scheduler=None):
    """Train ``model`` for one pass over ``train_loader``.

    Gradients are accumulated over ``config['accum_iter']`` batches before each
    optimizer step. Mixed precision is used when CUDA is available and
    ``device`` is a CUDA device; otherwise the same code path runs in full
    precision.

    Args:
        epoch: epoch number, only used in the progress-bar text.
        model: module to train (switched to train mode here).
        loss_fn: callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        optimizer: optimizer stepped through the gradient scaler.
        train_loader: iterable of ``(images, labels)`` batches with a length.
        device: device (string or ``torch.device``) the batches are moved to.
        scaler: optional ``GradScaler`` to reuse; a new one is created when omitted.
        scheduler: optional LR scheduler, stepped once at the end of the epoch.
    """
    # put the model to train mode
    model.train()

    # mixed precision only when the tensors really live on a CUDA device
    use_amp = torch.cuda.is_available() and torch.device(device).type == 'cuda'
    if scaler is None:
        # a disabled GradScaler passes loss/step through unchanged, so CPU runs
        # share this code path instead of crashing on a missing scaler
        scaler = GradScaler(enabled=use_amp)

    running_loss = None
    num_steps = len(train_loader)

    # do not let gradients left over from earlier work leak into the first step
    optimizer.zero_grad()

    # loop through the dataset
    progress_bar = tqdm(enumerate(train_loader), total=num_steps)
    for step, (imgs, image_labels) in progress_bar:
        # the image value is represented by floating point
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()

        # forward pass (in mixed precision when enabled)
        with autocast(enabled=use_amp):
            image_preds = model(imgs)
            loss = loss_fn(image_preds, image_labels)

        # backward runs outside autocast; the scaler multiplies the loss so that
        # small half-precision gradients do not flush to zero
        scaler.scale(loss).backward()

        # exponentially smoothed loss for the progress bar
        if running_loss is None:
            running_loss = loss.item()
        else:
            running_loss = running_loss * .99 + loss.item() * .01

        # gradient accumulation: step only every accum_iter batches (and on the last
        # batch), which emulates a larger batch size within the same memory budget.
        # NOTE(review): the loss is not divided by accum_iter, so gradients are summed
        # rather than averaged over the accumulated batches - confirm this is intended.
        if ((step + 1) % config['accum_iter'] == 0) or ((step + 1) == num_steps):
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # modify the progress bar information
        if ((step + 1) % config['verbose_step'] == 0) or ((step + 1) == num_steps):
            description = f'epoch {epoch} loss: {running_loss:.4f}'

            progress_bar.set_description(description)

    # learning rate annealing
    if scheduler is not None:
        scheduler.step()

def valid_one_epoch(epoch, model, loss_fn, valid_loader, device, scheduler=None):
    """Evaluate ``model`` on ``valid_loader`` and report loss and accuracy.

    Args:
        epoch: epoch number, only used in the progress-bar text.
        model: module to evaluate (switched to eval mode here).
        loss_fn: callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        valid_loader: iterable of ``(images, labels)`` batches with a length.
        device: device the batches are moved to.
        scheduler: optional LR scheduler, stepped once after the evaluation.

    Returns:
        ``(image_preds_all, image_targets_all)``: 1-D numpy arrays with the
        arg-max class per sample and the matching targets, in loader order.
    """
    # put the model to evaluation mode
    model.eval()

    loss_sum = 0
    sample_num = 0
    image_preds_all = []
    image_targets_all = []
    num_steps = len(valid_loader)

    progress_bar = tqdm(enumerate(valid_loader), total=num_steps)
    for step, (imgs, image_labels) in progress_bar:
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()

        # no autograd bookkeeping during evaluation, whether or not the caller disabled it
        with torch.no_grad():
            image_preds = model(imgs)
            loss = loss_fn(image_preds, image_labels)

        # store the prediction result and the corresponding label
        # (append in place instead of rebuilding the list on every batch)
        image_preds_all.append(torch.argmax(image_preds, 1).detach().cpu().numpy())
        image_targets_all.append(image_labels.detach().cpu().numpy())

        # weight by batch size so the reported value is a per-sample average
        # (assumes loss_fn returns the batch mean - TODO confirm for custom losses)
        loss_sum += loss.item()*image_labels.shape[0]
        sample_num += image_labels.shape[0]

        if ((step + 1) % config['verbose_step'] == 0) or ((step + 1) == num_steps):
            description = f'epoch {epoch} loss: {loss_sum/sample_num:.4f}'
            progress_bar.set_description(description)

    # concatenate all the validation result, and calculate accuracy
    image_preds_all = np.concatenate(image_preds_all)
    image_targets_all = np.concatenate(image_targets_all)
    print('validation multi-class accuracy = {:.4f}'.format((image_preds_all==image_targets_all).mean()))
    print(f"prediction array size: {image_preds_all.shape}")
    print(f"target array size: {image_targets_all.shape}")

    if scheduler is not None:
        scheduler.step()

    return image_preds_all, image_targets_all
        
def run_inference(model, device, test_loader, states=None):
    """Predict a class index for every sample yielded by ``test_loader``.

    Args:
        model: module to run; it is put in eval mode and moved to ``device``.
        device: device the model and the batches are moved to.
        test_loader: iterable of image batches (no labels) with a length.
        states: accepted but not used by this function.

    Returns:
        1-D numpy array holding the arg-max over axis 1 of the model outputs.
    """
    model.eval()
    model.to(device)

    collected = []
    for _, batch in tqdm(enumerate(test_loader), total=len(test_loader)):
        batch = batch.to(device).float()

        # forward pass only, no gradients needed
        with torch.no_grad():
            logits = model(batch)

        collected.append(logits.to('cpu').numpy())

    # stack the per-batch outputs, then pick the highest-scoring class per row
    return np.concatenate(collected).argmax(1)

In [None]:
# build the training table from the paths configured in `config`
train = process_train_file(
    config['trainFileInfo'],
    config['classLableDir'],
    config['trainImgDir'],
)

In [None]:
# training: one freshly initialised model per fold, config['epochs'] epochs each

# set seed
seed_everything(config['seed'])

# stratified k-fold split on the label column; yields (train, valid) row-position arrays
folds = StratifiedKFold(n_splits=config['num_folds'], shuffle=True, random_state=config['seed']).split(np.arange(train.shape[0]), train.label.values)

for fold, (trn_idx, val_idx) in enumerate(folds):
    # uncomment to train fold 0 only
#     if fold > 0:
#         break

    # train on all folds and all epochs
    
    print(f"Training with {fold} started")
    
    print(f"num_train_records: {len(trn_idx)}, num_valid_records: {len(val_idx)}")
    # create data loader
    train_loader, valid_loader = create_data_loader(train, trn_idx, val_idx)
    # set device
    device = torch.device(config['device'])
    # create model; the head size is the number of distinct labels in the training table
    model = CassavaImgClassifier(config['model_arch'], train.label.nunique(), pretrained=True).to(device)
    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
    # define scheduler for learning rate annealing
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=config['T_0'], T_mult=1, eta_min=config['min_lr'], last_epoch=-1)
    # define the loss function for training
    # optional class-weighted variant (currently disabled); weights calculated by:
    # max (number of occurences in most common class) / (num occ. in rare class)
    # e.g. 13158 / 1087 for class 0
    # https://discuss.pytorch.org/t/what-is-the-weight-values-mean-in-torch-nn-crossentropyloss/11455/9
#     _weights = torch.FloatTensor([12.11, 6.01, 5.52, 1.0, 5.11]).to(device)
#     loss_tr = nn.CrossEntropyLoss(weight=_weights).to(device)
#     # define the loss function for validation
#     loss_fn = nn.CrossEntropyLoss(weight=_weights).to(device)
    
    # no class weight version
    loss_tr = nn.CrossEntropyLoss().to(device)
    # define the loss function for validation
    loss_fn = nn.CrossEntropyLoss().to(device)
    
    # run config['epochs'] epochs for this fold
    for epoch in range(config['epochs']):
        # the scheduler is handed to the training step only, so it advances once per epoch
        train_one_epoch(epoch, model, loss_tr, optimizer, train_loader, device, scheduler=scheduler)
        
        # do not track operation for backprop in validation
        with torch.no_grad():
            IMG_PREDS_ALL, IMG_TARGT_ALL = valid_one_epoch(epoch, model, loss_fn, valid_loader, device, scheduler=None)
        
        # save the weights plus the validation predictions/targets after each epoch
        torch.save(model.state_dict(),'{}_fold_{}_{}'.format(config['model_arch'], fold, epoch))
        np.save('{}_fold_{}_{}_preds'.format(config['model_arch'], fold, epoch), IMG_PREDS_ALL)
        np.save('{}_fold_{}_{}_targets'.format(config['model_arch'], fold, epoch), IMG_TARGT_ALL)
        
        
    # drop the per-fold objects before the next fold builds its own
    del model, optimizer, train_loader, valid_loader, scheduler
    # clean GPU cache
    torch.cuda.empty_cache()