In [None]:
# !pip install iterative-stratification
!pip install mmcv-full

In [None]:
# Install mmcls
!git clone https://github.com/open-mmlab/mmclassification.git
%cd mmclassification
!pip install -e .

In [None]:
# Verify the PyTorch installation and report whether CUDA is usable
import torch
import torchvision
print(torch.__version__, torch.cuda.is_available())

# Verify that MMClassification imports and report its version
import mmcls
print(mmcls.__version__)

In [None]:
!mkdir checkpoints
# !wget https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth -P checkpoints
!wget https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_batch256_imagenet_20200708-c07adbb7.pth -P checkpoints

In [None]:
from mmcls.apis import inference_model, init_model, show_result_pyplot
# Specify the path to config file and checkpoint file.
# The config has to describe the same architecture as the checkpoint. Only the
# ResNeXt-50 (32x4d) checkpoint is downloaded by this notebook, so it is paired
# with the ResNeXt-50 config here.
config_file = 'configs/resnext/resnext50_32x4d_b32x8_imagenet.py'
# checkpoint_file = 'checkpoints/resnet50_batch256_imagenet_20200708-cfb998bf.pth'
checkpoint_file = 'checkpoints/resnext50_32x4d_batch256_imagenet_20200708-c07adbb7.pth'
# Specify the device. You may also use cpu by `device='cpu'`.
device = 'cuda:0'
# Build the model from a config file and a checkpoint file
model = init_model(config_file, checkpoint_file, device=device)

In [None]:
# Test a single image
# The path is relative to the current working directory.
img = 'demo/demo.JPEG'
result = inference_model(model, img)

In [None]:
# Show the results
# Renders the image together with the prediction stored in `result`.
show_result_pyplot(model, img, result)

In [None]:
import pandas as pd

In [None]:
# Load the training annotation table (train.csv) of the VinBigData dataset
df = pd.read_csv("../../input/vinbigdata-chest-xray-abnormalities-detection/train.csv")

In [None]:
# Preview the first rows
df.head()

In [None]:
# Number of distinct image ids in the table
df.image_id.nunique()

In [None]:
# Total number of rows in the table
len(df)

In [None]:
# Collect the class ids of every image into one list per image_id
df_new = df.groupby('image_id')['class_id'].apply(list).reset_index()

In [None]:
# Preview: lists may still contain repeated class ids at this point
df_new.head()

In [None]:
# Remove repeated class ids per image (set() does not guarantee any order)
df_new['class_id'] = df_new['class_id'].apply(lambda x: list(set(x)))

In [None]:
# Preview after de-duplication
df_new.head()

In [None]:
# Start with label 0 on every row
df['label'] = 0

In [None]:
# Rows whose class_name is anything other than 'No finding' get label 1
df.loc[df.class_name!='No finding', ['label']] = 1

In [None]:
df.head()

In [None]:
# Sum the row labels per image: the sum is > 0 when at least one row has label 1
df_with_labels = df.groupby('image_id')['label'].sum().reset_index()

In [None]:
# Clamp positive sums to 1 so that each image ends up with a binary label
df_with_labels.loc[df_with_labels.label>0,['label']] = 1

In [None]:
df_with_labels.head()

In [None]:
# Number of images (one row per image_id after the groupby)
len(df_with_labels)

In [None]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=2)

In [None]:
import numpy as np

In [None]:
image_ids, labels = np.array(df_with_labels.image_id.tolist()), np.array(df_with_labels.label.tolist())

In [None]:
for train_index, test_index in skf.split(image_ids, labels):
    X_train, X_test = image_ids[train_index], image_ids[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

In [None]:
train_df = pd.DataFrame({'id': X_train, 'y': y_train})
val_df = pd.DataFrame({'id': X_test, 'y': y_test})

In [None]:
test_df = pd.read_csv("../../input/vinbigdata-1024-image-dataset/vinbigdata/test.csv")

In [None]:
test_df['image_id']  = test_df['image_id']  + ".png"
test_df['label'] = 0

In [None]:
test_df.head()

In [None]:
test_df[['image_id', 'label']].to_csv('./test.txt', sep=' ', header=False, index=False)

In [None]:
train_df.head()

In [None]:
train_df['id'] = train_df['id'] + ".png"
val_df['id'] = val_df['id'] + ".png"

In [None]:
train_df.to_csv("./train.txt", sep=" ", header=False, index=False)
val_df.to_csv("./val.txt", sep=" ", header=False, index=False)

In [None]:
import mmcv
import numpy as np

from mmcls.datasets import DATASETS, BaseDataset


# Register the dataset so that configs can refer to it by its class name.
# force=True allows this cell to be re-run without an "already registered" error.
@DATASETS.register_module(force=True)
class VinBigDataset(BaseDataset):
    """Classification dataset described by a plain-text annotation file.

    Every non-empty line of ``ann_file`` has the form ``<filename> <label>``:
    the label is the last space-separated field and must parse as an integer.
    """

    def load_annotations(self):
        """Parse ``self.ann_file`` into a list of per-sample dicts.

        Returns:
            list[dict]: One dict per annotation line with the keys
            ``img_prefix`` (``self.data_prefix``), ``img_info``
            (``{'filename': ...}``) and ``gt_label`` (``np.int64`` array).

        Raises:
            ValueError: If a non-empty line has no label field or the label
                is not an integer.
        """
        assert isinstance(self.ann_file, str)

        data_infos = []
        with open(self.ann_file) as f:
            # The ann_file is the annotation file generated earlier in the notebook.
            for line_no, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                # Split on the last space only, so a filename that itself
                # contains spaces is still parsed correctly.
                parts = line.rsplit(' ', 1)
                if len(parts) != 2:
                    raise ValueError(
                        f'{self.ann_file}:{line_no}: expected '
                        f'"<filename> <label>", got {line!r}')
                filename, gt_label = parts
                info = {'img_prefix': self.data_prefix}
                info['img_info'] = {'filename': filename}
                info['gt_label'] = np.array(gt_label, dtype=np.int64)
                data_infos.append(info)
        return data_infos

In [None]:


# Load the existing config file
from mmcv import Config
# cfg = Config.fromfile('configs/resnet/resnet50_b32x8_imagenet.py')
# NOTE(review): this is the ResNeXt-101 config, while the checkpoint that is
# downloaded in this notebook and later assigned to cfg.load_from is a
# ResNeXt-50 one - confirm which architecture is intended.
cfg = Config.fromfile('configs/resnext/resnext101_32x4d_b32x8_imagenet.py')





In [None]:
import os.path as osp
# Class names, written one per line (no trailing newline) to classes.txt
classes = ['normal', 'diseases']
classes_path = osp.join('./', 'classes.txt')
with open(classes_path, 'w') as f:
    f.write('\n'.join(classes))

In [None]:
# Use the custom dataset class for every split. All three splits share the
# class-name file and read their images from the train image folder.
cfg.dataset_type = 'VinBigDataset'
for split in ('train', 'val', 'test'):
    cfg.data[split].type = cfg.dataset_type
    cfg.data[split].data_prefix = '../../input/vinbigdata-1024-image-dataset/vinbigdata/train'
    cfg.data[split].classes = './classes.txt'

# Annotation files: validation and test both evaluate on the held-out split
cfg.data.train.ann_file = './train.txt'
cfg.data.val.ann_file = './val.txt'
cfg.data.test.ann_file = './val.txt'

# Data loader settings
cfg.data.samples_per_gpu = 32
cfg.data.workers_per_gpu = 2

# NOTE(review): this only sets the top-level `img_norm_cfg` entry; whether the
# data pipelines already loaded into `cfg` pick it up is not visible here - verify.
cfg.img_norm_cfg = dict(
    mean=[124.508, 116.050, 106.438],
    std=[58.577, 57.310, 57.437],
    to_rgb=True)

# Modify the metric method: report top-1 only (the value is the plain int 1)
cfg.evaluation['metric_options'] = dict(topk=1)


In [None]:
# Print the generated class-name file
! cat "classes.txt"

In [None]:
from mmcls.apis import set_random_seed

# MODEL CONFIG
# Two output classes in the classification head; only top-1 is reported
# (the value is the plain int 1).
cfg.model.head.num_classes = 2
cfg.model.head.topk = 1

# SCHEDULE CONFIG
# Optimizer
cfg.optimizer = {'type': 'SGD', 'lr': 0.01, 'momentum': 0.9, 'weight_decay': 0.0001}
cfg.optimizer_config = {'grad_clip': None}
# Learning policy
cfg.lr_config = {'policy': 'step', 'step': [1]}
cfg.runner = {'type': 'EpochBasedRunner', 'max_epochs': 2}

# RUNTIME CONFIG
# Load the pretrained weights
# NOTE(review): this is a ResNeXt-50 checkpoint; confirm that it matches the
# architecture of the config loaded into `cfg`.
# cfg.load_from = 'checkpoints/resnet50_batch256_imagenet_20200708-cfb998bf.pth'
cfg.load_from = 'checkpoints/resnext50_32x4d_batch256_imagenet_20200708-c07adbb7.pth'
# Set up working dir to save files and logs.
cfg.work_dir = './vin_work_dirs'
# Set seed thus the results are more reproducible
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)
cfg.gpu_ids = range(0, 1)

# Let's have a look at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

In [None]:
# Override the number of training epochs in the runner config
cfg.runner.max_epochs = 20

In [None]:
# List the contents of the work directory
!ls ./vin_work_dirs

In [None]:
import time

import torch
from mmcls.datasets import build_dataset
from mmcls.models import build_classifier
from mmcls.apis import train_model

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# Newer mmcls 0.x releases read `cfg.device` inside train_model, and it is
# normally filled in by tools/train.py. Set it here when it is missing so the
# notebook also runs against those releases; older releases ignore the key.
if cfg.get('device') is None:
    cfg.device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Build the classifier
model = build_classifier(cfg.model)
# Build the dataset
datasets = [build_dataset(cfg.data.train)]
# Add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES
# Begin finetuning (single process, with validation enabled)
train_model(
    model,
    datasets,
    cfg,
    distributed=False,
    validate=True,
    timestamp=time.strftime('%Y%m%d_%H%M%S', time.localtime()),
    meta=dict())

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Read one image from the test folder
img = mmcv.imread('../../input/vinbigdata-1024-image-dataset/vinbigdata/test/002a34c58c5b758217ed1f584ccbcfe9.png')

# Attach the config to the model before inference
# (presumably inference_model reads model.cfg - verify against the mmcls API).
model.cfg = cfg
result = inference_model(model, img)
plt.figure(figsize=(8, 6))
show_result_pyplot(model, img, result)

In [None]:
# "img_norm_cfg":"dict(mean=[124.508, 116.050, 106.438], std=[58.577, 57.310, 57.437], to_rgb=True)"
# "evaluation.metric_options": '''dict('topk' : '(1)')''',
# "optimizer" :  '''dict('type'='SGD', 'lr'='0.01', 'momentum'='0.9', 'weight_decay'='0.0001')''',
# "lr_config" : '''dict('policy'='step', 'step'='[1]')''',
# "runner" : '''dict('type'='EpochBasedRunner', 'max_epochs'='2')''',

# "data.train.type":'VinBigDataset',
#     "data.val.type" : 'VinBigDataset',
#         "data.test.type": 'VinBigDataset',   
_cfg_options = {
"classes" : './classes.txt',
"data.train.data_prefix" : '../../input/vinbigdata-1024-image-dataset/vinbigdata/train/',
"data.train.classes" : './classes.txt',
"data.train.ann_file" : './train.txt',
             
"data.val.data_prefix" : '../../input/vinbigdata-1024-image-dataset/vinbigdata/test/',
"data.val.classes" : './classes.txt',
"data.val.ann_file" : './val.txt',

"data.test.data_prefix" :  '../../input/vinbigdata-1024-image-dataset/vinbigdata/test/',
"data.test.classes" :  './classes.txt',
"data.test.ann_file" :  './test.txt',
'evaluation.metric_options.topk' : '1',
"data.samples_per_gpu":'32',
"data.workers_per_gpu" :'2',
"model.head.num_classes":'2',
"model.head.topk":"1",
"optimizer.type" : 'SGD',
"optimizer.lr" : '0.01',
"optimizer.momentum" : '0.9',
"optimizer.weight_decay" : '0.0001',
"optimizer_config.grad_clip" : "None",
"lr_config.policy": 'step',
"lr_config.step": '1',
"runner.type": "EpochBasedRunner",
"runner.max_epochs": "2",
    
"load_from" : 'checkpoints/resnext50_32x4d_batch256_imagenet_20200708-c07adbb7.pth',
"work_dir" : './vin_work_dirs_val',

"runner.max_epochs" : "20",
   
"img_norm_cfg.to_rgb" : 'True'
}
# "load_from" : 'checkpoints/resnet50_batch256_imagenet_20200708-cfb998bf.pth',
# "gpu_ids" : "range(0, 1)",
# "img_norm_cfg.mean" : "[124.508, 116.050, 106.438]",
# "img_norm_cfg.std" : "[58.577, 57.310, 57.437]",  
cfg_op = ""
for k, v in _cfg_options.items():
    cfg_op+=f"{k}='{v}' "
print(cfg_op)

In [None]:
!python tools/test.py configs/resnext/resnext101_32x4d_b32x8_imagenet.py ./vin_work_dirs/latest.pth --out=results_resnext_20_epoch.json --options classes='./classes.txt' data.train.data_prefix='../../input/vinbigdata-1024-image-dataset/vinbigdata/train/' data.train.classes='./classes.txt' data.train.ann_file='./train.txt' data.val.data_prefix='../../input/vinbigdata-1024-image-dataset/vinbigdata/test/' data.val.classes='./classes.txt' data.val.ann_file='./val.txt' data.test.data_prefix='../../input/vinbigdata-1024-image-dataset/vinbigdata/test/' data.test.classes='./classes.txt' data.test.ann_file='./test.txt' evaluation.metric_options.topk='1' data.samples_per_gpu='32' data.workers_per_gpu='2' model.head.num_classes='2' model.head.topk='1' optimizer.type='SGD' optimizer.lr='0.01' optimizer.momentum='0.9' optimizer.weight_decay='0.0001' optimizer_config.grad_clip='None' lr_config.policy='step' lr_config.step='1' runner.type='EpochBasedRunner' runner.max_epochs='20' load_from='checkpoints/resnext50_32x4d_batch256_imagenet_20200708-c07adbb7.pth' work_dir='./vin_work_dirs_val' img_norm_cfg.to_rgb='True'

In [None]:
# test_df