Reference
- https://www.kaggle.com/sujoykg/xception-keras

Ideas to try

- Mixup
- Mixed-precision (fp16) training
- Oversampling of under-represented classes
- Cutout augmentation
- EfficientNet-B3 backbone

In [1]:
from fastai.vision import *
from fastai.metrics import *
# Root directory of the input data; the image folders used when building the
# ImageLists below are resolved relative to it.
PATH = Path('../input')

In [2]:
# Read the training annotation JSON; it carries an 'annotations' list
# (image_id, category_id) and an 'images' list (id, file_name).
ann_file = '../input/train2019.json'
with open(ann_file) as data_file:
    train_anns = json.load(data_file)

# One row per annotation: which category each image id belongs to.
train_anns_df = pd.DataFrame(train_anns['annotations']).loc[:, ['image_id', 'category_id']]

# One row per image: its id (renamed so it can be joined) and relative file path.
train_img_df = (
    pd.DataFrame(train_anns['images'])
    .loc[:, ['id', 'file_name']]
    .rename(columns={'id': 'image_id'})
)

# Join on image_id, store the category as a string, and keep only the two
# columns needed downstream: file_name and category_id.
df_train_file_cat = (
    train_img_df
    .merge(train_anns_df, on='image_id')
    .assign(category_id=lambda frame: frame['category_id'].astype(str))
    .drop(columns=['image_id'])
)
df_train_file_cat.head()

Unnamed: 0,file_name,category_id
0,train_val2019/Plants/400/d1322d13ccd856eb4236c...,400
1,train_val2019/Plants/570/15edbc1e2ef000d8ace48...,570
2,train_val2019/Reptiles/167/c87a32e8927cbf4f06d...,167
3,train_val2019/Birds/254/9fcdd1d37e96d8fd94dfdc...,254
4,train_val2019/Plants/739/ffa06f951e99de9d220ae...,739


In [3]:
%%time
# Try Oversampling

# Balance the classes: every category is padded, by sampling its own rows with
# replacement, up to the size of the largest category.
sample_to = df_train_file_cat.category_id.value_counts().max() # which is 500

# Gather the per-class pieces in a list and concatenate once at the end.
# Calling pd.concat inside the loop re-copies the whole accumulated frame for
# every class, which makes the cell quadratic in the number of rows.
balanced_parts = []
for _, class_rows in df_train_file_cat.groupby('category_id'):
    n_missing = max(sample_to - len(class_rows), 0)
    balanced_parts.append(class_rows)
    # Extra rows drawn from the class itself (an empty frame when n_missing == 0).
    balanced_parts.append(class_rows.sample(n_missing, replace=True))

# Same row order as the incremental version: each class's original rows
# followed by its resampled rows, classes in groupby order.
res = pd.concat(balanced_parts) if balanced_parts else None

CPU times: user 12.8 s, sys: 12 ms, total: 12.8 s
Wall time: 12.8 s


In [4]:
# Sanity check: show the ten most frequent categories after oversampling.
res['category_id'].value_counts().head(10)

527    500
872    500
726    500
640    500
929    500
583    500
384    500
558    500
182    500
341    500
Name: category_id, dtype: int64

In [5]:
# Read the test annotation JSON and keep, for every image, its relative file
# path and its id (renamed to image_id).
test_ann_file = '../input/test2019.json'
with open(test_ann_file) as data_file:
    test_anns = json.load(data_file)
test_img_df = (
    pd.DataFrame(test_anns['images'])
    .loc[:, ['file_name', 'id']]
    .rename(columns={'id': 'image_id'})
)
test_img_df.head()

Unnamed: 0,file_name,image_id
0,test2019/e295f3c7046b1f1e80c0301401324aa9.jpg,268243
1,test2019/ad3dcbb6846ed0b4dab58d7b1a4210ba.jpg,268244
2,test2019/e697be8e296b4b140cff4f96f85c364f.jpg,268245
3,test2019/7e7ba55e6aa26ba99e814d63b15d0121.jpg,268246
4,test2019/6cb6372079d23702511c06923970f13f.jpg,268247


In [22]:
# `res` holds oversampled rows, i.e. the same file can appear many times.
# A row-level random split would therefore place copies of one image in both
# the training and the validation set and make the validation metric look
# better than it is. Instead, pick 10% of the *unique* files for validation and
# flag every copy of those files, so a given image lives on one side only.
SPLIT_SEED = 42  # fixed so the subsample and the split are reproducible
valid_files = (
    res['file_name']
    .drop_duplicates()
    .sample(frac=0.1, random_state=SPLIT_SEED)
)
# is_valid becomes the third column (index 2), after file_name and category_id,
# so the default column positions used by from_df / label_from_df still apply.
res_split = res.assign(is_valid=res['file_name'].isin(valid_files))

src = (
ImageList.from_df(df=res_split,path=PATH/"train_val2019")
    .use_partial_data(0.3, seed=SPLIT_SEED)   # train on a 30% subsample of the rows
    .split_from_df(col='is_valid')            # leak-free train/validation split
    .label_from_df()
    .add_test(ImageList.from_df(df=test_img_df,path=PATH/"test2019"))
)

In [23]:
# First training stage: default fastai augmentations on 128px images,
# batches of 128, normalised with the ImageNet statistics.
data = (
    src.transform(get_transforms(), size=128)
       .databunch(bs=128)
       .normalize(imagenet_stats)
)

In [24]:
!pip install efficientnet_pytorch

[33mYou are using pip version 19.0.3, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [25]:
from efficientnet_pytorch import EfficientNet

In [26]:
model_name = 'efficientnet-b3'
def getModel(pret):
    """Build an EfficientNet classifier whose head matches the dataset's classes.

    Args:
        pret: Currently has no effect: the network is always created with
            ``EfficientNet.from_pretrained``, so pretrained weights are loaded
            whatever value is passed. The parameter is kept so existing calls
            keep working.
            NOTE(review): decide whether a falsy ``pret`` should build an
            untrained network instead; callers would need updating first.

    Returns:
        The ``model_name`` EfficientNet with its final ``_fc`` layer replaced
        by a new ``nn.Linear`` that outputs ``data.c`` values.
    """
    model = EfficientNet.from_pretrained(model_name)
#     model._bn1 = nn.Identity()
    # Take the head's input width from the loaded network instead of
    # hard-coding 1536, so switching `model_name` to another EfficientNet
    # variant still yields a compatible classifier layer.
    model._fc = nn.Linear(model._fc.in_features, data.c)
    return model

In [27]:
# learn = cnn_learner(data,models.densenet201,metrics=[error_rate],model_dir='/kaggle/working',pretrained=True,loss_func=LabelSmoothingCrossEntropy()).mixup()

In [28]:
# Build the Learner around the EfficientNet model with label-smoothed
# cross-entropy, then enable mixup and fp16 training. Checkpoints are written
# to /kaggle/working.
# NOTE(review): getModel calls EfficientNet.from_pretrained unconditionally, so
# the `False` argument does not disable pretrained weights (the cell output
# confirms they are loaded).
learn = Learner(data,getModel(False),metrics=[error_rate],model_dir='/kaggle/working',loss_func=LabelSmoothingCrossEntropy()).mixup().to_fp16()

Loaded pretrained weights for efficientnet-b3


In [29]:
# Run the learning-rate finder and plot what the recorder captured, to pick the
# learning rate used by the training cells below.
# NOTE(review): the saved output ends in a KeyboardInterrupt, so this cell was
# interrupted in the recorded run.
learn.lr_find()
learn.recorder.plot()

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.


KeyboardInterrupt: 

In [None]:
# Stage 1: three one-cycle epochs at learning rate 1e-3 on the 128px data.
learn.fit_one_cycle(3,1e-3)

In [None]:
# Settings for the second, higher-resolution training stage.
SZ = 224                               # image side length in pixels
cutout_frac, p_cutout = 0.25, 0.75     # hole side as a fraction of SZ; probability of applying cutout
cutout_sz = round(cutout_frac * SZ)    # hole side in pixels

# Both ranges are degenerate (min == max): n_holes is fixed at 1 and the
# length at cutout_sz.
cutout_tfm = cutout(
    n_holes=(1, 1),
    length=(cutout_sz,) * 2,
    p=p_cutout,
)

In [None]:
# Swap the learner's data for the higher-resolution version: the same `src`
# items, now at SZ pixels with cutout added to the default augmentations, and a
# batch size of 64.
learn.data = (
    src.transform(get_transforms(xtra_tfms=[cutout_tfm]), size=SZ)
       .databunch(bs=64)
       .normalize(imagenet_stats)
)

In [None]:
# Stage 2: seven more one-cycle epochs at learning rate 1e-3, run after the
# learner's data has been replaced above.
learn.fit_one_cycle(7,1e-3)

In [None]:
# Checkpoint the trained model under the name 'cutout-efficient'
# (the Learner was created with model_dir='/kaggle/working').
learn.save('cutout-efficient')

In [None]:
# learn.unfreeze()
# learn.fit_one_cycle(8,slice(1e-6,1e-4))

In [None]:
# Predict on the test set that was attached with add_test.
# `y` is not used by the cells below — presumably a placeholder target, since
# the test images have no labels (TODO confirm).
preds,y = learn.get_preds(DatasetType.Test)

In [None]:
# Keep the five highest-scoring entries of each prediction row.
# torch.topk returns (values, indices); the indices in results[1] are the ones
# used below to look up class names.
results = torch.topk(preds,5)

In [None]:
# Convert every row of top-5 class indices into one space-separated string of
# category names, keeping the ranking order. Using str.join avoids the stray
# leading space that manual accumulation with `" " + name` leaves on each string.
top5_indices = results[1].numpy()
out = [
    " ".join(str(data.classes[idx]) for idx in row)
    for row in top5_indices
]
# print(out)

In [None]:
# Use the sample submission as a template and fill in its "predicted" column.
# NOTE(review): `out` is assigned by position, which assumes the rows of the
# sample submission are in the same order as the images in test_img_df —
# confirm, or align on the image id instead.
sam_sub_df = pd.read_csv('../input/kaggle_sample_submission.csv')
# sam_sub_df.head()
sam_sub_df["predicted"] = out
sam_sub_df.head()

In [None]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
sam_sub_df.to_csv("submission.csv",index=False)