In [None]:
import pandas as pd
import numpy as np
import tensorflow_addons as tfa
import h5py
from sklearn.metrics import f1_score

from sklearn.model_selection import train_test_split
from fastai.vision.all import *

In [None]:
class CFG:
    """Notebook-wide settings: seed, image sizes, label setup, input paths and transforms."""

    # --- reproducibility / batching ---
    seed = 42
    bs = 32

    # --- image geometry (crop applied per item, resize applied per batch) ---
    crop_size = 510
    img_size = 510

    # --- label handling ---
    label_type = MultiCategoryBlock
    classes = [
        'complex',
        'frog_eye_leaf_spot',
        'healthy',
        'powdery_mildew',
        'rust',
        'scab',
    ]

    # --- input locations ---
    train_df_path = Path('../input/plant-pathology-2021-fgvc8/train.csv')
    image_path = Path('../input/resized-plant2021/img_sz_640')
    dup_path = Path('../input/pp2021-duplicates-revealing/duplicates.csv')

    # --- transforms ---
    # Item-level transform handed to the DataBlock in lv_dataset.
    item_tfms = RandomResizedCrop(crop_size)
    # Batch-level augmentation followed by normalisation with imagenet_stats.
    batch_tfms = [
        *aug_transforms(mult=2.0, flip_vert=True, size=img_size),
        Normalize.from_stats(*imagenet_stats),
    ]
    # NOTE(review): not referenced by any other code visible in this file.
    cutmix = CutMix(1.)

# Seed the run from CFG.seed before any data splitting or training happens.
# NOTE(review): reproducible=True presumably also switches the backend to
# deterministic settings — confirm against the installed fastai version.
set_seed(CFG.seed, reproducible=True)

def remove_dup(df, dup):
    """Return a copy of `df` without the rows listed as duplicates.

    Args:
        df: DataFrame with an 'image' column.
        dup: DataFrame whose column labelled ``2`` holds the image names to drop.

    Returns:
        A new DataFrame with the matching rows removed and the index reset to
        0..n-1. `df` itself is not modified.

    For every image named in ``dup[2]`` only the first matching row of `df`
    is removed. Names that do not occur in `df` are ignored instead of
    raising IndexError, and the lookup is a single vectorised pass rather
    than one full scan of `df` per duplicate.
    """
    images = df['image']
    # Rows whose image name is listed in the duplicates table.
    listed = images.isin(dup[2])
    # Restrict to the first row per image name, so a name that appears
    # several times in `df` loses only its first occurrence.
    first_occurrence = ~images.duplicated()

    return df.loc[~(listed & first_occurrence)].reset_index(drop=True)

def get_x(x):
    """Build the image path for a row: CFG.image_path joined with the row's 'image' value."""
    image_name = x['image']
    return CFG.image_path / image_name

def get_y(y):
    """Split the row's 'labels' string on single spaces and return the pieces as a list."""
    label_string = y['labels']
    return label_string.split(' ')

def lv_dataset(df,n_toshow=9):
    """Create DataLoaders from `df` using the CFG settings and preview one batch.

    Args:
        df: DataFrame passed to the DataBlock; rows are read via get_x / get_y.
        n_toshow: value passed as max_n to show_batch.

    Returns:
        The DataLoaders object built with batch size CFG.bs.
    """
    # Input is an image; the target block comes from CFG.label_type with a fixed vocab.
    block_types = (ImageBlock, CFG.label_type(vocab=CFG.classes))
    split_fn = RandomSplitter(seed=CFG.seed)

    datablock = DataBlock(
        blocks=block_types,
        splitter=split_fn,
        get_x=get_x,
        get_y=get_y,
        item_tfms=CFG.item_tfms,
        batch_tfms=CFG.batch_tfms,
    )

    loaders = datablock.dataloaders(df, bs=CFG.bs)
    loaders.show_batch(max_n=n_toshow)
    return loaders

def get_BestThreshs(preds, targs):
    """Pick, for each class in CFG.classes, the threshold with the highest F1 score.

    Candidate thresholds are np.arange(0.01, 1., 0.01). For each candidate a
    fresh tfa F1Score metric is updated with (targs, preds) and its result is
    stored as one row of a table with one column per class. The best
    threshold per class is the index label of that column's maximum.

    Args:
        preds: predictions passed as the second argument to update_state.
        targs: targets passed as the first argument to update_state.

    Returns:
        A list of thresholds, one per class, in CFG.classes order. Each
        class/threshold pair is also printed.
    """
    candidates = np.arange(0.01, 1., 0.01)
    n_classes = len(CFG.classes)

    def _f1_at(cutoff):
        # A new metric per cutoff so state never carries over between candidates.
        f1 = tfa.metrics.F1Score(num_classes=n_classes, threshold=cutoff)
        f1.update_state(targs, preds)
        return f1.result().numpy()

    score_table = pd.DataFrame(
        data=[_f1_at(cutoff) for cutoff in candidates],
        index=pd.Index(candidates, name='threshold'),
        columns=CFG.classes,
    )

    best = [score_table[name].idxmax() for name in CFG.classes]

    for name, cutoff in zip(CFG.classes, best):
        print(name + ' >>>> ', cutoff)

    return best

def save_thresholds(thresholds):
    """Store `thresholds` in 'thresholds.h5' under the dataset name 'thresholds'.

    The file is opened with mode 'w' in the current working directory.
    """
    target, mode = 'thresholds.h5', 'w'
    with h5py.File(target, mode) as store:
        store.create_dataset('thresholds', data=thresholds)

In [None]:
# Load the training table from CFG.train_df_path.
df = pd.read_csv(CFG.train_df_path)
# Explicit column names are supplied, so the duplicates file's columns are labelled 1 and 2.
dup = pd.read_csv(CFG.dup_path, names=[1,2])
# Drop rows of df whose 'image' matches an entry in column 2 of the duplicates table.
df = remove_dup(df, dup)
# Hold out 10% of the rows as val_df.
# NOTE(review): random_state=32 differs from CFG.seed (42) — confirm this is intentional.
train_df, val_df = train_test_split(df, test_size=0.1, shuffle=True, random_state=32)

In [None]:
# Build the DataLoaders from the training split and preview up to 20 samples.
dls = lv_dataset(train_df,20)

In [None]:
# Create a resnet50-based learner on `dls`, reporting accuracy_multi, then call to_fp16() on it.
# NOTE(review): to_fp16() presumably enables mixed-precision training — confirm against fastai docs.
learn = cnn_learner(dls, resnet50, metrics=[accuracy_multi]).to_fp16()

In [None]:
# Run the learning-rate finder and unpack its result into two values; `lr` is used by fine_tune below.
# NOTE(review): the number of values returned by lr_find() depends on the fastai version
# (newer releases may return a single suggestion by default, which would break this
# two-value unpack) — confirm against the installed version.
min_lr, lr = learn.lr_find()

In [None]:
# Fine-tune with 7 as the epoch count, `lr` from the finder above, and freeze_epochs=2.
learn.fine_tune(7, lr, freeze_epochs=2)

In [None]:
# Ensemble evaluation on the held-out split: average the predictions of the
# exported models listed below, then pick and persist per-class thresholds.
# NOTE(review): load_learner unpickles the given file, which can execute
# arbitrary code — only load exports from a trusted source.
ensemble_paths = ['../input/trained-models1/alexnet.pkl', '../input/trained-models1/resnet50.pkl']

# Labelled test DataLoader built from the trained learner's DataLoaders.
val_dls = learn.dls.test_dl(val_df, with_labels=True)
predictions = 0
lb = 0
for m in ensemble_paths:
    # Each loaded model gets its own name so the `learn` object created by the
    # training cells is not rebound (a later cell exports `learn`).
    member = load_learner(m, cpu=False).to_fp32()
    preds, y = member.get_preds(dl=val_dls)
    predictions += preds
    lb = y  # every member is evaluated on the same val_dls, so the last targets are kept

# Average over however many models were loaded instead of a hard-coded 2.
predictions /= len(ensemble_paths)
thresholds = get_BestThreshs(predictions, lb)
save_thresholds(thresholds)

In [None]:
# NOTE(review): the previous cell already ends with this same call, so this
# rewrites 'thresholds.h5' with identical data — likely redundant.
save_thresholds(thresholds)

In [None]:
# Export whatever `learn` is bound to at this point to 'resnet50.pkl'.
# NOTE(review): make sure no earlier cell has rebound `learn` to a loaded
# learner, otherwise this does not export the model fine-tuned above.
learn.export(f'resnet50.pkl')