In [None]:
!pip install fastai2
!pip install pretrainedmodels

In [None]:
import os
import torchvision
import pandas as pd
from fastai2.vision.all import *
import pretrainedmodels
import sklearn.metrics as skm
from sklearn.model_selection import StratifiedKFold

import warnings
warnings.filterwarnings('ignore')

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices),
    exports `PYTHONHASHSEED`, and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every CUDA device, including the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes kernels per input shape and may select
    # different (non-deterministic) algorithms from run to run.
    torch.backends.cudnn.benchmark = False
    
# One global seed: applied to the RNGs here and reused for the CV splitter.
SEED = 42
seed_everything(seed=SEED)

In [None]:
# Locations of the competition files, all resolved against the dataset root.
working_path = Path('../input') / 'plant-pathology-2020-fgvc7'
image_path, train_path, test_path, sample_path = (
    working_path / leaf
    for leaf in ('images', 'train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
# Display the result of `ls()` on the image directory.
# NOTE(review): `ls` is not a stdlib `pathlib.Path` method; presumably it is
# added by the fastai star import above, to be confirmed.
image_path.ls()

In [None]:
# Read the training labels and preview the first ten rows.
train_df = pd.read_csv(filepath_or_buffer=train_path)
train_df.head(n=10)

In [None]:
# Convert the one-hot label columns into long format: one row per
# (image_id, label) pair where the indicator equals 1.
# Resulting columns: 'image_id' and 'level_1' (the label name).
onehot_labels = train_df.set_index('image_id')
active_labels = (
    onehot_labels
    .where(onehot_labels == 1)   # everything that is not 1 becomes NaN
    .stack()
    .dropna()                    # explicit: newer `stack` implementations keep NaN rows
)
# `reset_index` names the unnamed label level 'level_1' and the values column 0;
# the values column is all ones and carries no information.
train_df = active_labels.reset_index().drop(columns=0)

In [None]:
def get_data(train_df,valid_idx,size=512,bs=64):
    """Build the DataBlock and its dataloaders for one cross-validation fold.

    Args:
        train_df: Frame whose column 0 is the image id (joined with the global
            `image_path` and a '.jpg' suffix) and whose column 1 is the label;
            the label column is also read as `train_df.level_1`.
        valid_idx: Row indices held out as the validation set.
        size: Target size passed to `RandomResizedCrop`.
        bs: Batch size passed to `dataloaders` (defaults to the previous
            hard-coded value of 64).

    Returns:
        Tuple `(data_block, dataloaders)`.
    """
    # Sort explicitly so the class order does not depend on which label happens
    # to appear first in `train_df`; the submission cell relies on this order.
    class_names = sorted(train_df.level_1.unique())
    plants = DataBlock(
        blocks=(ImageBlock, CategoryBlock(vocab=class_names)),
        splitter=IndexSplitter(valid_idx),
        get_x = ColReader(0,pref=str(image_path)+'/',suff='.jpg'),
        get_y = ColReader(1),
        item_tfms=RandomResizedCrop(size),
        batch_tfms=aug_transforms(flip_vert=True,max_rotate=30,max_zoom=1,max_lighting=0,max_warp=0)
    )
    return plants, plants.dataloaders(train_df,bs=bs)

In [None]:
def _accumulate(self, learn):
    """Replacement for `AccumMetric.accumulate` that keeps per-class scores.

    Unlike the commented-out stock behaviour it replaces, no argmax is applied
    (`self.dim_argmax` is ignored), so the wrapped metric function receives
    scores rather than predicted class indices.

    Args:
        self: The `AccumMetric` instance; reads `sigmoid`, `thresh`, `flatten`
            and appends to `preds` / `targs`.
        learn: Object exposing the current batch's `pred` and `y`.
    """
    pred = learn.pred
    # Hack for roc_auc_score: the `sigmoid` flag is repurposed to request a
    # softmax. `dim` is given explicitly because the implicit-dim form is
    # deprecated; assumes predictions are (batch, n_classes) - TODO confirm.
    if self.sigmoid: pred = torch.nn.functional.softmax(pred, dim=-1)
    if self.thresh:  pred = (pred >= self.thresh)
    targ = learn.y
    pred,targ = to_detach(pred),to_detach(targ)
    if self.flatten: pred,targ = flatten_check(pred,targ)
    self.preds.append(pred)
    self.targs.append(targ)

# Global monkeypatch: every AccumMetric created after this line uses the
# implementation above, not only the ROC-AUC metric defined in this notebook.
AccumMetric.accumulate = _accumulate

def RocAuc(axis=-1, average='macro', sample_weight=None, max_fpr=None,multi_class='ovr'):
    "Area under the ROC curve (`skm.roc_auc_score`) wrapped as a fastai metric; `multi_class` defaults to 'ovr'"
    # `flatten=False` and `sigmoid=True` are fixed; everything else is
    # forwarded from the caller unchanged.
    metric_kwargs = dict(
        axis=axis,
        average=average,
        sample_weight=sample_weight,
        max_fpr=max_fpr,
        flatten=False,
        multi_class=multi_class,
        sigmoid=True,
    )
    return skm_to_fastai(skm.roc_auc_score, **metric_kwargs)

In [None]:
# Test-set frame, later handed to `dls.test_dl` to build the inference loader.
test_df = pd.read_csv(filepath_or_buffer=test_path)

In [None]:
def get_model(model, cut=-2, head=None, from_save=None, n_out=4):
    """Split a backbone into a body and attach a classification head.

    Args:
        model: Module whose direct children are sliced to form the body.
        cut: Slice end applied to the list of children (`children[:cut]`).
        head: Optional module used as the head. When omitted, a head of
            adaptive average pooling, flatten and a linear layer is built from
            the `in_features` of the model's last child.
        from_save: Optional checkpoint name; `models/{from_save}.pth` is loaded
            into the body only.
        n_out: Output size of the default head (defaults to the previous
            hard-coded value of 4).

    Returns:
        `nn.Sequential(body, head)`.
    """
    children = list(model.children())

    if head is None:
        # Built lazily so that a caller-supplied head does not require the last
        # child to expose `in_features`.
        head = nn.Sequential(nn.AdaptiveAvgPool2d(1), Flatten(),
                             nn.Linear(children[-1].in_features, n_out))
    model = nn.Sequential(nn.Sequential(*children[:cut]), head)

    if from_save:
        # NOTE: torch.load unpickles the file - only load trusted checkpoints.
        # Loading onto the CPU first keeps this usable on machines without the
        # device the checkpoint was saved from; load_state_dict copies the
        # values into the existing parameters.
        state_dict = torch.load(f'models/{from_save}.pth', map_location='cpu')
        model[0].load_state_dict(state_dict)

    return model

In [None]:
def create_model(dls,model,metrics):
    """Wrap a backbone in a body/head model and return a Learner for it.

    Args:
        dls: Dataloaders the Learner trains and validates on.
        model: Backbone passed to `get_model`.
        metrics: Metric(s) reported during validation.

    Returns:
        A `Learner` using the `ranger` optimizer.
    """
    model = get_model(model)
    # Without a splitter the Learner has a single parameter group, which makes
    # `freeze()` and discriminative learning rates (`slice(...)`) no-ops.
    # fastai's `default_split` separates `model[0]` (body) from the rest (head).
    return Learner(dls,model,metrics=metrics,opt_func=ranger,splitter=default_split)

def train_learner(learn,lr,epochs):
    """Run the two-stage schedule on `learn` and return it.

    Stage 1 calls `freeze()` and fits one epoch at `lr`. Stage 2 calls
    `unfreeze()` and fits `epochs` epochs with the rate halved, passed as
    `slice(lr/200, lr/2)`.
    """
    # Stage 1: a single epoch at the full rate.
    learn.freeze()
    learn.fit_flat_cos(1, lr)

    # Stage 2: halve the rate; the lower bound of the range is 100x smaller.
    learn.unfreeze()
    peak_lr = lr / 2
    lr_range = slice(peak_lr / 100, peak_lr)
    learn.fit_flat_cos(epochs, lr_range)
    return learn

def validate_learner(dls,learn,test_df):
    test_dl = dls.test_dl(test_df)
    _, metric = learn.validate()
    preds, _ = learn.get_preds(dl=test_dl)
    return metric, preds

In [None]:
# 5-fold stratified splitter; shuffling is seeded with the global SEED.
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=SEED,
)

In [None]:
# Training: fit one model per stratified fold, record its validation metric and
# its test-set predictions, and export the fitted learner.
fold_scores = []
fold_preds = []
for i, (train_idx, valid_idx) in enumerate(kf.split(train_df, train_df.level_1)):
    data_block, dls = get_data(train_df, valid_idx, 256)
    # A fresh pretrained backbone for every fold, so no weights leak between folds.
    model = pretrainedmodels.se_resnext50_32x4d(pretrained='imagenet')
    learn = create_model(dls, model, RocAuc())
    learn = train_learner(learn, 5e-3, 10)
    metric, preds = validate_learner(dls, learn, test_df)
    learn.export(f'export{i}.pkl')
    fold_scores.append(metric)
    fold_preds.append(preds)
# Average the per-fold test predictions into one array.
fold_preds = torch.mean(torch.stack(fold_preds), dim=0).cpu().numpy()

In [None]:
# Learning-rate range test on the dataloaders of the last fold.
# A fresh backbone is loaded because `get_model` reuses the child modules of
# the model it is given, so the `model` left over from the training loop
# already carries that fold's fine-tuned weights.
lr_find_backbone = pretrainedmodels.se_resnext50_32x4d(pretrained='imagenet')
learn = create_model(dls, lr_find_backbone, RocAuc())
learn.lr_find()

In [None]:
# Report the mean of the per-fold validation metric, then the individual values.
cv_score = np.mean(np.asarray(fold_scores))
for report_line in (
    f'5-fold CV multi-class AUC-ROC: {cv_score}',
    f'Individual CV scores: {fold_scores}',
):
    print(report_line)

In [None]:
# Submission
sample_submission = pd.read_csv(sample_path)
# The prediction columns follow the dataloaders' class order, so label them
# from `dls.vocab` instead of assuming a fixed column order.
class_names = list(dls.vocab)
assert set(class_names) == {'healthy', 'multiple_diseases', 'rust', 'scab'}, class_names
assert fold_preds.shape == (len(sample_submission), len(class_names)), fold_preds.shape
sample_submission[class_names] = fold_preds
sample_submission.head(10)

In [None]:
# Write the submission file without the frame's index column.
sample_submission.to_csv(path_or_buf='submission.csv', index=False)