In [1]:
%reload_ext autoreload
%autoreload 2
import fastai
from fastai.vision import *
import pretrainedmodels

In [2]:
from utils import FocalLoss
from imbalanced_sampler import ImbalancedDatasetSampler

In [3]:
# Root directory for every data path below; '.' resolves against the notebook's working directory.
path = Path('.')

In [4]:
# Image folder handed to ImageItemList.from_df as `folder` in get_data.
train = path/'train_dehaze/'
# Two unlabeled image folders; their files are concatenated (test first, then holdout) for inference.
test =  path/'leaderboard_test_data_dehaze/'
holdout = path/'leaderboard_holdout_data_dehaze/'
# Sample submission; only its column names are reused when building the final submission frame.
sample_sub = path/'SampleSubmission.csv'
# Training labels CSV; get_data labels items from its 'has_oilpalm' column.
labels = path/'traininglabels.csv'

In [5]:
from fastai.torch_core import flatten_model
from fastai.layers import CrossEntropyFlat


def arch_summary(arch):
    """Print how many flattened layers each top-level child of an architecture holds.

    `arch` is invoked as `arch(False)` to build the model. Every direct child of
    that model is passed through `flatten_model`, and one line per child is
    printed with its index, class name, layer count and the running total.
    """
    running_total = 0
    for idx, child in enumerate(arch(False).children()):
        layer_count = len(flatten_model(child))
        running_total += layer_count
        print(f'({idx}) {child.__class__.__name__:<12}: {layer_count:<4}layers (total: {running_total})')


def get_groups(model, layer_groups):
    """Print the class names of `model`'s items, bucketed by layer group.

    Each entry of `layer_groups` contributes a budget equal to its length.
    Iterating `model`, the flattened layer count of every item is deducted
    from the current group's budget; once the budget reaches exactly zero the
    collected class names are printed under a 1-based group number and the
    next group starts.
    """
    remaining = list(map(len, layer_groups))
    group_no = 0
    names = []
    for layer in model:
        remaining[group_no] -= len(flatten_model(layer))
        names.append(layer.__class__.__name__)
        if remaining[group_no] != 0:
            continue
        # Budget exhausted: report this group and move on to the next one.
        group_no += 1
        print(f'Group {group_no}:', names)
        names = []

In [6]:
def resnext101_32x4d(pretrained=False):
    """Build ResNeXt-101 (32x4d) from `pretrainedmodels` as one flat nn.Sequential.

    A truthy `pretrained` requests the 'imagenet' weights; otherwise no
    pretrained weights are requested. The model's first child is unpacked
    element by element into the Sequential, followed by the remaining
    children as-is.
    """
    if pretrained:
        weights = 'imagenet'
    else:
        weights = None
    base = pretrainedmodels.__dict__['resnext101_32x4d'](pretrained=weights)
    children = list(base.children())
    head, tail = children[0], children[1:]
    return nn.Sequential(*head, *tail)

In [7]:
# Materialize the directory listings once so the file order stays fixed for the rest of the run.
test_names = list(test.iterdir())
holdout_names = list(holdout.iterdir())

In [8]:
# Training labels; get_data reads the 'has_oilpalm' column from this frame.
df = pd.read_csv(labels)
# Sample submission; its columns are reused as the header of the generated submission.
df_sample = pd.read_csv(sample_sub)

In [9]:
def get_data(df, size=128, bs=64, pct=0.2, seed=2019):
    """Assemble the train/valid/test ImageDataBunch used throughout the notebook.

    Args:
        df: labels frame; items are labeled from its 'has_oilpalm' column.
        size: image size passed to the transform step.
        bs: training batch size; validation and test loaders use twice this.
        pct: fraction of items held out for validation.
        seed: seed for the random train/valid split.

    Returns:
        An ImageDataBunch normalized with `imagenet_stats`. The training loader
        draws through `ImbalancedDatasetSampler(..., func=np.log)`; the test set
        is the concatenation of `test_names` and `holdout_names`.
    """
    items = ImageItemList.from_df(df, path, folder=train)
    split = items.random_split_by_pct(pct, seed=seed)
    labelled = split.label_from_df('has_oilpalm')
    with_test = labelled.add_test(test_names+holdout_names)
    tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)
    data = with_test.transform(tfms, size=size)

    eval_bs = 2*bs
    balanced_sampler = ImbalancedDatasetSampler(data.train, func=np.log)
    train_dl = DataLoader(data.train, bs, sampler=balanced_sampler, num_workers=12)
    # Evaluation loaders are not shuffled, so prediction order follows dataset order.
    val_dl = DataLoader(data.valid, eval_bs, shuffle=False, num_workers=8)
    test_dl = DataLoader(data.test, eval_bs, shuffle=False, num_workers=8)

    bunch = ImageDataBunch(train_dl=train_dl, valid_dl=val_dl, test_dl=test_dl)
    return bunch.normalize(imagenet_stats)

In [10]:
#This was working perfectly some minutes ago!
from sklearn.metrics import roc_auc_score
def auc_score(y_score,y_true):
    """ROC AUC computed from column 1 of `y_score` against `y_true`, as a tensor.

    The argument order (scores first, targets second) lets this be used as a metric.
    """
    positive_scores = y_score[:,1]
    auc = roc_auc_score(y_true, positive_scores)
    return torch.tensor(auc)

def get_learner(data, mixup=True):
    """Create the ResNeXt-101 learner used for training and inference.

    Builds a `create_cnn` learner on `data` with the pretrained
    `resnext101_32x4d` architecture, `FocalLoss` as the loss and `accuracy`
    as the only metric. When `mixup` is truthy, `learn.mixup()` is called on
    it. The learner is returned after `to_fp16()`.
    """
    learner = create_cnn(
        data,
        resnext101_32x4d,
        pretrained=True,
        loss_func=FocalLoss(),
        metrics=[accuracy],
    )
    if mixup:
        learner.mixup()
    return learner.to_fp16()

In [11]:
# Data at 256px with batch size 48, used for the learning-rate search below.
data = get_data(df, size=256, bs=48)

In [None]:
# Learner with get_learner's defaults (mixup enabled), built for the learning-rate search.
learn = get_learner(data)

In [None]:
# Run the learning-rate finder, then plot what the recorder captured.
learn.lr_find()
learn.recorder.plot()

In [21]:
# Base learning rate: the frozen stage trains with slice(lr), the unfrozen stage with slice(lr/100, lr/10).
# NOTE(review): presumably read off the lr_find plot above — confirm.
lr = 2e-2

256

# Cross-validation

In [14]:
# Tag appended to the saved prediction and submission file names.
name = 'X5-TTA-FL-IS'
folds = 5
# Running average of the positive-class test predictions across folds.
preds = 0

# `with` closes (and flushes) the log even when a fold raises mid-training;
# the original only closed it when the whole loop succeeded.
with open("log_101_FL.txt","w+") as f:
    for i in range(folds):
        f.write(f'---Fold {i}---\n')

        # A different seed per fold yields a different random train/valid split.
        data = get_data(df, 256, 32, seed=i)
        learn = get_learner(data)

        # Stage 1: train with the model frozen.
        learn.freeze()
        learn.fit_one_cycle(12, slice(lr))
        learn.save(f'256-{i}')

        # Stage 2: unfreeze and fine-tune with lower learning rates.
        learn.unfreeze()
        learn.fit_one_cycle(20, slice(lr/100, lr/10))
        learn.save(f'256u-{i}')

        # get_preds() with no arguments is used for the per-fold AUC.
        p,t = learn.get_preds()
        f.write(f'  AUC score after fold {i} :{auc_score(p,t)}\n')

        # Test-time augmentation on the test set with the fine-tuned weights.
        # Fix: the original called get_data(256, 48), which passed 256 as the
        # dataframe and shifted size/bs by one position.
        # NOTE(review): the learner is rebuilt and cast back to float before TTA,
        # presumably to run inference outside fp16 — confirm.
        learn = get_learner(get_data(df, 256, 48))
        learn.load(f'256u-{i}')
        learn.model.float()
        p,_ = learn.TTA(ds_type=DatasetType.Test)

        # Column 1 holds the positive-class score; dividing by `folds` makes the sum an average.
        preds += to_np(p[:,1])/folds

In [15]:
# Persist the fold-averaged test predictions under a file name tagged with `name`.
np.save(f'preds_resneXt101-{name}', preds)

## Submission file

In [16]:
# File names in the same order the test set was built (test files first, then holdout).
ids = np.array([f.name for f in test_names + holdout_names])

In [17]:
# Sanity check: one id per file across the test and holdout folders.
ids.shape

(6534,)

In [18]:
# Build the submission from a column mapping so the prediction column stays numeric;
# np.stack([ids, preds], axis=1) coerced the float predictions to strings.
# Unpacking fails loudly unless the sample submission has exactly two columns,
# the same requirement the original two-column array imposed.
id_col, pred_col = df_sample.columns
sub = pd.DataFrame({id_col: ids, pred_col: preds})

In [19]:
# Write the submission CSV without the index column; the file name is tagged with `name`.
sub.to_csv(f'sub_resneXt101-{name}.csv', index=False)