# Planet: Understanding the Amazon from Space

In [1]:
from fastai.vision import *
from sklearn.metrics import fbeta_score, matthews_corrcoef
from tqdm import tqdm

In [2]:
# Root data directory: the label CSV, the image folder and the exported learners
# used further down are all resolved relative to it.
path = Path('../data')
path

PosixPath('../data')

In [3]:
# Peek at the label table: one row per image, with its tags stored as a single
# space-separated string.
labels_df = pd.read_csv(path/'train_v2.csv')
labels_df.head()

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road


In [4]:
# Augmentation settings handed to fastai's get_transforms: vertical flips enabled,
# lighting change capped at 0.1, zoom capped at 1.05, warping switched off (max_warp=0.).
tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)

In [5]:
# Build the labelled image list from train_v2.csv: file names get a '.jpg' suffix and are
# looked up in the 'train-jpg' folder under `path`; tags are split on spaces, giving
# multi-label targets. split_none() keeps every image in the training set.
# NOTE(review): with no random split the seed does not appear to affect this cell —
# confirm before removing it.
np.random.seed(14)
src = (ImageList.from_csv(path, 'train_v2.csv', folder='train-jpg', suffix='.jpg')
      .split_none()
      .label_from_df(label_delim=' '))

In [6]:
# Attach the augmentations at 256x256, wrap the result in a DataBunch and normalize
# the images with fastai's `imagenet_stats`.
data = (src.transform(tfms, size=256)
       .databunch().normalize(imagenet_stats))

In [7]:
# Sanity check: display the training set summary (item count, image size, sample labels).
data.train_ds

LabelList (40479 items)
x: ImageList
Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256),Image (3, 256, 256)
y: MultiCategoryList
haze;primary,agriculture;clear;primary;water,clear;primary,clear;primary,agriculture;clear;habitation;primary;road
Path: ../data

In [8]:
# Exported learner files (one per architecture) whose predictions are averaged below.
model_list = [
    'resnet50.pkl',
    'resnet101.pkl',
    'resnet152.pkl',
    'densenet121.pkl',
    'densenet169.pkl',
]

In [9]:
# Ensemble by plain averaging: run test-time augmentation with every exported learner
# over the images in `data` and take the mean of the per-class predictions.
# NOTE(review): averaging column-wise assumes every exported learner uses the same
# class order as `data` — confirm against the exported models.
n_classes = data.c  # number of label columns, read from the data instead of a hard-coded 17
total = np.zeros((len(data.train_ds), n_classes))
for model in model_list:
    learn = load_learner(path, model)
    learn.data = data  # predict on the labelled training images built above
    # presumably DatasetType.Fix iterates the training set in a fixed order so rows line
    # up across models — TODO confirm against the fastai docs
    preds, targets = learn.TTA(scale=1.05, ds_type=DatasetType.Fix)
    total = np.add(total, np.array(preds))

# `targets` from the last iteration is reused by the threshold search further down.
preds_avg = np.divide(total, len(model_list))

In [12]:
# Sanity check: the averaged predictions and the targets should have the same shape.
preds_avg.shape, targets.shape

((40479, 17), torch.Size([40479, 17]))

In [13]:
def f2_score(y_true, y_pred):
    """Return the F-beta score with beta=2 for a pair of label arrays.

    Both arguments are converted with ``np.array`` before scoring, so array-likes
    such as tensors or lists can be passed directly.
    """
    truth = np.array(y_true)
    guess = np.array(y_pred)
    return fbeta_score(truth, guess, beta=2)

In [17]:
def find_f2score_threshold(p_valid, y_valid, try_all=False, verbose=False, thresholds=None):
    """Return the probability cut-off that maximises the F2 score for one label.

    Args:
        p_valid: predicted probabilities for a single label; compared against each
            candidate with ``>`` to obtain binary predictions.
        y_valid: ground-truth values for the same label, aligned with ``p_valid``.
        try_all: currently ignored; kept only so existing callers keep working.
        verbose: when truthy, print the best score and the threshold that produced it.
        thresholds: optional iterable of candidate cut-offs. Defaults to
            ``np.arange(0.1, 0.5, 0.005)``, the grid this function always used.

    Returns:
        The first candidate that reaches the highest F2 score, or 0 when there are
        no candidates to try.
    """
    if thresholds is None:
        thresholds = np.arange(0.1, 0.5, 0.005)
    best = 0
    best_score = -1
    for t in thresholds:
        score = f2_score(y_valid, p_valid > t)
        # strict '>' keeps the earliest threshold when several tie on score
        if score > best_score:
            best_score = score
            best = t
    # truthiness check so verbose=1 or a numpy bool also enables the report
    if verbose:
        print('Best score: ', round(best_score, 5), ' @ threshold =', best)
    return best

In [18]:
# Tune one decision threshold per label column. The column count is taken from the
# prediction matrix itself rather than a hard-coded 17.
best_thresholds = np.zeros(preds_avg.shape[1])
for i in range(preds_avg.shape[1]):
    best_thresholds[i] = find_f2score_threshold(preds_avg[:,i], targets[:,i], verbose=False)

In [19]:
# Show the tuned thresholds, one per column of preds_avg, in column order.
print(best_thresholds)

[0.155 0.22  0.1   0.1   0.155 0.29  0.17  0.11  0.125 0.125 0.14  0.17  0.26  0.205 0.125 0.1   0.16 ]
