In [341]:
import fastai
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from fastai.transforms import transforms_side_on
from fastai.conv_learner import ImageClassifierData, get_cv_idxs, resnet34, resnet50, tfms_from_model, ConvLearner, vgg16
from fastai.metrics import f1
from sklearn.metrics import f1_score, confusion_matrix

from PIL import Image

In [189]:
# Read the training label table; its row count `n` is what later cells hand
# to get_cv_idxs when drawing the validation split.
df = pd.read_csv('../data/train/train.csv')
n = df.shape[0]
n

89896

In [17]:
def show_image(image_id, ax=None):
    """Display one training image.

    Parameters
    ----------
    image_id : str or int
        Stem of the JPEG under ``../data/train``; the ``.jpg`` suffix is
        appended here.
    ax : optional
        Object exposing ``imshow`` (e.g. a matplotlib axes) to draw on.
        When omitted, the image is drawn with ``plt.imshow``.

    Returns
    -------
    None
    """
    # Open through a context manager so the file handle is released as soon
    # as the image has been handed to imshow.
    with Image.open(f'../data/train/{image_id}.jpg') as img:
        # Compare against None explicitly: only "no axes supplied" should
        # fall back to pyplot, regardless of how the object evaluates as a bool.
        if ax is None:
            plt.imshow(img)
        else:
            ax.imshow(img)

In [None]:
# Hand-written descriptions for some label ids (keys are the label ids as
# strings, values are free-text descriptions).
# TODO: the original cell ended with a bare '18' key and no description, which
# is a syntax error; add the '18' entry back once its description is known.
labs = {
    '5': 'jumpers',
    '2': 'open shoulder',
    '1': 'diamonds',
    '3': 'sports bottoms',
    '4': 'sports grey',
    '6': 'lycra shorts',
    '7': 'open back',
    '8': 'grungy',
    '9': 'tight fitting',
    '10': 'bikini tops',
    '11': 'baggy tops',
    '12': 'flowy dresses',
    '13': 'wedding',
    '14': 'beige',
    '15': 'bikini',
    '16': 'corset',
    '17': 'black lace',
}

## Validation
### VGG

In [444]:
# VGG-16 validation pipeline. (The original cell first assigned resnet50 and
# immediately overwrote it; only the vgg16 assignment ever took effect.)
model = vgg16
tfms = tfms_from_model(model, 64, aug_tfms=transforms_side_on, max_zoom=1.05)

# Dataset driven by train.csv; get_cv_idxs picks 20% of the n rows as the
# validation split, and the 'test' folder is attached for later inference.
data = ImageClassifierData.from_csv(
    path='../data',
    folder='train',
    csv_fname='../data/train/train.csv',
    bs=32,
    test_name='test',
    skip_header=True,
    suffix='.jpg',
    val_idxs=get_cv_idxs(n, val_pct=0.2),
    tfms=tfms
)

In [445]:
# Build a learner from the pretrained `model` architecture and the `data`
# object created in the cell above.
learn = ConvLearner.pretrained(model, data)

In [446]:
# Restore previously saved VGG weights into the learner.
# NOTE(review): the name passed here already ends in '.h5'; confirm whether
# this fastai version appends its own extension when resolving the file.
learn.load('80k_unfrozen_vgg_l062_03.h5')

In [447]:
# Test-time augmentation with the VGG learner: average the stacked
# predictions over the first axis, then score the 0.25-thresholded result
# against the validation labels with micro-averaged F1.
y_pred_tta, y = learn.TTA()
y_pred_mean_tta = y_pred_tta.mean(axis=0)
f1_score(data.val_y, (y_pred_mean_tta > 0.25).astype(int), average='micro')



0.54053716186977729

In [443]:
# NOTE(review): same expression as the last line of the cell above, but with
# an older execution count (443) and a different recorded score; this looks
# like a leftover from an earlier run and can probably be deleted.
f1_score(data.val_y, (y_pred_mean_tta > 0.25).astype('int'), average='micro')

0.5317965334166147

### Resnet

In [360]:
# ResNet-50 validation pipeline: size argument 64, side-on augmentations and
# a maximum zoom of 1.05.
model = resnet50
tfms = tfms_from_model(
    model, 64,
    aug_tfms=transforms_side_on,
    max_zoom=1.05,
)

# CSV-driven dataset; 20% of the n rows are drawn as the validation split.
data = ImageClassifierData.from_csv(
    path='../data', folder='train', csv_fname='../data/train/train.csv',
    suffix='.jpg', skip_header=True,
    test_name='test',
    val_idxs=get_cv_idxs(n, val_pct=0.2),
    tfms=tfms, bs=32,
)

In [362]:
# Build the ResNet-50 learner on the dataset from the cell above and restore
# its saved weights.
learn = ConvLearner.pretrained(model, data)
learn.load('80k_unfrozen_res50_l064_07.h5')

In [400]:
# NOTE(review): no cell above this one assigns y_res_pred_tta, so this raises
# NameError after Restart & Run All. The recorded output (5, 17979, 228)
# comes from an earlier kernel state.
y_res_pred_tta.shape

(5, 17979, 228)

In [364]:
# Validation-set TTA for the ResNet-50 checkpoint.
#
# The original cell was a copy of the "Test Set / Resnet" cell: it called
# learn.TTA(is_test=True) and stored y_test_res_* variables. That left
# y_res_pred_tta / y_res_pred_mean_tta (used by the validation cells around
# it) undefined, and rebound `y` to the test-set targets before the
# validation F1 scores were computed.
model = resnet50
tfms = tfms_from_model(model, 64, aug_tfms=transforms_side_on, max_zoom=1.05)

data = ImageClassifierData.from_csv(
    path='../data',
    folder='train',
    csv_fname='../data/train/train.csv',
    bs=32,
    test_name='test',
    skip_header=True,
    suffix='.jpg',
    val_idxs=get_cv_idxs(n, val_pct=0.2),
    tfms=tfms
)

learn = ConvLearner.pretrained(model, data)
learn.load('80k_unfrozen_res50_l064_07.h5')

# Same call shape as the VGG validation cell: TTA() without is_test, so the
# predictions and `y` refer to the validation split.
y_res_pred_tta, y = learn.TTA()
# Collapse the stacked TTA predictions by averaging over the first axis.
y_res_pred_mean_tta = np.mean(y_res_pred_tta, axis=0)

In [421]:
# Micro-averaged F1 of the ResNet predictions thresholded at 0.25.
# NOTE(review): needs y_res_pred_mean_tta and `y` in the kernel namespace;
# confirm both come from a validation-set TTA run before trusting the score.
f1_score(y, (y_res_pred_mean_tta > 0.25).astype('int'), average='micro')

0.55728516633979308

In [448]:
# Two-model ensemble: stack the VGG and ResNet mean predictions along a new
# leading axis and average across it.
y_ens = np.array([y_pred_mean_tta, y_res_pred_mean_tta])
y_ens_pred = y_ens.mean(axis=0)

In [449]:
# Micro-averaged F1 of the ensemble predictions thresholded at 0.25, scored
# against `y`.
f1_score(y, (y_ens_pred > 0.25).astype('int'), average='micro')

0.56352915730178588

In [453]:
# Sweep the decision threshold from 0.2 in steps of 0.01 (stop value 0.3) and
# print the micro-averaged F1 of the ensemble at each threshold.
for p in np.arange(0.2, 0.3, 0.01):
    micro_f1 = f1_score(y, (y_ens_pred > p).astype(int), average='micro')
    print(p, micro_f1)

0.2 0.555426111932
0.21 0.558119169008
0.22 0.560070217123
0.23 0.561795515736
0.24 0.56306628572
0.25 0.563529157302
0.26 0.563864926709
0.27 0.56390548972
0.28 0.563173214003
0.29 0.562687963308


## Test Set

### Resnet

In [432]:
# ResNet-50 test-set inference: rebuild the dataset and learner, restore the
# saved weights, then run TTA on the test images.
model = resnet50
tfms = tfms_from_model(
    model, 64,
    aug_tfms=transforms_side_on,
    max_zoom=1.05,
)

data = ImageClassifierData.from_csv(
    path='../data', folder='train', csv_fname='../data/train/train.csv',
    suffix='.jpg', skip_header=True,
    test_name='test',
    val_idxs=get_cv_idxs(n, val_pct=0.2),
    tfms=tfms, bs=32,
)

learn = ConvLearner.pretrained(model, data)
learn.load('80k_unfrozen_res50_l064_07.h5')

# Note that this rebinds `y` to whatever TTA returns for the test set.
y_test_res_pred_tta, y = learn.TTA(is_test=True)
# Average the stacked TTA predictions over the first axis.
y_test_res_pred_mean_tta = y_test_res_pred_tta.mean(axis=0)



### VGG

In [454]:
# VGG-16 test-set inference: rebuild the dataset and learner, restore the
# saved weights, then run TTA on the test images.
model = vgg16
tfms = tfms_from_model(
    model, 64,
    aug_tfms=transforms_side_on,
    max_zoom=1.05,
)

data = ImageClassifierData.from_csv(
    path='../data', folder='train', csv_fname='../data/train/train.csv',
    suffix='.jpg', skip_header=True,
    test_name='test',
    val_idxs=get_cv_idxs(n, val_pct=0.2),
    tfms=tfms, bs=32,
)

learn = ConvLearner.pretrained(model, data)
learn.load('80k_unfrozen_vgg_l062_03.h5')

# Note that this rebinds `y` to whatever TTA returns for the test set.
y_test_vgg_pred_tta, y = learn.TTA(is_test=True)
# Average the stacked TTA predictions over the first axis.
y_test_vgg_pred_mean_tta = y_test_vgg_pred_tta.mean(axis=0)



### Mean

In [455]:
# Ensemble the two test-set predictions by simple averaging, then binarise
# at 0.27 (the best-scoring threshold in the validation sweep output).
y_test_ens = np.array([y_test_res_pred_mean_tta, y_test_vgg_pred_mean_tta])
y_test_pred = y_test_ens.mean(axis=0)
y_pred = (y_test_pred > 0.27).astype(int)

## Upload

In [456]:
# Replace every 1 in the 0/1 prediction matrix with its column number by
# broadcasting a single row of column numbers across all rows, then convert
# to nested Python lists. A 0 in the result is ambiguous: it is produced both
# for "not predicted" and for a predicted column 0.
indexes = (y_pred * np.arange(y_pred.shape[1])).tolist()

In [457]:
# Image ids: take the second '/'-separated component of each test file name
# and strip everything from the first '.' onwards.
# NOTE(review): assumes names look like '<folder>/<id>.jpg' - confirm.
test_files = [f.split('/')[1].split('.')[0] for f in data.test_ds.fnames]

# Space-separated label string per test image, read directly from the 0/1
# prediction matrix. The previous version went through `indexes` and kept only
# entries `> 0`, which also discarded a genuinely predicted class at column 0,
# so data.classes[0] could never appear in the submission.
test_labels = [
    " ".join(data.classes[col] for col in np.flatnonzero(row))
    for row in y_pred
]

In [458]:
# Assemble the submission table: integer image ids, rows ordered by id, with
# a fresh 0..N-1 index.
test_df = (
    pd.DataFrame({'image_id': test_files, 'label_id': test_labels})
    .astype({'image_id': 'int'})
    .sort_values('image_id')
    .reset_index(drop=True)
)

In [459]:
# Write the submission table to the working directory, without the index
# column.
test_df.to_csv('submission_07_jc.csv', index=False)

In [165]:
# Quick look at the binarised prediction matrix.
# NOTE(review): this cell's execution count (165) is far older than the cells
# above, so the recorded output may not reflect the current y_pred.
y_pred

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])