# Planet: Understanding the Amazon from Space
This is a kaggle compitition named [Planet: Understanding the Amazon from Space](https://www.kaggle.com/c/planet-understanding-the-amazon-from-space).

In [0]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# 1.1 Install and Importing libraries

In [0]:
!pip install fastai

In [0]:
# https://opencv.org/
!apt-get -qq install -y libsm6 libxext6 && pip install -q -U opencv-python
import cv2

In [0]:
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
# This file contains all the main external libs we'll use
from fastai.imports import *
from fastai.torch_imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

# # 1.2 Collecting data

In [0]:
!mkdir data && cd data
!mkdir planetAmazon && cd planetAmazon

In [0]:
cd planetAmazon

We will use the jpg version instead of tif version.

In [0]:
!kg download -u puzzlemusa -p puzzlemusa13 -c planet-understanding-the-amazon-from-space -f sample_submission_v2.csv.zip
!kg download -u puzzlemusa -p puzzlemusa13 -c planet-understanding-the-amazon-from-space -f test-jpg-additional.tar.7z
!kg download -u puzzlemusa -p puzzlemusa13 -c planet-understanding-the-amazon-from-space -f test-jpg.tar.7z
!kg download -u puzzlemusa -p puzzlemusa13 -c planet-understanding-the-amazon-from-space -f test_v2_file_mapping.csv.zip
!kg download -u puzzlemusa -p puzzlemusa13 -c planet-understanding-the-amazon-from-space -f train-jpg.tar.7z
!kg download -u puzzlemusa -p puzzlemusa13 -c planet-understanding-the-amazon-from-space -f train_v2.csv.zip

In [0]:
!apt-get install -y p7zip-full 

In [0]:
!7z x test-jpg.tar.7z
!7z x test-jpg-additional.tar.7z
!7z x train-jpg.tar.7z

In [0]:
!tar -xvf test-jpg.tar
!tar -xvf test-jpg-additional.tar
!tar -xvf train-jpg.tar

In [0]:
!unzip sample_submission_v2.csv.zip
!unzip test_v2_file_mapping.csv.zip
!unzip train_v2.csv.zip

In [97]:
ls

[0m[01;34m__MACOSX[0m/                     test-jpg-additional.tar       [01;34mtmp[0m/
rst.csv                       test-jpg-additional.tar.7z    [01;34mtrain-jpg[0m/
sample_submission_v2.csv      test-jpg.tar                  train-jpg.tar
sample_submission_v2.csv.zip  test-jpg.tar.7z               train-jpg.tar.7z
[01;34mtest-jpg[0m/                     test_v2_file_mapping.csv      train_v2.csv
[01;34mtest-jpg-additional[0m/          test_v2_file_mapping.csv.zip  train_v2.csv.zip


# 2 Initial exploration

# 3 Initial model

In [0]:
from sklearn.metrics import fbeta_score
import warnings

def f2(preds, targs, start=0.17, end=0.24, step=0.01):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        return max([fbeta_score(targs, (preds>th), 2, average='samples')
                    for th in np.arange(start,end,step)])

In [0]:
PATH = '/content/data/planetAmazon/'
metrics=[f2]

In [0]:
arch = resnet34

In [0]:
label_csv = f'{PATH}train_v2.csv'
n = len(list(open(label_csv)))-1
val_idxs = get_cv_idxs(n)

In [0]:
label_csv

In [0]:
def get_data(sz):
  tfms = tfms_from_model(arch, sz, aug_tfms=transforms_top_down, max_zoom=1.05)
  return ImageClassifierData.from_csv(PATH, 'train-jpg', label_csv, test_name='test-jpg', val_idxs=val_idxs, suffix='.jpg', tfms=tfms)

In [0]:
data = get_data(256)

In [0]:
x, y = next(iter(data.val_dl))

In [0]:
y

In [0]:
list(zip(data.classes, y[0]))

In [0]:
plt.imshow(data.val_ds.denorm(to_np(x))[0]*1.4);

In [0]:
sz = 64

In [0]:
data = get_data(sz)

In [0]:
data = data.resize(int(sz*1.3), 'tmp')

In [0]:
learn = ConvLearner.pretrained(arch, data, metrics=metrics)

In [0]:
lrf = learn.lr_find()
learn.sched.plot()

In [0]:
lr = .2

In [0]:
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)

In [0]:
lrs = np.array([lr/9, lr/3, lr])

In [0]:
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)

In [0]:
learn.save(f'{sz}')

In [0]:
learn.load(f'{sz}')

In [0]:
learn.sched.plot_loss()

# 3.1 Increase size

In [0]:
sz = 128

In [0]:
data = get_data(sz)
learn.set_data(data)
learn.freeze()
learn.fit(lr, 3, cycle_len=1, cycle_mult=3)

In [0]:
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=3)

In [0]:
learn.save(f'{sz}')

In [0]:
learn.load(f'{sz}')

In [0]:
sz = 256

In [0]:
data = get_data(sz)
learn.set_data(data)
learn.freeze()
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)

In [0]:
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=3, cycle_mult=2)

In [0]:
learn.save(f'{sz}')

In [0]:
learn.load(f'{sz}')

In [70]:
log_preds, y = learn.TTA(is_test=True)
probs = np.exp(log_preds)



In [101]:
f2(log_preds, y)

ValueError: ignored

In [84]:
tsld = opt_th(log_preds, y)

TypeError: ignored

In [85]:
log_preds[:5]

array([[0.94686, 0.00038, 0.01426, 0.0014 , 0.00081, 0.81163, 0.00419, 0.00018, 0.16646, 0.12761, 0.21177,
        0.00435, 0.96888, 0.28992, 0.00093, 0.00278, 0.10957],
       [0.00607, 0.00003, 0.00082, 0.00021, 0.00062, 0.99995, 0.00007, 0.00002, 0.00333, 0.00188, 0.00004,
        0.00003, 0.9997 , 0.00206, 0.00029, 0.00014, 0.00653],
       [0.63741, 0.00011, 0.01014, 0.00085, 0.00091, 0.99886, 0.00009, 0.00054, 0.1099 , 0.02487, 0.00103,
        0.00066, 0.99916, 0.91273, 0.00478, 0.00374, 0.46469],
       [0.00433, 0.     , 0.00005, 0.00019, 0.00017, 0.99997, 0.00001, 0.     , 0.00151, 0.0003 , 0.00001,
        0.00004, 0.99998, 0.0013 , 0.00011, 0.00001, 0.001  ],
       [0.00382, 0.00004, 0.00012, 0.00012, 0.00003, 0.00413, 0.00107, 0.00002, 0.00074, 0.00134, 0.00122,
        0.98752, 0.99546, 0.00325, 0.00004, 0.00001, 0.43924]], dtype=float32)

In [0]:
classes = np.array(data.classes)
rst = np.array([" ".join(classes[(np.where(pp>0.17))]) for pp in log_preds])
filenames = np.array([os.path.basename(fn).split('.')[0] for fn in data.test_ds.fnames])
frame = pd.DataFrame(rst, index=filenames, columns=['tags'])
frame.to_csv(f'{PATH}rst.csv', index_label='image_name')

In [100]:
frame.shape

(40669, 1)

In [0]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once in a notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [98]:
# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Create & upload a file.
uploaded = drive.CreateFile({'title': 'rst.csv'})
uploaded.SetContentFile('rst.csv')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

Uploaded file with ID 1ykPKzzDQ1aQcnMzoh1Q2UPG9E3Bjwf6v


In [99]:
!kg submit rst.csv -u puzzlemusa -p puzzlemusa13 -c planet-understanding-the-amazon-from-space -m "1st try from colab after formatting"

something went wrong
