In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read while streaming each download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<URL-encoded download URL>" entries, one
# per data source; the download loop splits on ',' and ':' and URL-decodes the
# second part. The URLs carry X-Goog-Expires / X-Goog-Signature query parameters.
# NOTE(review): these look like time-limited signed URLs — presumably they must
# be regenerated once expired; confirm before relying on this cell.
DATA_SOURCE_MAPPING = 'planet-understanding-the-amazon-from-space:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F6322%2F868312%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240604%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240604T204936Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D455d3163bd4ccf1129956e0a932d17629731b7595453952a83ed07d1e0b9c82b8cbefcc55681cce01c2247635a7aa73a85ffd97ed957e2d89c82497b27a5de005b45336a8bf5a5839739a581106df9c3d80c5cdf3572201bf400ec76c63eaa75f76e549e03f0230aa242da19fb73028437543215affc4d541025542506b84a42cb58b933130636f75f4866e55850c7420f6af3cbd26e1de3375b564fe15ffc6b41c1821b3bf947596f7a2af2a3e8fbbe2f58d5cd1f3ea0ce61a760906aabace9333da10883c6588d4f5f23aace3b69953a96eee0425e59131dfe998d6712a4445ba8cef1bca714006e02235bfab176091620357f4f9e1501ee2a63cc251cca1a,planets-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F503255%2F938046%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240604%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240604T204936Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D66c146f5fa0dbd4a47039fa1ea61f7611d8129f82c20b4415d972d874be1da9cd2ef735c3bc4ca15501fd72a65a0fe3beaf3a1ae766662aa051b6b889c4b6ae56421d58e59676c879f6f1697a2260728eeb58321199bd424d481f0349a381adb1a1b19058789b127f1aedb31ded3cd51aa1734fb4c08f68604bf489c51cac6edad6e8f1b88af31ef967f14fc210102ae77a1d10acd0503afe14d9f3deccf920a87515c4de092278f499aa6789bb67887844df1d636ede84954708bac9d9566422bd29bcf2a7c33b9fedce00cc71dabb691abadfd8214f9b6ae0c8add862e6939b981c533a7f52ff0ff9ab71ca9fbc2ffdfdeb22ffad5fcbd044fce6f09fee9d3'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source into KAGGLE_INPUT_PATH/<directory> and
# unpack it there. Failures are reported and the loop moves on to the next source.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so a stray colon later in the entry cannot
    # break the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length may be absent (or zero); in that case the progress
            # bar stays empty instead of crashing on int(None) / division by zero.
            total_length = fileres.headers['content-length']
            total_bytes = int(total_length) if total_length and total_length.isdigit() else 0
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            # read() returns b'' at end of stream, which ends the iteration.
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
            # The tar branch reopens the temp file by name, so buffered bytes
            # must reach the disk first.
            tfile.flush()
            # NOTE: extractall() trusts the member paths stored in the archive;
            # only use this with archives from a trusted source.
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind to a fresh name so the imported `tarfile` module is not shadowed.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


Downloading planet-understanding-the-amazon-from-space, 3080605 bytes compressed
Downloaded and uncompressed: planet-understanding-the-amazon-from-space
Downloading planets-dataset, 1609474013 bytes compressed
Downloaded and uncompressed: planets-dataset
Data source import complete.


In [None]:
!pip install fastai --upgrade -q
from fastai.vision.all import *
import warnings
warnings.filterwarnings("ignore")

!pip install wwf -q
!pip install timm -q
from wwf.vision.timm import *

!pip install efficientnet_pytorch -q


# Import data

In [None]:
# Base folder of the dataset; train_classes.csv, train-jpg/ and
# sample_submission.csv are all read relative to it later in the notebook.
path = Path('../input/planets-dataset/planet/planet')

In [None]:
# Training labels table; its 'image_name' and 'tags' columns are read by
# get_x / get_y when the DataBlock is built.
df_ = pd.read_csv(path/'train_classes.csv')
df_

In [None]:
def get_x(r):
    """Return the location of the training JPEG named by ``r['image_name']``.

    The file is looked up in the ``train-jpg`` folder below the global ``path``.
    """
    jpg_name = r['image_name']+'.jpg'
    return path/'train-jpg'/jpg_name

def get_y(r):
    """Return the labels of row ``r`` as a list, splitting ``r['tags']`` on whitespace."""
    tag_string = r['tags']
    return tag_string.split()

def get_data(size=224,bs=64,data_df=df_):
    """Build DataLoaders for the image / multi-label task from a labels frame.

    Args:
        size: value passed to ``Resize`` as the per-item transform.
        bs: batch size handed to ``dataloaders``.
        data_df: frame whose rows are fed to ``get_x`` / ``get_y``; defaults to
            the ``df_`` object that existed when this function was defined.

    Returns:
        The result of ``DataBlock.dataloaders`` for ``data_df``.
    """
    # Batch-level pipeline: augmentations (vertical flips on, warping off)
    # followed by normalisation with the ImageNet statistics.
    augmentations = aug_transforms(flip_vert=True,max_warp=0)
    batch_pipeline = [*augmentations, Normalize.from_stats(*imagenet_stats)]

    planet_block = DataBlock(
        blocks=(ImageBlock, MultiCategoryBlock),
        get_x=get_x,
        get_y=get_y,
        splitter=RandomSplitter(seed=42),  # fixed seed keeps the split repeatable
        item_tfms=Resize(size),
        batch_tfms=batch_pipeline,
    )
    return planet_block.dataloaders(data_df,bs=bs)

In [None]:
# DataLoaders at size 300 with batches of 40.
dls = get_data(size=300, bs=40)

In [None]:
# Visual sanity check: display a 1 x 3 grid of samples from `dls`.
dls.show_batch(nrows=1, ncols=3)

# Training

In [None]:
# F-beta metric with beta=2, averaged per sample, using a 0.2 threshold.
f2samples = FBetaMulti(beta=2,average='samples',thresh=0.2)
# Multi-label accuracy at the same 0.2 threshold, reported next to the F2 score.
metrics = [partial(accuracy_multi, thresh=0.2), f2samples]
# Callbacks handed to the learner; MixUp is passed as a class, not an instance.
# NOTE(review): presumably the learner instantiates it — confirm.
cbs = [MixUp]

In [None]:
# Create the learner for `dls` with the 'efficientnet_b3' architecture name and
# the metrics / callbacks defined above.
# NOTE(review): timm_learner presumably comes from the wwf.vision.timm star import — confirm.
learn = timm_learner(dls, 'efficientnet_b3', metrics=metrics, cbs=cbs)

In [None]:
# Train: fine_tune is called with 12 epochs, freeze_epochs=4 and base_lr=3e-2.
learn.fine_tune(12, base_lr=3e-2, freeze_epochs=4)

# Submission using TTA

In [None]:
# Test images live in two folders: names starting with 'test' are looked up in
# `test_path`, every other name in `file_path`.
test_path = Path('../input/planets-dataset/planet/planet/test-jpg')
file_path = Path('../input/planets-dataset/test-jpg-additional/test-jpg-additional')
submission_df = pd.read_csv(path/'sample_submission.csv')
# One image path per row of the sample submission, in the same order.
testing_path = (submission_df['image_name'] + '.jpg').apply(
    lambda jpg_name: (test_path if jpg_name.startswith('test') else file_path)/jpg_name
)

def prediction(filename='submission.csv', tta=False, thresh=0.2):
    """Predict tags for the test images and write them to a submission CSV.

    Uses the global ``learn``, ``testing_path`` and ``submission_df``.

    Args:
        filename: path of the CSV file to write.
        tta: when true, predictions come from ``learn.tta``; otherwise from
            ``learn.get_preds``.
        thresh: a label is kept when its score is strictly above this value
            (default 0.2, the cutoff previously hard-coded here).

    Returns:
        A new DataFrame: a copy of ``submission_df`` whose ``tags`` column holds
        the space-joined predicted labels. ``submission_df`` is left untouched.
    """
    tst_dl = learn.dls.test_dl(testing_path)
    if tta:
        predictions = learn.tta(dl=tst_dl)
    else:
        predictions = learn.get_preds(dl=tst_dl)

    # The first element of the returned tuple is used as the per-label scores.
    scores = predictions[0]
    predlist = [' '.join(learn.dls.vocab[mask]) for mask in (scores > thresh)]

    # Work on a copy so repeated calls never modify the shared sample submission.
    df = submission_df.copy()
    df['tags'] = predlist

    df.to_csv(filename, index=False)
    return df

In [None]:
# Write the test-time-augmentation submission and display the resulting frame.
prediction(filename='submission_tta.csv', tta=True)