In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns
from fastai.vision import *

# Define paths

In [None]:
# Root of the competition data. The image folders are derived from it so the
# dataset location is written down exactly once.
PATH = Path('../input/planet-understanding-the-amazon-from-space')
TRAIN = PATH/'train-jpg'
TEST = PATH/'test-jpg-v2'
# List the contents of the dataset root.
PATH.ls()

# Load data

In [None]:
# Training labels: the `image_name` and `tags` columns are used further down.
df = pd.read_csv(PATH/'train_v2.csv')
# Sample submission file; used below for row counts and shape comparisons.
samplesub = pd.read_csv(PATH/'sample_submission_v2.csv')

In [None]:
# Peek at the first rows of the training labels.
df.head()

# Exploratory data analysis

### Let's check how many files there are in the training and test sets

In [None]:
# Row counts of the two CSV files.
print('Number of rows in train_v2.csv = {}'.format(len(df)))
print('Number of rows in sample_submission_v2.csv = {}'.format(len(samplesub)))

# Number of entries actually present in the image folders, to compare with the CSVs.
print('Number of files in train-jpg = {}'.format(len(TRAIN.ls())))
print('Number of files in test-jpg-v2 = {}'.format(len(TEST.ls())))

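### Now to the classes. Since this is a multi-label classification problem, each image can carry several labels at once (for example `clear`, `primary` or `partly_cloudy`). Below we count how many images share each tag combination.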



In [None]:
# Count the images for every distinct value of the `tags` column, then turn
# the resulting series back into a two-column frame (tags, image_name).
tag_counts = df.groupby('tags')['image_name'].count()
labels = tag_counts.reset_index()
labels.head()

In [None]:
# Most frequent tag combinations first (`image_name` holds the count here).
labels.sort_values(by='image_name', ascending=False).head()

In [None]:
# Bar chart of the number of images per distinct tag combination.
fig, ax = plt.subplots(figsize=(30, 12))
sns.barplot(x='tags', y='image_name', data=labels, ax=ax)
# Only place an x tick at every 5th position to thin out the tick labels.
ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
# The `image_name` column holds counts after the groupby, so label the axis accordingly.
ax.set(xlabel='Tag combination', ylabel='Number of images',
       title='Images per tag combination');

We can see that some tag combinations are overrepresented in our dataset. For instance, `clear primary` dominates the dataset, followed by `partly_cloudy primary`. Let's take a look at some images of these classes.

In [None]:
# First few rows whose tag string is exactly one of the two dominant combinations.
is_clear_primary = df['tags'] == 'clear primary'
is_partly_cloudy_primary = df['tags'] == 'partly_cloudy primary'
sample_primary = df[is_clear_primary].head()
sample_partly_cloudy = df[is_partly_cloudy_primary].head()

In [None]:
# Show the sampled 'partly_cloudy primary' rows.
sample_partly_cloudy

In [None]:
# Show the sampled 'clear primary' rows.
sample_primary

In [None]:
# Display one training image as an example of the first class.
open_image(TRAIN/'train_2.jpg') # Clear primary


In [None]:
# Display one training image as an example of the second class.
open_image(TRAIN/'train_17.jpg') # partly_cloudy primary

# Train model

We are going to train a naive model using fastai v1 (the library version taught in the fast.ai course v3).

### Define transformations

In [None]:
# Data augmentation: vertical flips enabled, lighting changes capped at 0.1,
# zoom capped at 1.05 and max_warp set to 0.
tfms = get_transforms(
    flip_vert=True,
    max_lighting=0.1,
    max_zoom=1.05,
    max_warp=0.,
)

In [None]:
# Build the labelled item list: image files are looked up under TRAIN from the
# `image_name` column (plus '.jpg'), labels come from the space-delimited
# `tags` column. The 20% validation split is seeded so that restarting the
# kernel and re-running the notebook yields the same train/validation split.
src = (ImageList.from_df(df, path=TRAIN, cols='image_name', suffix='.jpg')
       .split_by_rand_pct(0.2, seed=42)
       .label_from_df(cols='tags', label_delim=' '))

In [None]:
# Apply the augmentations, batch in groups of 64 and normalise with the
# ImageNet statistics.
data = (
    src.transform(tfms)
       .databunch(bs=64)
       .normalize(imagenet_stats)
)

In [None]:
# Visual sanity check: show a 3-row grid of images from a batch.
data.show_batch(rows=3)

### Training

In [None]:
# Backbone architecture handed to cnn_learner below.
arch = models.resnet50

In [None]:
# Create the learner and track fbeta during training. Checkpoints go to
# /kaggle/working — presumably because the input directory is read-only on
# Kaggle (TODO confirm).
learn = cnn_learner(data,arch,metrics=[fbeta],model_dir='/kaggle/working')

In [None]:
# Run the learning-rate finder; its recorder output is plotted in the next cell.
learn.lr_find()

In [None]:
# Plot what the learning-rate finder recorded.
learn.recorder.plot()

In [None]:
# Learning rate for the first training stage; fine-tuning later uses lr/5 as its upper bound.
lr = 1e-2

In [None]:
# Stage 1: one-cycle training for 7 epochs with the learning rate chosen above.
learn.fit_one_cycle(7,slice(lr))

In [None]:
# Checkpoint the stage-1 weights.
learn.save('stage1-256-resnet50')

### Fine tuning

In [None]:
# Unfreeze the model before the fine-tuning run below.
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder on the unfrozen model and plot the result.
learn.lr_find()
learn.recorder.plot()


In [None]:
# Stage 2: 7 more epochs with a learning-rate range from 1e-5 up to lr/5.
learn.fit_one_cycle(7,slice(1e-5,lr/5))

In [None]:
# Plot the losses recorded during the fine-tuning run.
learn.recorder.plot_losses()

In [None]:
# Checkpoint the fine-tuned weights.
learn.save('stage2-256-resnet50')

In [None]:
# Export the learner to /kaggle/working/export.pkl; it is reloaded from that
# directory for inference below.
learn.export('/kaggle/working/export.pkl')

In [None]:
# The test set lives in two folders; gather both and merge them into one list.
test_main = ImageList.from_folder(TEST)
test_additional = ImageList.from_folder(PATH/'test-jpg-additional')
test = test_main.add(test_additional)
len(test)

In [None]:
# Reload the exported learner with the test items attached. Note that this
# rebinds `learn`, replacing the training learner defined earlier.
learn = load_learner(Path('/kaggle/working'), test=test)
# Predictions for the test set; the second returned value is discarded.
preds, _ = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# Use the same cut-off the validation metric was scored with: fastai's `fbeta`
# (passed to cnn_learner without arguments) thresholds at 0.2 by default, so
# cutting at 0.5 here would produce a submission that the F2 score reported
# during training does not describe.
thresh = 0.2
classes = learn.data.classes
# One space-separated tag string per test image, keeping every class whose
# predicted score exceeds the threshold.
labelled_preds = [
    ' '.join(classes[i] for i, p in enumerate(pred) if p > thresh)
    for pred in preds
]

In [None]:
# Inspect the first five predicted tag strings.
labelled_preds[:5]

In [None]:
# Image ids for the submission: each test item's file name without its
# extension. `stem` drops whatever suffix the file has, whereas slicing off
# the last four characters is only right for three-letter extensions.
fnames = [f.stem for f in learn.data.test_ds.items]

In [None]:
# Assemble the submission table: one row per test image with its predicted tags.
submission_columns = ['image_name', 'tags']
df_preds = pd.DataFrame(
    {'image_name': fnames, 'tags': labelled_preds},
    columns=submission_columns,
)

In [None]:
# Write the submission file without the index column.
df_preds.to_csv('/kaggle/working/submission.csv', index=False)

In [None]:
# Sorted view of the predictions by image name, for eyeballing only.
a = df_preds.sort_values(by='image_name')
a.head()

In [None]:
# Shape of the prediction table, to compare with the sample submission's shape below.
df_preds.shape

In [None]:
# Look at the last 50 rows of the sample submission.
samplesub.tail(50)

In [None]:
# Shape of the sample submission, for comparison with df_preds.shape above.
samplesub.shape

In [None]:
# Optional: submit directly from the notebook with the Kaggle CLI.
# Left commented out — presumably so that running all cells never submits by accident.
#! kaggle competitions submit planet-understanding-the-amazon-from-space -f {'/kaggle/working/submission.csv'} -m "My submission"