### TL;DR:
This is an example of training a species classifier used to label the test dataset. It is not a full training run: only 1 epoch on 1 fold, because of Kaggle's time limits.
My actual predictors were trained for 5 epochs on each of 5 folds, with batch size 32. The average validation accuracy was 98.6%. My training log:

```
Fold 0 results
epoch     train_loss  valid_loss  accuracy  time
0         0.767579    0.238040    0.937200  11:17
1         0.634035    0.160043    0.960419  11:16
2         0.505939    0.099926    0.974919  11:18
3         0.440220    0.063862    0.985108  11:20
4         0.424336    0.055490    0.987362  11:21

Fold 1 results
epoch     train_loss  valid_loss  accuracy  time
0         0.749576    0.311206    0.919075  13:05
1         0.626596    0.168583    0.956794  19:06
2         0.527211    0.093829    0.976389  18:50
3         0.434782    0.061494    0.985108  16:42
4         0.426031    0.058528    0.985990  12:47

Fold 2 results
epoch     train_loss  valid_loss  accuracy  time
0         0.781070    0.232682    0.937690  11:22
1         0.622824    0.142476    0.964730  12:09
2         0.518737    0.078674    0.978642  16:59
3         0.462086    0.062647    0.984129  11:21
4         0.423454    0.055090    0.986382  12:28

Fold 3 results
epoch     train_loss  valid_loss  accuracy  time
0         0.760659    0.247128    0.929061  17:27
1         0.630934    0.138731    0.966392  19:21
2         0.539373    0.091751    0.978248  19:05
3         0.445625    0.061304    0.985009  19:03
4         0.420296    0.055081    0.987164  18:49

Fold 4 results
epoch     train_loss  valid_loss  accuracy  time
0         0.769067    0.217799    0.941211  11:38
1         0.644752    0.174513    0.953655  17:56
2         0.508525    0.090482    0.975407  18:48
3         0.453215    0.067261    0.985009  19:09
4         0.426319    0.062320    0.985205  19:12
```


In [None]:
# Install timm, which supplies the `create_model` factory imported below.
!pip install timm

In [None]:
import os
import sys
import gc

import numpy as np 
import pandas as pd 
from timm import create_model
from fastai.vision.all import *

In [None]:
# Every dataset location is derived from the single competition input folder.
root_dir = '../input/happy-whale-and-dolphin/'
train_dir, test_dir, train_csv, test_csv = [
    os.path.join(root_dir, entry)
    for entry in ('train_images', 'test_images', 'train.csv', 'sample_submission.csv')
]

## Read data and fix errors in species based on discussions:

In [None]:
# Load the test listing and attach the full on-disk path of every image.
test_df = pd.read_csv(test_csv)


def _test_image_path(file_name):
    """Return the location of `file_name` inside the test image folder."""
    return os.path.join(test_dir, file_name)


test_df['path'] = test_df['image'].map(_test_image_path)

In [None]:
# Shared random seed (used for the fold split and the dataloaders) and the
# per-batch image count handed to the dataloaders.
seed, BATCH_SIZE = 0xDEAD, 16

In [None]:
train_df = pd.read_csv(train_csv)
# Collapse misspelled / duplicate species names onto one canonical label each.
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on `train_df.species`: that form is a chained
# in-place update, which recent pandas warns about and which does not modify
# the DataFrame when copy-on-write is enabled.
train_df['species'] = train_df['species'].replace({
    "globis": "short_finned_pilot_whale",
    "pilot_whale": "short_finned_pilot_whale",
    "kiler_whale": "killer_whale",
    "bottlenose_dolpin": "bottlenose_dolphin",
})
# Cell output: the cleaned species names and how many distinct ones remain.
sorted(train_df.species.unique()), len(train_df.species.unique())

#### Generate image paths and class labels:

In [None]:
# Number the species in order of first appearance: species name -> integer id.
class_mapper = {name: idx for idx, name in enumerate(train_df.species.unique())}

# Full path of every training image, and the integer id of its species.
train_df['path'] = train_df['image'].map(lambda file_name: os.path.join(train_dir, file_name))
train_df['label'] = train_df['species'].map(lambda name: class_mapper[name])

print(class_mapper)
train_df.head()


In [None]:
# Sanity check: display the training image stored in the row labelled 1.
Image.open(train_df.loc[1, 'path'])

### Get folds for cross-validation:

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# Give every training row a fold id in [0, N_FOLDS); -1 marks "not assigned yet".
train_df['fold'] = -1

N_FOLDS = 5
strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)

# Address the column by name rather than by position (-1) so the assignment
# stays correct even if 'fold' is not the last column of the frame.
fold_col = train_df.columns.get_loc('fold')

# split() yields (train_indices, held_out_indices); the held-out part of the
# i-th split is what defines fold i.
for i, (_, valid_index) in enumerate(strat_kfold.split(train_df.index, train_df['label'])):
    train_df.iloc[valid_index, fold_col] = i

train_df['fold'] = train_df['fold'].astype('int')

# Show the fold sizes, then print the per-species sample count in every fold
# to check that the split is stratified.
train_df.fold.value_counts().plot.bar()
species_names = train_df.species.unique()  # hoisted: invariant across folds
for i in range(N_FOLDS):
    for cls in species_names:
        print(f'fold {i} class {cls} num samples {len(train_df[(train_df.fold == i) & (train_df.species==cls)])}')

# Training

In [None]:
def get_data(fold):
    """Build the dataloaders that hold out fold `fold` for validation.

    A copy of the global `train_df` gets an `is_valid` column that is True for
    rows whose `fold` value equals `fold`; `train_df` itself is left untouched.
    Image files are taken from the `path` column and targets from the `label`
    column. Images are resized to 224 per item, and brightness, contrast, hue
    and saturation augmentations are applied per batch.
    """
    fold_df = train_df.assign(is_valid=train_df['fold'] == fold)

    return ImageDataLoaders.from_df(
        fold_df,
        valid_col='is_valid',
        seed=seed,
        fn_col='path',
        label_col='label',
        y_block=CategoryBlock,
        bs=BATCH_SIZE,
        num_workers=8,
        item_tfms=Resize(224),
        batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]),
    )

In [None]:
# Visual check of the input pipeline: show up to nine samples from the
# dataloaders built with fold 0 held out.
data = get_data(fold=0)
data.show_batch(max_n=9)

In [None]:
def get_learner(fold_num):
    """Create a mixed-precision learner for one cross-validation fold.

    Builds the dataloaders for `fold_num` with `get_data`, creates a pretrained
    `swin_large_patch4_window7_224` timm model whose output size is the number
    of classes in those dataloaders, and wraps both in a fastai `Learner` with
    flattened cross-entropy loss and an accuracy metric.

    Returns a `(learner, dataloaders)` tuple.
    """
    dls = get_data(fold_num)
    backbone = create_model(
        'swin_large_patch4_window7_224',
        pretrained=True,
        num_classes=dls.c,
    )
    learner = Learner(dls, backbone, loss_func=CrossEntropyLossFlat(), metrics=accuracy)

    return learner.to_fp16(), dls

In [None]:
# Run the learning-rate finder on a fold-0 learner; the dataloaders returned
# alongside it are not needed here.
learn, _ = get_learner(fold_num=0)
learn.lr_find()

### Only one fold and one epoch, because of Kaggle's time limit:

In [None]:
# Test-set predictions, one tensor per trained fold.
all_preds = []
for i in range(N_FOLDS):
    print(f'Fold {i} results')

    # Train a fresh learner for this fold: one epoch of one-cycle training at
    # a maximum learning rate of 1e-4 with the MixUp callback.
    learn, dls = get_learner(fold_num=i)
    learn.fit_one_cycle(1, 1e-4, cbs=[MixUp()])
    learn.recorder.plot_loss()

    # Predict the test images with test-time augmentation (n=4 augmented passes).
    # NOTE(review): beta=0 presumably gives all weight to the augmented
    # predictions - confirm against the fastai `Learner.tta` documentation.
    test_dl = dls.test_dl(test_df)
    preds, _ = learn.tta(dl=test_dl, n=4, beta=0)
    all_preds.append(preds)

    # Free memory before the next fold. Garbage collection has to run BEFORE
    # the CUDA cache is emptied: empty_cache() can only return blocks that are
    # no longer owned by any live tensor, and the learner's objects may only
    # be reclaimed once the collector has run.
    del learn
    gc.collect()
    torch.cuda.empty_cache()
    # Deliberately stop after the first fold; remove this `break` to train
    # every fold.
    break

In [None]:
# Ensemble the folds by averaging their per-class prediction scores, then pick
# the highest-scoring class for each test image. This uses however many folds
# were collected in `all_preds`; with a single fold the average is just that
# fold's predictions, so nothing needs changing for full training.
fold_mean = torch.stack(all_preds).mean(dim=0)
# Convert to a NumPy array explicitly so the column holds plain integers.
test_df['label'] = fold_mean.argmax(dim=1).numpy()

# Invert the species -> id mapping to translate predicted ids back to names.
label_mapper = {v: k for k, v in class_mapper.items()}
test_df['species'] = test_df['label'].apply(lambda x: label_mapper[x])

# Histogram of the predicted species, with vertical tick labels.
plt.xticks(rotation='vertical')
test_df.species.hist(bins=26)

#### Save result to csv:

In [None]:
# Remove the 'predictions' column read from the sample submission, then write
# the remaining columns to disk without the index.
test_df = test_df.drop(columns='predictions')
test_df.to_csv('test_classes.csv', index=False)