In [None]:
!pip install -Uqq fastai

In [None]:
import pandas as pd
from fastai.vision.all import *
from sklearn.model_selection import StratifiedKFold

In [None]:
# Root directory of the dataset; the CSV mapping and the train/test image folders are read from under it.
path = Path('../input/sorghum-id-fgvc-9')

In [None]:
# Load the training table; later cells read its `image` and `cultivar` columns.
train_df = pd.read_csv(path/'train_cultivar_mapping.csv')

In [None]:
# Summary of the raw table before any cleaning.
train_df.describe()

# Data Cleaning

In [None]:
# Drop every row that has a missing value. Reassigning instead of using
# inplace=True keeps the step explicit and avoids mutating the frame behind
# the back of anything else holding a reference to it.
train_df = train_df.dropna()

In [None]:
# (rows, columns) after dropping incomplete rows.
train_df.shape

In [None]:
# Re-check the summary now that rows with missing values are gone.
train_df.describe()

In [None]:
# dropna leaves gaps in the index; renumber rows 0..n-1 (discarding the old index) so labels equal row positions.
train_df = train_df.reset_index(drop=True)

In [None]:
# Peek at the first rows of the cleaned table.
train_df.head()

Getting the unique values of cultivar and their counts

In [None]:

# Distinct cultivar labels present in the training table.
train_df.cultivar.unique()

In [None]:
# Number of rows per cultivar; reset_index turns the counts Series into a two-column frame.
train_df.cultivar.value_counts().reset_index()

# K Fold Splitting

In [None]:
# Number of cross-validation folds; the training loop below fits one model per fold.
N_SPLITS = 5

In [None]:

SEED = 42  # fixed seed so fold membership is identical after every Restart & Run All

# Stratify on cultivar so each fold keeps roughly the same class proportions.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
train_df["kfold"] = -1  # -1 marks rows that have not been assigned to a fold yet
# Look the column up by name rather than assuming it is the last column.
kfold_col = train_df.columns.get_loc("kfold")
for fold, (_, val_index) in enumerate(skf.split(X = train_df.image.values, y = train_df.cultivar.values)):
    # val_index contains row positions, hence positional (.iloc) assignment.
    train_df.iloc[val_index, kfold_col] = fold

In [None]:
# Inspect the first rows, which now include the kfold column.
train_df.head()

In [None]:
def splitter(df, fold=0):
    """Split `df` into training and validation rows for one cross-validation fold.

    Args:
        df: DataFrame with a `kfold` column holding each row's fold number.
        fold: Fold number to hold out for validation (default 0).

    Returns:
        `[train_idx, valid_idx]`: two lists of integer row *positions*. Rows whose
        `kfold` equals `fold` go to `valid_idx`; every other row goes to `train_idx`.
    """
    # Return positions rather than index labels: a fastai splitter's output is
    # used positionally, so labels are only correct when the frame happens to
    # have a fresh 0..n-1 index. Positions are correct for any index.
    is_valid = (df.kfold == fold).tolist()
    train_idx = [pos for pos, held_out in enumerate(is_valid) if not held_out]
    valid_idx = [pos for pos, held_out in enumerate(is_valid) if held_out]
    return [train_idx, valid_idx]

In [None]:
# Sanity check: print the (rows, columns) shape of the rows assigned to each fold.
for i in range(N_SPLITS):
    in_fold = train_df.kfold == i
    print(train_df.loc[in_fold].shape)

# DataLoaders, Training and Inference

In [None]:
def get_data(fold=0):
    """Build the fastai DataLoaders for one cross-validation fold.

    Args:
        fold: Fold number whose rows form the validation set; all remaining
            rows of `train_df` are used for training.

    Returns:
        The DataLoaders created from `train_df`, with images read from
        `path/'train_images'` and labels taken from the `cultivar` column.
    """
    from functools import partial  # local import keeps this cell self-contained

    datablock = DataBlock(
        blocks = (ImageBlock, CategoryBlock),
        get_x = ColReader('image', path/'train_images'),
        get_y = ColReader('cultivar'),
        # Bind the requested fold. Passing the bare `splitter` function would
        # always use its default (fold 0), so every model in the
        # cross-validation loop would train and validate on the same split.
        splitter = partial(splitter, fold=fold),
        item_tfms = Resize(225)
    )
    return datablock.dataloaders(train_df)

K-fold cross-validation: for each fold, train a model with `fine_tune(5)`, run inference on the test images, and append that fold's predictions to a list.

In [None]:
# The list of test files is the same for every fold, so build it once up front.
test_images = get_image_files(path/'test')

predictions = []  # one tensor of test-set predictions per fold
for i in range(N_SPLITS):
    dls = get_data(i)
    learn = vision_learner(dls, resnet34, metrics = accuracy)
    learn.fine_tune(5)
    try:
        # The file name is resolved relative to learn.path. The previous
        # literal 'path/...' referred to a directory actually named "path",
        # not to the `path` variable, so the export could never succeed.
        learn.export(f'model_fold_{i}.pkl')
        print(f'Model for fold {i} is saved.')
    except Exception as err:
        # Saving is best effort and must not abort training, but report the
        # reason instead of swallowing it (a bare except would also trap
        # KeyboardInterrupt).
        print(f'Model for fold {i} is not saved: {err!r}')

    # Predict on the test images with this fold's model.
    test_dataloader = learn.dls.test_dl(test_images)
    preds, _ = learn.get_preds(dl=test_dataloader)
    predictions.append(preds)

In [None]:
# One prediction tensor per fold: show how many there are and the shape of the first.
len(predictions), predictions[0].shape

In [None]:
# Prediction vector for the first test item from the first fold's model.
predictions[0][0]

In [None]:
# Stacking adds a new leading dimension with one entry per fold.
torch.stack(predictions).shape

Averaging the predictions across folds

In [None]:
# Average the per-fold predictions element-wise over the fold dimension.
# Note: despite its name, pred_idxs holds averaged per-class scores, not indices.
pred_idxs = torch.stack(predictions).mean(dim=0)
pred_idxs.shape

# Finding the class having maximum probability

In [None]:
# For every test item, take the index of the class with the highest averaged score.
class_idxs = pred_idxs.argmax(dim=1)
class_idxs.shape

In [None]:
# Predicted class index for the first test item.
class_idxs[0]

In [None]:
# Look the predicted indices up in the vocab; `dls` here is the one left over from the last fold of the loop.
dls.vocab[class_idxs]

In [None]:
# Translate each predicted class index into its label from the vocab.
results = [dls.vocab[i] for i in class_idxs]

In [None]:
# Sanity check: number of predicted class indices...
len(class_idxs)

In [None]:
# ...should equal the number of test images...
len(test_images)

In [None]:
# ...and the number of decoded labels.
len(results)

# Saving predictions in a dataframe and then to a csv file

Creating a dataframe containing the prediction for each image

In [None]:
# Take the file names from test_images, the exact list the predictions were
# computed from, so that row i of `results` is paired with the image it belongs
# to. Listing the directory a second time does not guarantee the same files in
# the same order, which would silently attach labels to the wrong images.
images = [img.name for img in test_images]
# Building from a dict raises if the two lists differ in length, instead of
# silently truncating the way zip() does.
submissions = pd.DataFrame({'filename': images, 'cultivar': results})

In [None]:
# Preview the first rows of the submission table.
submissions.head()

Saving the dataframe in the csv format

In [None]:
# Write the submission file; index=False leaves the DataFrame index out of the CSV.
submissions.to_csv('submission.csv', index = False)