This notebook visualizes the EfficientNet features of the Cassava Leaf Disease Classification competition data in 2D using the t-distributed Stochastic Neighbor Embedding algorithm, a.k.a. t-SNE. The EfficientNet features are extracted separately in [this](https://www.kaggle.com/tolgadincer/cldc-efficientnet-features) notebook because inference with EfficientNet is relatively slow.

Based on the t-SNE analysis, the 5 classes given in the data are not well separated from each other. However, bear in mind that this result is model-dependent.

In [None]:
import os
import json
import numpy as np
import pandas as pd

import cv2
from tqdm.notebook import tqdm
import imageio

import tensorflow as tf

# cuML's TSNE is imported only when TensorFlow can see a GPU.
# `tf.config.list_physical_devices('GPU')` is the documented replacement for
# the deprecated `tf.test.is_gpu_available()`; an empty list is falsy.
# NOTE: when no GPU is found, `TSNE` is left undefined and the cell that
# builds the embedding will fail with a NameError.
if tf.config.list_physical_devices('GPU'):
    from cuml.manifold import TSNE

import matplotlib.pyplot as plt
plt.rcParams['figure.dpi'] = 200

In [None]:
# Locations of the competition data.
BASE_DIR = '../input/cassava-leaf-disease-classification'
TRAIN_IMAGES_FOLDER = os.path.join(BASE_DIR, 'train_images')
TEST_IMAGES_FOLDER = os.path.join(BASE_DIR, 'test_images')

# Training table; every `image_id` is prefixed with the train-images folder.
df = pd.read_csv(os.path.join(BASE_DIR, 'train.csv'))
df['image_id'] = TRAIN_IMAGES_FOLDER + '/' + df['image_id']

# Label number -> disease name. JSON object keys are always strings, so they
# are converted to int, and the 'Cassava ' prefix is removed from every name.
with open(os.path.join(BASE_DIR, "label_num_to_disease_map.json"),
          encoding='utf-8') as file:
    map_classes = {
        int(label): name.replace('Cassava ', '')
        for label, name in json.load(file).items()
    }

In [None]:
# Hyper-parameters of the 2-component t-SNE embedding, gathered in one place.
tsne_params = dict(
    n_components=2,
    perplexity=30,
    n_neighbors=120,
    n_iter=5000,
    learning_rate=50,
    random_state=42,
)

# Load the pre-computed features, embed them, and save the embedding so it can
# be reloaded without refitting.
features = np.load('../input/cldc-efficientnet-features/features.npy')
tsne = TSNE(**tsne_params)
features_hat = tsne.fit_transform(features)
np.save('features_hat.npy', features_hat)

In [None]:
# One panel per class on shared axes, so the region each class occupies in the
# embedding can be compared directly.
# NOTE(review): assumes the rows of `features_hat` are in the same order as the
# rows of `df` -- confirm against the feature-extraction notebook.
NUM_CLASSES = 5
fig, ax = plt.subplots(1, NUM_CLASSES, figsize=(14, 3), sharex=True, sharey=True)
for i in range(NUM_CLASSES):
    in_class = df.label == i  # boolean mask, computed once per panel
    ax[i].scatter(features_hat[:, 0][in_class],
                  features_hat[:, 1][in_class],
                  c=df.label[in_class],
                  # Pin the colour scale to the full label range. Within a
                  # panel the colour values are constant, and autoscaling a
                  # constant array maps every class to the same colour.
                  vmin=0, vmax=NUM_CLASSES - 1,
                  s=0.8)
    ax[i].set_xlabel('F0')
    if i == 0:
        # Shared y axis: label it on the leftmost panel only.
        ax[i].set_ylabel('F1')
    ax[i].set_title(f'Label-{i}\n{map_classes[i]}')

plt.tight_layout()
plt.show()

In [None]:
# All classes in a single panel, coloured by label.
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
# The colormap is passed to the scatter call itself. `plt.set_cmap` only
# changes the default for later plots (plus pyplot's "current image", which
# `ax.scatter` does not register), so calling it after plotting left this
# figure in the previous default colormap.
ax.scatter(features_hat[:, 0],
           features_hat[:, 1],
           c=df.label, cmap='Set1', s=0.05)
ax.set_xlabel('F0')
ax.set_ylabel('F1')

plt.tight_layout()
# Kept so that 'Set1' stays the default colormap for any plots created later.
plt.set_cmap('Set1')
plt.show()