In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
from warnings import simplefilter
import os


simplefilter("ignore")

def hide_spines(ax, spines=("top", "right", "left", "bottom")):
    """Make the selected spines of an axes invisible.

    Parameters
    ----------
    ax : object
        Axes-like object exposing a ``spines`` mapping whose values provide
        ``set_visible``.
    spines : iterable of str, optional
        Keys of ``ax.spines`` to hide. Defaults to all four sides. The default
        is an immutable tuple so it can never be modified between calls.

    Returns
    -------
    object
        The same ``ax`` that was passed in, so the call can be chained or
        reassigned.
    """
    for spine in spines:
        ax.spines[spine].set_visible(False)

    return ax

In [None]:
# Locations of the training images and of the training CSV.
train_images_dir = "../input/happy-whale-and-dolphin/train_images"
train_path = "../input/happy-whale-and-dolphin/train.csv"

# Read the CSV, then turn each bare file name in the "image" column into a
# path inside train_images_dir.
train = pd.read_csv(train_path)
train["image"] = [
    os.path.join(train_images_dir, filename) for filename in train["image"]
]

In [None]:
# Bar chart of how many training rows belong to each species.
fig = plt.figure(figsize=(25, 10))
fig.set_facecolor("#fff")
ax = fig.add_subplot()
ax.set_facecolor("#fff")
# zorder=0 keeps the grid behind the bars, which are drawn with zorder=2.
ax.grid(color="lightgrey", alpha=0.7, axis="both", zorder=0)

# Draw on `ax` explicitly instead of relying on pyplot's implicit current axes.
sns.countplot(x="species", data=train, palette="magma", zorder=2, ax=ax)

# Tick styling; x labels are rotated so the species names do not overlap.
ax.xaxis.set_tick_params(labelsize=10, size=0, pad=5, labelrotation=45)
ax.yaxis.set_tick_params(labelsize=10, size=0, pad=5)
ax = hide_spines(ax)

ax.set_ylabel("Count", fontsize=14, labelpad=10)
ax.set_xlabel("Species", fontsize=14, labelpad=20)

ax.set_title("Species Distribution", loc="left", fontsize=25, fontweight="bold")
# plt.show() works on every backend; Figure.show() only warns on non-GUI
# (inline) backends and does not display anything by itself.
plt.show()

In [None]:
def load_image(path, channels=cv2.COLOR_BGR2RGB):
    """Read an image file with OpenCV and convert its colour space.

    Parameters
    ----------
    path : str
        Location of the image file.
    channels : int, optional
        Colour-conversion code passed to ``cv2.cvtColor``. Defaults to
        ``cv2.COLOR_BGR2RGB``.

    Returns
    -------
    numpy.ndarray
        The converted image.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist. This is a subclass of ``Exception``, so
        callers that caught the generic exception keep working.
    OSError
        If the file exists but OpenCV cannot decode it.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"Path '{path}' doesn't exist.")

    image = cv2.imread(path)
    # cv2.imread signals an unreadable/corrupt file by returning None rather
    # than raising; fail here with a clear message instead of letting
    # cvtColor crash on the missing array.
    if image is None:
        raise OSError(f"Could not decode image at '{path}'.")

    return np.asarray(cv2.cvtColor(image, channels))

In [None]:
# Grid of example images: one row per species, up to `n` images per row.
species = train["species"].unique()
n = 5  # number of example images shown per species
rows, columns = len(species), n

# squeeze=False keeps `axes` two-dimensional even when there is only one
# species (or n == 1), so axes[i, j] indexing is always valid.
fig, axes = plt.subplots(
    rows, columns, figsize=(3 * columns, 3 * rows), squeeze=False
)

for i, specie in enumerate(species):
    sample_paths = train.loc[train["species"] == specie, "image"].head(n)
    row_title = specie.replace("_", " ").title()

    for j, sample_path in enumerate(sample_paths):
        ax = axes[i, j]
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        ax.imshow(load_image(sample_path))

        # Only the first image of a row carries the species name.
        if j == 0:
            ax.set_title(row_title, loc="left", y=1.05, fontsize=20, fontweight="bold")

    # A species with fewer than n images would otherwise leave empty framed
    # axes with ticks in the grid; blank those slots out.
    for unused_ax in axes[i, len(sample_paths):]:
        unused_ax.set_axis_off()

fig.tight_layout()
# plt.show() works on every backend; Figure.show() only warns on non-GUI
# (inline) backends and does not display anything by itself.
plt.show()

In [None]:
# Bar chart of the individuals that appear most often in the training table.
fig = plt.figure(figsize=(15, 7))
fig.set_facecolor("#fff")
ax = fig.add_subplot()
ax.set_facecolor("#fff")
# zorder=0 keeps the grid behind the bars, which are drawn with zorder=2.
ax.grid(color="lightgrey", alpha=0.7, axis="both", zorder=0)

n = 10  # how many of the most frequent individuals to show
# value_counts() sorts by frequency in descending order, so the first n
# entries are the n most frequent IDs. A separate name is used for the
# Series so that individual_id_unique only ever holds the ID array.
top_individuals = train["individual_id"].value_counts().head(n)
individual_id_counts = top_individuals.values
individual_id_unique = top_individuals.index.to_numpy()

# Draw on `ax` explicitly instead of relying on pyplot's implicit current axes.
sns.barplot(
    x=individual_id_unique,
    y=individual_id_counts,
    palette="magma",
    zorder=2,
    ax=ax,
)

ax.xaxis.set_tick_params(labelsize=10, size=0, pad=5, labelrotation=0)
ax.yaxis.set_tick_params(labelsize=10, size=0, pad=5)
ax = hide_spines(ax)

ax.set_ylabel("Count", fontsize=14, labelpad=10)
ax.set_xlabel("Individual ID", fontsize=14, labelpad=20)

ax.set_title(f"TOP {n} Individual ID", loc="left", fontsize=25, fontweight="bold")
# plt.show() works on every backend; Figure.show() only warns on non-GUI
# (inline) backends and does not display anything by itself.
plt.show()