In [1]:
from pathlib import Path

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Dataset locations: everything lives under the MLRSNet directory in the user's home.
root_dir = Path("~/work/MLRSNet/").expanduser()
image_dir = root_dir.joinpath("Images")  # directory used to build image file paths
csv_path = root_dir.joinpath("labels.csv")  # table read with pd.read_csv below

In [3]:
# Load the label table stored at `csv_path` into a DataFrame.
df = pd.read_csv(csv_path)

# Extract the label names (`labels`)

In [4]:
# List the column names to see which columns the loaded table provides.
print(df.columns)

Index(['image', 'airplane', 'airport', 'bare soil', 'baseball diamond',
       'basketball court', 'beach', 'bridge', 'buildings', 'cars', 'chaparral',
       'cloud', 'containers', 'crosswalk', 'dense residential area', 'desert',
       'dock', 'factory', 'field', 'football field', 'forest', 'freeway',
       'golf course', 'grass', 'greenhouse', 'gully', 'habor', 'intersection',
       'island', 'lake', 'mobile home', 'mountain', 'overpass', 'park',
       'parking lot', 'parkway', 'pavement', 'railway', 'railway station',
       'river', 'road', 'roundabout', 'runway', 'sand', 'sea', 'ships', 'snow',
       'snowberg', 'sparse residential area', 'stadium', 'swimming pool',
       'tanks', 'tennis court', 'terrace', 'track', 'trail',
       'transmission tower', 'trees', 'water', 'wetland', 'wind turbine'],
      dtype='object')


In [5]:
# Every column other than the image identifier is a label.
# `image_path` is a helper column that a later cell adds to `df`; excluding it
# by name (instead of slicing by position) keeps this cell correct when it is
# re-run after that column exists.
non_label_columns = {"image", "image_path"}
labels = [column for column in df.columns if column not in non_label_columns]
print(f"There are {len(labels)} labels: {labels}")

There are 60 labels: ['airplane', 'airport', 'bare soil', 'baseball diamond', 'basketball court', 'beach', 'bridge', 'buildings', 'cars', 'chaparral', 'cloud', 'containers', 'crosswalk', 'dense residential area', 'desert', 'dock', 'factory', 'field', 'football field', 'forest', 'freeway', 'golf course', 'grass', 'greenhouse', 'gully', 'habor', 'intersection', 'island', 'lake', 'mobile home', 'mountain', 'overpass', 'park', 'parking lot', 'parkway', 'pavement', 'railway', 'railway station', 'river', 'road', 'roundabout', 'runway', 'sand', 'sea', 'ships', 'snow', 'snowberg', 'sparse residential area', 'stadium', 'swimming pool', 'tanks', 'tennis court', 'terrace', 'track', 'trail', 'transmission tower', 'trees', 'water', 'wetland', 'wind turbine']


# Check how often each label occurs

In [6]:
# Per-label summary: column sums of the label columns ("positives") and the
# same sums divided by the number of rows in `df` ("positive ratio").
n_samples = len(df)
stats = (
    df[labels]
    .sum()
    .to_frame(name="positives")
    .assign(**{"positive ratio": lambda frame: frame["positives"] / n_samples})
)

# Restrict float formatting to three decimals for this display only.
with pd.option_context("display.precision", 3):
    display(stats)

Unnamed: 0,positives,positive ratio
airplane,2306,0.021
airport,2481,0.023
bare soil,39345,0.36
baseball diamond,1996,0.018
basketball court,3726,0.034
beach,2485,0.023
bridge,2772,0.025
buildings,51305,0.47
cars,34064,0.312
chaparral,5954,0.055


# Glance at sample images for each label

In [7]:
def get_image_path(base: str, directory: str, postfix: str) -> str:
    """Build the path of an image file and return it as a string.

    Args:
        base: File name of the image, without ``postfix``.
        directory: Directory the file name is joined onto.
        postfix: Text appended directly to ``base``; may be empty.

    Returns:
        ``directory`` joined with ``base + postfix``, converted to ``str``.
    """
    file_name = f"{base}{postfix}"
    return str(Path(directory).joinpath(file_name))

# An empty postfix means the values of the `image` column are used unchanged
# as file names.
postfix = ""
# Series.apply forwards the extra keyword arguments to get_image_path for each value.
df["image_path"] = df["image"].apply(get_image_path, directory=image_dir, postfix=postfix)

In [8]:
from typing import List

def display_images(images: List[np.ndarray], n_rows: int, n_columns: int, figsize=(20, 10)):
    """Show images on an ``n_rows`` x ``n_columns`` grid inside a single figure.

    Images fill the grid in the order given (subplot positions 1, 2, ...);
    cells without an image are left empty.

    Args:
        images: Images to draw; each one is passed to ``imshow``.
        n_rows: Number of grid rows.
        n_columns: Number of grid columns.
        figsize: Figure size handed to ``plt.figure``.

    Raises:
        ValueError: If there are more images than grid cells.
    """
    images = list(images)  # accept any iterable while still allowing len()
    capacity = n_rows * n_columns
    if len(images) > capacity:
        # Fail before any figure is created, with a message that names the cause.
        raise ValueError(
            f"Cannot place {len(images)} images on a {n_rows}x{n_columns} grid "
            f"({capacity} cells)."
        )

    fig = plt.figure(figsize=figsize)
    for position, image in enumerate(images, 1):
        ax = fig.add_subplot(n_rows, n_columns, position)
        ax.imshow(image)
        ax.axis("off")

    plt.show()
    # Close explicitly so figures do not accumulate when this is called in a loop.
    plt.close(fig)

def paths2images(paths: List[str]) -> List[np.ndarray]:
    """Read each image file in ``paths`` with ``mpimg.imread``.

    Args:
        paths: Image file paths, read in the order given.

    Returns:
        A list holding the result of ``mpimg.imread`` for every path, in the
        same order as ``paths``.
    """
    loaded = []
    for image_path in paths:
        loaded.append(mpimg.imread(image_path))
    return loaded

In [None]:
from IPython.display import HTML

# Grid layout and sampling settings for the per-label galleries.
n_rows = 2
n_columns = 10
n_images = n_rows * n_columns  # at most one image per grid cell
figsize = (20, 4)
random_seed = 42

# For every label, show a random sample of images where the label column is 1
# ("Positives") and a random sample where it is 0 ("Negatives").
for label in labels:
    display(HTML(f"<h1>{label}</h1>"))

    for heading, flag in (("Positives", 1), ("Negatives", 0)):
        display(HTML(f"<h2>{heading}:</h2>"))
        df_sub = df[df[label] == flag]
        if df_sub.empty:
            # Nothing to draw; say so instead of rendering an empty figure.
            display(HTML("<p>(no images)</p>"))
            continue

        # DataFrame.sample raises ValueError when n exceeds the number of
        # rows (sampling without replacement), so cap n for rare groups.
        df_sub = df_sub.sample(n=min(n_images, len(df_sub)), random_state=random_seed)
        paths = df_sub["image_path"].tolist()
        images = paths2images(paths)
        display_images(images, n_rows, n_columns, figsize)