In [None]:
import os
from glob import glob
from math import ceil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

# Move the working directory up one level; the relative paths used further
# down ("data/...", "results/...") are resolved from there.
# NOTE: the change is relative to the current directory, so running this cell
# a second time without restarting the kernel moves up one more level.
os.chdir("..")

# Imported after the chdir -- presumably `utils` lives in the parent
# directory and is only importable from there; confirm against the repo layout.
from utils import indexed_from_arr, load_colormap

In [None]:
# Read the coverage spreadsheets and collect the cropped panel files
CROPPED_DIR = "data/panels_a_cropped"

# Both spreadsheets are addressed relative to the cropped-panel directory
df_ai, df_hu = (
    pd.read_excel(os.path.join(CROPPED_DIR, rel_path))
    for rel_path in ("../ai_coverage_a.xlsx", "../human_coverage_a.xlsx")
)

# Sorted so that the jpg and png lists share the same filename order
image_paths, label_paths = (
    sorted(glob(os.path.join(CROPPED_DIR, pattern)))
    for pattern in ("*.jpg", "*.png")
)
assert len(image_paths) == len(label_paths)
print(f"{len(image_paths)} images found")

In [None]:
# Decompose the panel name into coating letter, location (F or P) and a
# two-character replicate id
df_ai[["Coating", "Location", "Replicate"]] = (
    df_ai["Name"].str.extract(r"([A-Z])(F|P)(..)")
)

# Drop the columns that are no longer needed, add an all-zero "Others"
# column and index the table by panel identity and date
df_ai = (
    df_ai
    .drop(columns=["Name", "Entropy"])
    .assign(Others=0.)
    .set_index(["Coating", "Location", "Replicate", "Date"])
    .sort_index()
)
df_ai.head()

In [None]:
# Decompose the image file name into coating letter, location (F or P),
# two-character replicate id and the recording date
df_hu[["Coating", "Location", "Replicate", "Date"]] = (
    df_hu["name"].str.extract(
        r"([A-Z])(F|P)(..).([0-9]{1,2}.[0-9]{1,2}.[0-9]{2}).JPG")
)

# Parse the date strings, discard the raw file name and index the table by
# panel identity and date
df_hu = (
    df_hu
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .drop(columns=["name"])
    .set_index(["Coating", "Location", "Replicate", "Date"])
    .sort_index()
)
df_hu.head()

In [None]:
# Number of human-annotated rows per coating on the two July 2019 dates
(
    df_hu
    .loc[df_hu.index.isin(("2019-07-05", "2019-07-03"), level="Date")]
    .groupby(["Coating", "Date"])
    .size()
)

In [None]:
# Dates and coatings excluded from both time series
dropped_dates = ("2019-07-05", "2019-07-03")
# Coatings with fewer replicates than the others
dropped_coatings = ("E", "Z")

# Keep only rows that are on neither an excluded date nor an excluded coating
df_hu = df_hu.loc[
    ~df_hu.index.isin(dropped_dates, level="Date")
    & ~df_hu.index.isin(dropped_coatings, level="Coating")
]
df_ai = df_ai.loc[
    ~df_ai.index.isin(dropped_dates, level="Date")
    & ~df_ai.index.isin(dropped_coatings, level="Coating")
]

# Persist the filtered tables next to the source spreadsheets
df_hu.to_parquet(os.path.join(CROPPED_DIR, "../human_time_series_a.parquet"))
df_ai.to_parquet(os.path.join(CROPPED_DIR, "../ai_time_series_a.parquet"))

In [None]:
# Class-name -> index mapping and the flat colour palette of the label images
classes, palette = load_colormap()
# The "empty" class is reported under the name "Others" in the coverage tables
classes.update(Others=classes.pop("empty"))
# One RGB triple per row, scaled from 0-255 to the 0-1 range matplotlib expects
colors = np.reshape(palette, (-1, 3)) / 255

Figure 3a: Comparison of automated and manual analysis for a single panel over time.

In [None]:
# Panel shown in the figure. The name is split below into the first three
# index levels of the coverage tables (Coating, Location, Replicate).
name = "PP2F"
# Files containing "7.5.19" are skipped -- presumably the 2019-07-05 recording
# that is also removed from the coverage tables; confirm against the file names.
img_paths = [p for p in image_paths if name in p and "7.5.19" not in p]
lbl_paths = [p for p in label_paths if name in p and "7.5.19" not in p]
assert img_paths, f"no images found for panel {name}"
assert len(img_paths) == len(lbl_paths), "image/label count mismatch"

# One column per image; rows are photo, segmentation and coverage bars.
# squeeze=False keeps `axes` two-dimensional even if only one image matches.
fig, axes = plt.subplots(3, len(img_paths), figsize=(12, 6.5), squeeze=False,
    gridspec_kw={'height_ratios': [8, 8, 1], "hspace": 0.1, "wspace": 0.2})

height = 0.2

# Coverage rows of this panel, looked up once instead of once per class and
# image, paired with the vertical position of the corresponding bar:
# U-Net in the upper half, human in the lower half.
panel_key = (name[0], name[1], name[2:])
bar_sources = (
    (df_ai.loc[panel_key], height / 2),
    (df_hu.loc[panel_key], -height / 2),
)

for i, ((ax1, ax2, ax3), img_path, lbl_path) in enumerate(
        zip(axes.T, img_paths, lbl_paths), 1):
    # Context managers close the image files once they have been drawn
    with Image.open(img_path) as img, Image.open(lbl_path) as lbl:
        ax1.set_title(f"Month {i}", size="large")
        ax1.imshow(img)
        ax2.imshow(lbl)

    for panel_rows, y in bar_sources:
        # NOTE(review): row i-1 of the date-sorted table is paired with the
        # i-th file in filename order -- assumes both orders agree; verify.
        row = panel_rows.iloc[i - 1]
        cover_prev = 0
        # Stack one horizontal segment per class, left to right
        for c, class_idx in classes.items():
            cover = row[c] * 100
            ax3.barh(y, cover, height, left=cover_prev,
                color=colors[class_idx], label=c)
            cover_prev += cover

    ax3.set_xlim((0, 100))
    ax3.set_ylim((-height, height))
    ax3.set_xlabel("Coverage [%]")

    if i == 1:
        # First column keeps its (frameless) axes so the row labels can be shown
        ax1.set_yticks([])
        ax1.xaxis.set_visible(False)
        for s in ax1.spines.values(): s.set_visible(False)
        ax1.set_ylabel("Panel image", size="large", labelpad=10)

        ax2.set_yticks([])
        ax2.xaxis.set_visible(False)
        for s in ax2.spines.values(): s.set_visible(False)
        ax2.set_ylabel("Segmentation", size="large", labelpad=10)

        ax3.set_yticks((-height / 2, height / 2))
        ax3.set_yticklabels(["Human", "U-Net"])
    else:
        ax1.set_axis_off()
        ax2.set_axis_off()
        ax3.set_yticks([])

plt.savefig("results/figures/sample time series.svg", bbox_inches="tight", dpi=300)

Figure 3b: Comparison of automated and manual analysis, averaged per coating.

In [None]:
coatings = df_ai.index.unique(level="Coating")
dates = df_ai.index.unique(level="Date")

# Two rows of subplots, one subplot per coating.
# squeeze=False keeps `axes` two-dimensional even for a single column, so
# `axes.shape[1]` below is always valid.
fig, axes = plt.subplots(2, ceil(len(coatings)/2), figsize=(10, 5.25),
    squeeze=False)

width = 0.4
# Bar groups are placed at 1..number of dates and labelled as months
inds = np.arange(1, len(dates) + 1)

for i, (ax, coat) in enumerate(zip(axes.ravel(), coatings)):
    # U-Net bars are shifted to the left of each tick, human bars to the right
    for df, w in ((df_ai, -width), (df_hu, width)):
        # Rows of this coating, looked up once instead of once per class
        coat_df = df.loc[coat]
        n = len(coat_df.index.unique(level="Replicate"))
        cover_prev = 0
        # Stack one bar segment per class, bottom to top
        for c, class_idx in classes.items():
            by_date = coat_df[c].groupby("Date")
            # Error bar: standard deviation over sqrt(number of replicates)
            yerr = by_date.std() / np.sqrt(n) * 100
            cover = by_date.mean() * 100
            ax.bar(inds + w / 2, cover, width, bottom=cover_prev,
                color=colors[class_idx], label=c, yerr=yerr,
                error_kw={"capsize":0, "elinewidth":1})

            cover_prev += cover

    ax.set_title(f"Coating {coat}", pad=10)
    ax.set_xticks(inds)

    ax.set_ylim((0, 100))
    # y label only on the first column, x label only on the second row
    if not i % axes.shape[1]:
        ax.set_ylabel("Coverage [%]")
    if i >= axes.shape[1]:
        ax.set_xlabel("Months")

# Blank out the grid cell left over when the number of coatings is odd
for unused_ax in axes.ravel()[len(coatings):]:
    unused_ax.set_axis_off()

# Every bar call adds a handle; keep one handle per class name for the legend
handles, labels = ax.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
fig.legend(by_label.values(), by_label.keys(), loc="lower center", ncol=6,
    bbox_to_anchor=(0.5, -0.12), frameon=False)

fig.tight_layout()
# Raw string: "\i" is not a valid escape sequence in a normal string literal
ax.annotate(r"Bars: $\it{left}$ U-Net, $\it{right}$ human",
    (0.014, 0.11), xycoords="figure fraction")
plt.savefig("results/figures/big time series.svg", bbox_inches="tight")