## Setup

In [1]:
import os

import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
import seaborn as sns
# Apply seaborn's default settings, then switch the axes style to "dark".
sns.set()
sns.set_style("dark")
# NOTE(review): plt.viridis() is evaluated here for whatever side effect it has
# on matplotlib's default colormap; it presumably returns None, in which case
# set_palette() receives None rather than a viridis palette. Confirm intent.
sns.set_palette(plt.viridis())

import cv2 as cv
import brisque

In [5]:
camera_view = 'KL11-E1DC'
base_path = '../../data/real/'

# IDs are the integer file stems of every PNG in the camera-view directory.
# The directory is derived from base_path (it used to be hard-coded here), so
# changing base_path affects both the listing and the loading below.
availible_ids = [int(file.split('.')[0])
                 for file in os.listdir(os.path.join(base_path, camera_view))
                 if file.endswith(".png")]

# Full-resolution images, read as grayscale and keyed by ID.
imgbank = {id_: cv.imread(os.path.join(base_path, camera_view, str(id_) + '.png'), cv.IMREAD_GRAYSCALE)
          for id_ in availible_ids}

# cv.pyrDown of every image, keyed by the same IDs.
imgbank_ds = {id_: cv.pyrDown(img) for id_, img in imgbank.items()}

In [6]:
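# write out downsampled copies of all images (the BRISQUE cell below reads them from the 'ds' subdirectory, so uncomment and run this once if they do not exist yet)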
#[cv.imwrite(os.path.join(base_path, camera_view, 'ds', str(id_) + '.png'), cv.pyrDown(img)) for id_, img in imgbank.items() ]

In [7]:
def plot_n(imgs, ids, n, title):
    """Show the first `n` images in a two-row grid under a common title.

    Parameters
    ----------
    imgs : sequence of images; `imgs[i]` is drawn with `plt.imshow` using a
        fixed 0-255 intensity range.
    ids : sequence; `ids[i]` becomes the title of the i-th panel.
    n : int
        Number of panels to draw.
    title : str
        Figure-level title.
    """
    fig = plt.figure(1, figsize=(14.5,8))

    # Two rows with enough columns for n panels (rounded up for odd n).
    # Integer (rows, cols, index) arguments replace the former three-digit
    # code `200 + 10*n/2 + 1 + i`, which was a float and could only encode
    # layouts with single-digit column counts and indices.
    n_cols = max(1, (n + 1) // 2)

    for i in range(n):
        plt.subplot(2, n_cols, i + 1)
        plt.imshow(imgs[i], vmin=0, vmax=255)
        ax = plt.gca()
        # Hide ticks and tick labels; only the image and its title are shown.
        ax.tick_params(
                bottom=False,
                left=False,
                labelbottom=False,
                labelleft=False)
        plt.title(ids[i])

    fig.suptitle(title, fontsize=15, weight=600)
    #fig.subplots_adjust(hspace=0.01, wspace=0.005)
    # Leave room for the suptitle, then remove the gaps between panels.
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig.subplots_adjust(hspace=0.0, wspace=0.0)
    plt.show()
    
def get_imgs_from_dict(ids, ds=False):
    """Return the images for `ids`, in order, from the module-level banks.

    `imgbank_ds` is used when `ds` is truthy, `imgbank` otherwise.
    """
    bank = imgbank_ds if ds else imgbank
    return [bank[key] for key in ids]
    
    
def show_metric(df, metric, lowest, downsampled, n=8):
    """Plot the `n` images with the smallest or largest values of `metric`.

    Parameters
    ----------
    df : DataFrame indexed by image ID and containing the column `metric`.
    metric : str
        Column to rank by.
    lowest : bool
        If truthy show the `n` smallest values, otherwise the `n` largest
        (largest first).
    downsampled : bool
        Passed to `get_imgs_from_dict` to choose the image bank; also shown
        in the figure title.
    n : int
        Number of images to show.
    """
    ranked = df[metric].sort_values()

    if lowest:
        picked = ranked[:n].index
        label = f'ds={downsampled} - lowest {metric}'
    else:
        # Reverse so the largest value comes first.
        picked = ranked[-n:].index[::-1]
        label = f'ds={downsampled} - highest {metric}'

    plot_n(get_imgs_from_dict(picked, downsampled), picked, n, label)

# Metrics

Calculate and add to a common df

## Brisque

the lower the BRISQUE the better the image looks.

In [None]:
brisq = brisque.BRISQUE()

# BRISQUE score of each full-resolution PNG, keyed by image ID.
scores = {}
for id_ in imgbank.keys():
    scores[id_] = brisq.get_score(os.path.join(base_path, camera_view, str(id_) + '.png'))

# Same for the PNGs stored in the 'ds' subdirectory of the camera view.
scores_ds = {}
for id_ in imgbank_ds.keys():
    scores_ds[id_] = brisq.get_score(os.path.join(base_path, camera_view, 'ds', str(id_) + '.png'))

In [None]:
# One row per image ID, one column per metric; start with the BRISQUE score.
df = (pd.DataFrame
        .from_dict(scores, orient='index', columns=['brisque'])
        .rename_axis('id'))

df_ds = (pd.DataFrame
           .from_dict(scores_ds, orient='index', columns=['brisque'])
           .rename_axis('id'))

In [None]:
# take top/bottom n images 
n = 8

In [None]:
show_metric(df, 'brisque', lowest=True, downsampled=False)

In [None]:
show_metric(df, 'brisque', lowest=False, downsampled=False)

BRISQUE definitely looks like it is capturing some information about overall image 'quality' - above and beyond just noise.

In [None]:
show_metric(df_ds, 'brisque', lowest=True, downsampled=True)

In [None]:
show_metric(df_ds, 'brisque', lowest=False, downsampled=True)

For the DS images, BRISQUE seems to rate highly images with a high dynamic range, and rate poorly images that are more washed out

## Histogram based

Just mean and var for now

In [None]:
def get_hist_mean_var(image):
    """Return the intensity histogram, mean and standard deviation of `image`.

    Despite the function name, the third returned value is the standard
    deviation (square root of the variance), not the variance.

    Parameters
    ----------
    image : array-like of pixel intensities (any shape).

    Returns
    -------
    n : ndarray of shape (255,)
        Density histogram of the flattened pixels (255 bins spanning the
        data range, as produced by `np.histogram`).
    mean : float
        Mean pixel intensity.
    sigma : float
        Standard deviation of the pixel intensities.
    """
    pixels = np.asarray(image).ravel()
    n, _ = np.histogram(pixels, 255, density=True)

    # Take the moments straight from the pixels. Estimating them from the bin
    # midpoints (as was done before) adds a binning error of up to half a bin
    # width that depends on each image's intensity range.
    mean = pixels.mean(dtype=np.float64)
    sigma = pixels.std(dtype=np.float64)

    return n, mean, sigma

In [None]:
# (histogram, mean, sigma) per image, in availible_ids order, for both banks.
hist_stats = [get_hist_mean_var(imgbank[id_]) for id_ in availible_ids]
hist_stats_ds = [get_hist_mean_var(imgbank_ds[id_]) for id_ in availible_ids]

# Only the mean and sigma are kept; the histogram itself is discarded.
means = [stats[1] for stats in hist_stats]
sigmas = [stats[2] for stats in hist_stats]

means_ds = [stats[1] for stats in hist_stats_ds]
sigmas_ds = [stats[2] for stats in hist_stats_ds]

In [None]:
# Per-image intensity extremes of the full-resolution images, keyed by ID.
maxs, mins = {}, {}
for id_, img in imgbank.items():
    maxs[id_] = img.max()
    mins[id_] = img.min()

# means/sigmas are plain lists, so they are assigned positionally; the
# max/min Series are aligned on the image ID index.
df['hist_mean'] = means
df['hist_sigma'] = sigmas
df['hist_max'] = pd.Series(maxs)
df['hist_min'] = pd.Series(mins)
df['hist_range'] = df['hist_max'] - df['hist_min']

In [None]:
# Per-image intensity extremes of the downsampled images, keyed by ID.
maxs_ds, mins_ds = {}, {}
for id_, img in imgbank_ds.items():
    maxs_ds[id_] = img.max()
    mins_ds[id_] = img.min()

# means_ds/sigmas_ds are plain lists, so they are assigned positionally; the
# max/min Series are aligned on the image ID index.
df_ds['hist_mean'] = means_ds
df_ds['hist_sigma'] = sigmas_ds
df_ds['hist_max'] = pd.Series(maxs_ds)
df_ds['hist_min'] = pd.Series(mins_ds)
df_ds['hist_range'] = df_ds['hist_max'] - df_ds['hist_min']

In [None]:
show_metric(df, 'hist_mean', lowest=True, downsampled=False)

In [None]:
show_metric(df, 'hist_mean', lowest=False, downsampled=False)

The maxima and minima of the histogram means do what you would expect - although, due to high noise, there is actually not a large difference between the extremes.

In [None]:
df['hist_mean'].min(), df['hist_mean'].max()

In [None]:
show_metric(df_ds, 'hist_mean', lowest=True, downsampled=True)

In [None]:
show_metric(df_ds, 'hist_mean', lowest=False, downsampled=True)

In [None]:
df_ds['hist_mean'].min(), df_ds['hist_mean'].max()

> After DS, however, the difference is night and day. We can effectively separate over- and under-exposed images.

I said before realising that plt.imshow will automatically change the colour mapping based on the limits of the input data provided

In [None]:
show_metric(df, 'hist_sigma', lowest=True, downsampled=False)

In [None]:
show_metric(df, 'hist_sigma', lowest=False, downsampled=False)

Looking at histogram variance, as expected, gives information about contrast.

In [None]:
show_metric(df_ds, 'hist_sigma', lowest=True, downsampled=True)

In [None]:
show_metric(df_ds, 'hist_sigma', lowest=False, downsampled=True)

Again, the differences look much clearer after downsampling: the low-variance images, whether they are bright or dark, are mostly one thing (i.e. the whole image is dim or the whole image is bright). Meanwhile, at the high end, we get images with both bright regions and dark regions.

## Noise

A quick and dirty way of quantifying noise is taking

$$\langle|image - smoothing(image)|\rangle$$

Tried this with three different smoothing filters to see which would give the best results

In [None]:
def get_noise(img):
    """Noise estimate from the residual of a median blur (aperture size 5).

    Returns the mean and standard deviation of |img - medianBlur(img)|,
    computed on integer copies of both images.
    """
    blurred = cv.medianBlur(img, 5)
    residual = np.abs(np.subtract(img.astype(int), blurred.astype(int)))
    return residual.mean(), residual.std()


def get_noise_bl(img):
    """Noise estimate from the residual of a bilateral filter.

    The filter is applied with d=3 and sigmaColor = sigmaSpace = 50.
    Returns the mean and standard deviation of |img - filtered|, computed on
    integer copies of both images.
    """
    filter_args = dict(d=3, sigmaColor=50, sigmaSpace=50)
    blurred = cv.bilateralFilter(img, **filter_args)
    residual = np.abs(np.subtract(img.astype(int), blurred.astype(int)))
    return residual.mean(), residual.std()


def get_noise_nlm(img):
    """Noise estimate from the residual of fast non-local-means denoising.

    The denoiser is called with h=5 and searchWindowSize=3.
    Returns the mean and standard deviation of |img - denoised|, computed on
    integer copies of both images.
    """
    blurred = cv.fastNlMeansDenoising(img, h=5, searchWindowSize=3)
    residual = np.abs(np.subtract(img.astype(int), blurred.astype(int)))
    return residual.mean(), residual.std()

In [None]:
# Per-image median-filter noise, computed once per image bank and reused for
# both the mean and the spread (each image used to be filtered twice).
med_noise = np.array([get_noise(img)[0] for img in imgbank.values()])
med_noise_ds = np.array([get_noise(img)[0] for img in imgbank_ds.values()])

med_mean, med_std = med_noise.mean(), med_noise.std()
med_mean_ds, med_std_ds = med_noise_ds.mean(), med_noise_ds.std()

print(f'full image mean: {med_mean:.3f} +/- {med_std:.2f}')
print(f'ds image mean: {med_mean_ds:.3f} +/- {med_std_ds:.2f}')

In [None]:
# Per-image bilateral-filter noise, computed once per image bank and reused
# for both the mean and the spread (each image used to be filtered twice).
bl_noise = np.array([get_noise_bl(img)[0] for img in imgbank.values()])
bl_noise_ds = np.array([get_noise_bl(img)[0] for img in imgbank_ds.values()])

bl_mean, bl_std = bl_noise.mean(), bl_noise.std()
bl_mean_ds, bl_std_ds = bl_noise_ds.mean(), bl_noise_ds.std()

print(f'full image mean: {bl_mean:.3f} +/- {bl_std:.2f}')
print(f'ds image mean: {bl_mean_ds:.3f} +/- {bl_std_ds:.2f}')

In [None]:
# Per-image non-local-means noise, computed once per image bank and reused
# for both the mean and the spread (each image used to be denoised twice).
nlm_noise = np.array([get_noise_nlm(img)[0] for img in imgbank.values()])
nlm_noise_ds = np.array([get_noise_nlm(img)[0] for img in imgbank_ds.values()])

nlm_mean, nlm_std = nlm_noise.mean(), nlm_noise.std()
nlm_mean_ds, nlm_std_ds = nlm_noise_ds.mean(), nlm_noise_ds.std()

print(f'full image mean: {nlm_mean:.3f} +/- {nlm_std:.2f}')
print(f'ds image mean: {nlm_mean_ds:.3f} +/- {nlm_std_ds:.2f}')

In [None]:
plt.figure(figsize=(2,4))
ax = plt.gca()

ax.set_xticks([-1, 0, 1])
ax.set_xticklabels(['median', 'bl', 'nlm'])


plt.errorbar([-1, -1], [med_mean, med_mean_ds], 
             yerr=[med_std, med_std_ds], fmt='o')
plt.errorbar([0, 0], [bl_mean, bl_mean_ds], 
             yerr=[bl_std, bl_std_ds], fmt='o')
plt.errorbar([1, 1], [nlm_mean, nlm_mean_ds], 
             yerr=[nlm_std, nlm_std_ds], fmt='o')


let's use the bilateral based thing - though maybe median is a better choice as it's probably more efficient...

In [None]:
# Mean bilateral-filter residual per image, in availible_ids order.
noises = [get_noise_bl(imgbank[id_])[0] for id_ in availible_ids]

# Fix: the downsampled list used to be filled with the full-resolution value
# (`noises_ds.append(noise)`), so df_ds['noise_bl'] merely duplicated
# df['noise_bl']. It now holds the noise of the downsampled images.
noises_ds = [get_noise_bl(imgbank_ds[id_])[0] for id_ in availible_ids]

df['noise_bl'] = noises
df_ds['noise_bl'] = noises_ds

In [None]:
show_metric(df, 'noise_bl', lowest=True, downsampled=False)

In [None]:
show_metric(df, 'noise_bl', lowest=False, downsampled=False)

Definitely working as expected

In [None]:
show_metric(df_ds, 'noise_bl', lowest=True, downsampled=True)

In [None]:
show_metric(df_ds, 'noise_bl', lowest=False, downsampled=True)

The difference is less pronounced, because we have already removed a lot of the noise in the DS step, but it still looks useful.

## Blur

In [None]:
def get_blur(img):
    """Return the negated variance of the Laplacian of `img`.

    The Laplacian is computed with 64-bit float output (cv.CV_64F); the sign
    is flipped, so the returned value is never positive.
    """
    laplacian = cv.Laplacian(img, cv.CV_64F)
    return -laplacian.var()

# Blur score per image, in availible_ids order, for both image banks.
blurs = [get_blur(imgbank[id_]) for id_ in availible_ids]
blurs_ds = [get_blur(imgbank_ds[id_]) for id_ in availible_ids]

df['blur'] = blurs
df_ds['blur'] = blurs_ds

In [None]:
show_metric(df, 'blur', lowest=True, downsampled=False)

In [None]:
show_metric(df, 'blur', lowest=False, downsampled=False)

In [None]:
show_metric(df_ds, 'blur', lowest=True, downsampled=True)

In [None]:
show_metric(df_ds, 'blur', lowest=False, downsampled=True)

In [None]:
# Short aliases for the subjective labels; only g ('good') and b ('bad')
# appear in the list below, d ('decent') is unused.
g,d,b='good','decent','bad'
#redo
# Hand-assigned label per image (109 entries), matched to images purely by
# position in availible_ids.
# NOTE(review): availible_ids comes from os.listdir, so these labels presumably
# only line up if the directory listing order matches the order used when the
# labels were written - confirm.
eyeball = pd.Series([b,b,b,b,b,b,b,g,g,b,
                     g,g,g,b,g,g,g,g,g,g,
                     b,b,g,b,g,b,b,b,b,g,
                     g,b,g,g,g,b,b,g,g,g,
                     g,b,g,g,g,b,g,g,b,g,
                     b,b,b,b,b,g,b,b,g,b,
                     g,b,g,b,b,b,b,g,g,b,
                     b,g,b,b,g,g,g,b,b,b,
                     g,b,g,b,b,g,b,g,b,g,
                     b,g,b,b,b,b,g,b,g,g,#
                     b,g,b,g,b,g,g,b,b],
                     index=availible_ids)

df['subjective_category'] = eyeball
# Note: g is rebound here from the 'good' label string to the object returned
# by sns.pairplot.
g = sns.pairplot(df, hue='subjective_category')

In [None]:
df_ds['subjective_category'] = eyeball
g = sns.pairplot(df_ds, hue='subjective_category')

# Cluster

In [None]:
from sklearn.neighbors import NearestNeighbors
from sklearn import preprocessing

def normalize_df(df, dropminmax=True, dropmean=True):
    """Return a copy of the metric columns of `df`, min-max scaled to 0-100.

    Parameters
    ----------
    df : DataFrame containing 'subjective_category' (always dropped) and the
        metric columns.
    dropminmax : bool
        Also drop 'hist_min' and 'hist_max'.
    dropmean : bool
        Also drop 'hist_mean'.

    Returns
    -------
    DataFrame with the remaining columns scaled by a MinMaxScaler with
    feature range (0, 100), keeping the original index and column names.
    """
    to_drop = ['subjective_category']
    if dropminmax:
        to_drop += ['hist_min', 'hist_max']
    if dropmean:
        to_drop.append('hist_mean')
    features = df.drop(columns=to_drop)

    scaler = preprocessing.MinMaxScaler((0,100))
    scaled = scaler.fit_transform(features)

    return pd.DataFrame(scaled, columns=features.columns, index=features.index)

In [None]:
X = normalize_df(df).values
nbrs = NearestNeighbors(n_neighbors=8).fit(X)
distances, indices = nbrs.kneighbors(X)

argmax = distances.mean(axis=1).argmax()
argmin = distances.mean(axis=1).argmin()

In [None]:
max_dist = df.index[indices[argmax]].values
dists = distances[argmax]
titles = list(zip(max_dist,np.around(dists)))
plot_n(get_imgs_from_dict(max_dist, ds=False), titles, 8, 'Furthest Neighbours to top left (id, dist)')

In [None]:
min_dist = df.index[indices[argmin]].values
dists = distances[argmin]
titles = list(zip(min_dist,np.around(dists)))
plot_n(get_imgs_from_dict(min_dist, ds=False), titles, 8, 'Closest Neighbours to top left (id, dist)')

In [None]:
X = normalize_df(df_ds).values
nbrs = NearestNeighbors(n_neighbors=8).fit(X)
distances, indices = nbrs.kneighbors(X)

argmax = distances.mean(axis=1).argmax()
argmin = distances.mean(axis=1).argmin()

In [None]:
max_dist = df_ds.index[indices[argmax]].values
dists = distances[argmax]
titles = list(zip(max_dist,np.around(dists)))
plot_n(get_imgs_from_dict(max_dist, ds=True), titles, 8, 'DS Furthest Neighbours to top left (id, dist)')

In [None]:
min_dist = df_ds.index[indices[argmin]].values
dists = distances[argmin]
titles = list(zip(min_dist,np.around(dists)))
plot_n(get_imgs_from_dict(min_dist, ds=True), titles, 8, 'DS Closest Neighbours to top left (id, dist)')

Do the same thing again but select neighbours within a radius

In [None]:
X = normalize_df(df, False, False).values
nbrs = NearestNeighbors(n_neighbors=8).fit(X)
distances, indices = nbrs.radius_neighbors(radius=15)

In [None]:
print('for each image, number of images within 15 units of it')
np.array([c.shape[0] for c in indices])

# PCA

In [None]:
from sklearn.decomposition import PCA

X = normalize_df(df).values
X_ds = normalize_df(df_ds).values

# Two-component PCA of the full-resolution metrics.
model = PCA(n_components=2)
model.fit(X)
X_2D = model.transform(X)

# Fix: fit and apply the dedicated downsampled model. Previously `model` was
# refit on X_ds, leaving `model_ds` unfitted and `model` no longer describing
# the full-resolution data.
model_ds = PCA(n_components=2)
model_ds.fit(X_ds)
X_2D_ds = model_ds.transform(X_ds)

# Copies of the metric frames with the two principal components appended.
df_ = df.copy()
df_['PCA1'] = X_2D[:, 0]
df_['PCA2'] = X_2D[:, 1]

df_ds_ = df_ds.copy()
df_ds_['PCA1'] = X_2D_ds[:, 0]
df_ds_['PCA2'] = X_2D_ds[:, 1]

def f(x):
    """Map a subjective label to a colour code: 0 for 'bad', 1 otherwise."""
    return 0 if x == 'bad' else 1
    
c = list(map(f, list(df_['subjective_category'].values)))
c_ds = list(map(f, list(df_ds_['subjective_category'].values)))

# Side-by-side scatter of the first two principal components, coloured by the
# subjective label code (0 = 'bad', 1 = otherwise).
fig = plt.figure(figsize=(12,6))
plt.subplot(121)
plt.scatter(X_2D[:, 0], X_2D[:, 1], c=c)
plt.xlabel('PCA1')
plt.ylabel('PCA2')
plt.title('Full res')

plt.subplot(122)
plt.scatter(X_2D_ds[:, 0], X_2D_ds[:, 1], c=c_ds)
plt.xlabel('PCA1')
plt.ylabel('PCA2')
# Fix: the title was misspelled 'Downsampeld'.
plt.title('Downsampled')

There could be interesting stats to be done here, but we need more images to come to any conclusions

# Pipeline Approach

Clustering doesn't make sense. Consider an image. If the image has low contrast, we need to boost the contrast a lot, introducing a lot of noise. Then we have to strongly denoise it after based on its new noise profile. It is not obvious that we would be able to know in advance how much denoising we would need based on the initial information in the image. Rather, each stage should analyse the image as it is currently and decide what preprocessing to employ.

In [None]:
pd.DataFrame(df.mean(), columns=['mean'])

In [None]:
def plot_single(img_id, df):
    """Build a comparison table of one image's metrics against the data set.

    Despite the name, nothing is plotted here; the returned frame is meant to
    be passed to a plotting call such as `sns.pairplot(..., hue='source')`.

    Parameters
    ----------
    img_id : index label of the image of interest in `df`.
    df : DataFrame of metrics indexed by image ID. Non-numeric columns
        (e.g. a label column) are ignored.

    Returns
    -------
    DataFrame with one column per numeric metric plus a 'source' column, and
    four rows: 'mean', 'max', 'min' and `str(img_id)`, whose 'source' values
    are 'mean', 'max', 'min' and 'this' respectively.
    """
    # Restrict to numeric columns: reducing a string column with mean() is
    # either rejected or silently dropped depending on the pandas version,
    # and max()/min() over it would force object dtype on the result.
    numeric = df.select_dtypes(include='number')

    this_df = numeric.mean().to_frame('mean')
    this_df['max'] = numeric.max()
    this_df['min'] = numeric.min()
    this_df[str(img_id)] = numeric.loc[img_id]

    # Transpose so each statistic is a row and each metric a column.
    this_df = this_df.T
    this_df['source'] = ['mean', 'max', 'min', 'this']

    return this_df


plot_single(90112, df)
sns.pairplot(plot_single(availible_ids[0], df), hue='source')

## Contrast

Assumption in testing: we can map the detected contrast in an image to the amount of CC (contrast correction) that needs to be applied, independent of other parameters (note we can actually relax this assumption by just taking those things into account).

In [None]:
def correct_contrast(img, clip_limit, n_tiles_per_row):
    """Equalise `img` with CLAHE and return the result.

    Parameters
    ----------
    img : image passed to the CLAHE object's `apply`.
    clip_limit : value passed as `clipLimit` to `cv.createCLAHE`.
    n_tiles_per_row : used for both dimensions of `tileGridSize`, giving a
        square tile grid.
    """
    grid = (n_tiles_per_row, n_tiles_per_row)
    equaliser = cv.createCLAHE(clipLimit=clip_limit, tileGridSize=grid)
    return equaliser.apply(img)

In [None]:
show_metric(df_ds, 'hist_sigma', lowest=True, downsampled=True)

In [None]:
sorted_by_sigma = df_ds['hist_sigma'].sort_values()

clip_limits = 1 / (sorted_by_sigma[:n]**3.2 / 1e5 * 1.5)
clip_limits

In [None]:
df_ds.loc[clip_limits.index]['hist_sigma'].values

In [None]:
hi = sorted_by_sigma[:n].index
imgs = get_imgs_from_dict(hi, ds=True)

imgs_cc = [correct_contrast(img, clip+1.6, 10) for img, clip in zip(imgs, clip_limits)]

plot_n(imgs_cc, hi, n, 'DS low hist_sigma contrast corrected')

In [None]:
show_metric(df_ds, 'hist_sigma', lowest=False, downsampled=True)

In [None]:
# NOTE(review): this cell sits in the highest-hist_sigma part of the section
# but slices sorted_by_sigma[:n], i.e. the n smallest values; the plotting
# cell that follows uses a constant clip limit and never reads these values.
# Presumably [-n:] was intended - confirm.
clip_limits = 1 / (sorted_by_sigma[:n]**3.2 / 1e5 * 2)
clip_limits

In [None]:
df.loc[hi]['hist_sigma']

In [None]:
# The n downsampled images with the largest hist_sigma, largest first.
sorted_by_sigma = df_ds['hist_sigma'].sort_values()
hi = sorted_by_sigma[-n:].index[::-1]
imgs = get_imgs_from_dict(hi, ds=True)

# Every image gets the same fixed clip limit (2) and a 10x10 tile grid. The
# comprehension used to zip with `clip_limits` without reading its values,
# which made this cell depend on a stale variable from an earlier cell.
imgs_cc = [correct_contrast(img, 2, 10) for img in imgs]

plot_n(imgs_cc, hi, n, 'DS high hist_sigma contrast corrected')

some kind of inconsistencies here, check with other metrics

just noticed this image seems to contain motion blur straight up

In [None]:
plt.figure(figsize=(10,10))
plt.imshow(correct_contrast(get_imgs_from_dict([89065], ds=True)[0], 10, 10), vmin=0, vmax=255)

In [None]:
# numerically fitted exponential
def get_clip_lim(hist_sigma):
    """Return the CLAHE clip limit for an image with the given `hist_sigma`.

    Evaluates the fitted curve offset + amplitude * exp(-decay * hist_sigma)
    and adds a constant 1.6 to the result.
    """
    offset, amplitude, decay = 0.3691732, 111.0668, 0.1625793
    fitted = offset + amplitude*np.exp(-decay*hist_sigma)
    return fitted + 1.6

# numerically fitted sigmoid
def get_tile_size(hist_sigma):
    """Return the CLAHE tile count per row for the given `hist_sigma`.

    Evaluates the fitted curve
    lower + (upper - lower) / (1 + (hist_sigma / scale) ** power)
    and truncates the result to an int.
    """
    lower, upper = 9.945744, 19.95339
    scale, power = 36.68951, 38.40906
    falloff = 1 + (hist_sigma/scale)**power
    return int(lower + (upper - lower)/falloff)

In [None]:
sample = df_ds['hist_sigma'].sort_values().iloc[np.linspace(0, len(df_ds)-1, 8).astype(int)]

clip_lims = [get_clip_lim(hist_sigma) for hist_sigma in sample.values]
tile_sizes = [get_tile_size(hist_sigma) for hist_sigma in sample.values]

imgs = get_imgs_from_dict(sample.index, ds=True)
imgs_cc = [correct_contrast(img, clip_lim, tile_size) 
           for img, clip_lim, tile_size in zip(imgs, clip_lims, tile_sizes)]

plot_n(imgs_cc, sample.index, 8, 'Corrected scan accross hist_sigma')

Bearing in mind the original images, below, this is a pretty decent result.

In [None]:
plot_n(imgs, sample.index, 8, 'Scan accross image hist_sigma')

First apply this CC across all images

In [None]:
# Per-image CLAHE parameters derived from each image's hist_sigma.
clip_limits = df_ds['hist_sigma'].map(get_clip_lim)
tile_sizes = df_ds['hist_sigma'].map(get_tile_size)
imgs = get_imgs_from_dict(df_ds.index, ds=True)

imgs_cc = [correct_contrast(img, clip_lim, tile_size) 
           for img, clip_lim, tile_size in zip(imgs, clip_limits, tile_sizes)]

# Key the corrected images by df_ds.index - the order imgs/imgs_cc were built
# in - rather than by availible_ids, so that IDs and images cannot become
# misaligned if the frame's row order ever differs from the listing order.
imgbank_cc = dict(zip(df_ds.index, imgs_cc))

len(imgs_cc)

In [None]:
noises_after_cc = [get_noise_bl(img_cc)[0] for img_cc in imgs_cc]
df_ds['noise_after_cc'] = noises_after_cc

In [None]:
plt.hist(df_ds['noise_bl'], bins=20, range=(0,12), label='Before CC', histtype='step', linewidth=3)
plt.hist(df_ds['noise_after_cc'], bins=20, range=(0,12), label='After CC', histtype='step', linewidth=3)
plt.xlabel('Noise')
plt.ylabel('Count')
plt.legend()

In [None]:
hist_sigmas_after_cc = [get_hist_mean_var(img_cc)[2] for img_cc in imgs_cc]
df_ds['hist_sigma_after_cc'] = hist_sigmas_after_cc

In [None]:
plt.hist(df_ds['hist_sigma'], bins=20, range=(25,50), label='Before CC', histtype='step', linewidth=3)
plt.hist(df_ds['hist_sigma_after_cc'], bins=20, range=(25,50), label='After CC', histtype='step', linewidth=3)
plt.xlabel('hist_sigma')
plt.ylabel('Count')
plt.legend()

Surprising results. Investigate this further by recalculating all image metrics at every step and seeing what is actually changing

## Denoise After CC

In [None]:
# wrap opencv functions so they are compatible with plots
def bilateral_filter(img, filter_size=5, sigma=70):
    """Smooth `img` with `cv.bilateralFilter`.

    `filter_size` is passed as `d`; `sigma` is passed as both `sigmaColor`
    and `sigmaSpace`.
    """
    # The OpenCV docs are not clear about what exactly these parameters
    # represent (sigmaSpace in particular appears to be superfluous), so a
    # single sigma is used for both.
    params = dict(d=filter_size, sigmaColor=sigma, sigmaSpace=sigma)
    return cv.bilateralFilter(img, **params)

def nlm_filter(img, strength=2.5, window_size=6):
    """Denoise `img` with `cv.fastNlMeansDenoising`.

    `strength` is passed as `h` and `window_size` as `searchWindowSize`.
    """
    denoised = cv.fastNlMeansDenoising(img, h=strength, searchWindowSize=window_size)
    return denoised

In [None]:
sorted_by_noise = df_ds['noise_after_cc'].sort_values()

In [None]:
hi = sorted_by_noise[-n:].index.values[::-1]
imgs = [imgbank_cc[id_] for id_ in hi]

plot_n(imgs, hi, n, 'DS highest noise after contrast corrected')

In [None]:
imgs_bl = [bilateral_filter(img, 30, 10) for img in imgs]

plot_n(imgs_bl, hi, n, 'DS highest noise after contrast corrected > denoised')