## Normalization by frequency channel and time step

You can remove some artifacts in the data by standardizing every column, and then every row, of the spectrograms to zero mean and unit standard deviation:


<pre>
x = np.load(f"data.npy")
x = np.vstack(x).astype(np.float32)
x = ((x - np.mean(x, axis=0)) / np.std(x, axis=0)).T
x = ((x - np.mean(x, axis=0)) / np.std(x, axis=0)).T
</pre>   


In [None]:
import numpy as np 
import pandas as pd

from PIL import Image, ImageDraw

In [None]:
# Root directory of the competition data set.
BASEPATH = "../input/seti-breakthrough-listen"

# Training labels; the cells below read its `id` and `target` columns.
df_train = pd.read_csv(BASEPATH + "/train_labels.csv")

In [None]:
def _standardize_columns(a):
    """Return 2-D array `a` with each column at zero mean and unit std.

    Columns whose standard deviation is zero are only centred (their divisor
    is replaced by 1), so they become all-zero instead of NaN/inf.
    """
    std = np.std(a, axis=0)
    std[std == 0] = 1
    return (a - np.mean(a, axis=0)) / std


def get_img(id, type="train", normalize=False, thumbsize=None, colorize=False, add_id=False):
    """Load one example from disk and render it as a PIL image.

    The array stored at ``{BASEPATH}/{type}/{id[0]}/{id}.npy`` must be 3-D
    with shape ``(c, h, w)``; its ``c`` slices are stacked vertically into a
    single ``(c*h, w)`` picture.

    Args:
        id: Example id (string); its first character selects the sub-folder.
        type: Name of the data split folder below ``BASEPATH``.
        normalize: If true, standardize every column and then every row of
            the stacked picture (zero mean, unit standard deviation).
        thumbsize: Optional ``(width, height)``; if given, the image is
            resized to it with Lanczos resampling.
        colorize: If true, return an RGB image in which the slices are tinted
            alternately: even-numbered slices have channel 2 zeroed,
            odd-numbered slices have channel 0 zeroed.
        add_id: If true, draw ``id`` as text (with a dark drop shadow) in the
            top-left corner.

    Returns:
        A ``PIL.Image.Image`` in mode "RGB" when ``colorize`` is true,
        otherwise in mode "L".
    """
    x = np.load(f"{BASEPATH}/{type}/{id[0]}/{id}.npy")
    c, h, w = x.shape
    # Stack the c slices on top of each other: (c, h, w) -> (c*h, w).
    x = x.reshape(c * h, w).astype(np.float32)

    if normalize:
        # First pass standardizes the columns; the transpose turns rows into
        # columns so the second pass standardizes the rows, and the final
        # transpose restores the original orientation.
        x = _standardize_columns(x).T
        x = _standardize_columns(x).T

    # Clip to [-1, 3] and map that range linearly onto 0..255.
    x = ((np.clip(x, -1, 3) + 1) / 4 * 255).astype(np.uint8)

    if colorize:
        x = np.stack([x] * 3, axis=2)
        # Walk the slices in pairs; an out-of-range second slice (odd c) is
        # simply an empty selection.
        for i in range(0, c, 2):
            x[h * i:h * (i + 1), :, 2] = 0
            x[h * (i + 1):h * (i + 2), :, 0] = 0

    # uint8 arrays map to mode "L" (2-D) or "RGB" (H x W x 3) automatically.
    x = Image.fromarray(x)

    if thumbsize is not None:
        x = x.resize(thumbsize, resample=Image.LANCZOS)

    if add_id:
        if x.mode == "L":
            # Single-band images do not accept RGB tuples as ink; 142 is the
            # luminance of (127, 127, 255).
            shadow, ink = 0, 142
        else:
            shadow, ink = (0, 0, 0), (127, 127, 255)
        d = ImageDraw.Draw(x)
        d.text((1, 1), id, fill=shadow)
        d.text((0, 0), id, fill=ink)

    return x

In [None]:
# Side-by-side comparison for the first ten target==1 examples.
#   LEFT  = no normalization
#   RIGHT = normalization by frequency channel and time step
#           (i.e. per row and per column), colorized
positives = df_train[df_train.target == 1].head(10)
for _, row in positives.iterrows():
    print(f"Example-ID: {row.id} - Target: {row.target}")

    raw = get_img(row.id, normalize=False, thumbsize=(256, 256))
    normed = get_img(row.id, normalize=True, thumbsize=(256, 256), colorize=True)

    # Two 256x256 tiles separated by a one-pixel column of background colour.
    mosaic = Image.new(mode='RGB', size=(256 * 2 + 1, 256), color=(0, 0, 127))
    mosaic.paste(raw, (0, 0))
    mosaic.paste(normed, (257, 0))
    display(mosaic)

## A few more target==1 examples

In [None]:
# Ids of every training example labelled target == 1.
all_needles = df_train.loc[df_train["target"] == 1, "id"].values

# Layout of one mosaic page: grid_x columns by grid_y rows of thumbnails.
grid_x, grid_y = 6, 6
thumb_size = (128, 128)          # (width, height) of a single thumbnail
page_size = grid_x * grid_y      # thumbnails per page
page = 0

def show_page(page):
    """Display one mosaic page of normalized, colorized thumbnails.

    Page number ``page`` (0-based) covers the ids
    ``all_needles[page*page_size : (page+1)*page_size]``. Thumbnails are laid
    out row by row in a ``grid_x`` by ``grid_y`` grid, with a one-pixel gap of
    background colour between neighbouring cells. A page past the end of
    ``all_needles`` is shown as an empty background.

    Args:
        page: 0-based page index.
    """
    cell_w, cell_h = thumb_size
    mosaic = Image.new(mode='RGB',
                       size=(cell_w * grid_x + grid_x - 1,
                             cell_h * grid_y + grid_y - 1),
                       color=(0, 0, 127))

    for pos, idx in enumerate(all_needles[page * page_size:(page + 1) * page_size]):
        row, col = divmod(pos, grid_x)
        img = get_img(idx, normalize=True, colorize=True, thumbsize=thumb_size, add_id=True)
        # Each cell occupies its thumbnail plus one separator pixel, so the
        # first cell starts at 0 and the last one ends exactly at the canvas
        # edge.
        mosaic.paste(img, (col * (cell_w + 1), row * (cell_h + 1)))

    display(mosaic)

# Render only the first ten pages of target==1 examples.
# To render every page instead, loop over
# range(len(all_needles) // page_size + 1).
for i in range(10):
    show_page(i)