In [None]:
import json
import numpy as np
import pandas as pd
import pathlib
from pathlib import Path
from complexcgr import FCGR

# k-mer size; passed as `k` to display_images and interpolated into the output plot paths further down.
KMER = 6

In [None]:
# Load one stored array from the 6mer/fcgr folder and render it as an image.
path = "/data/bacteria/experiments/autoencoders/6mer/fcgr/dustbin__10/SAMN02744650.npy"
m = np.load(path)
FCGR(6).plot(m)

In [None]:
# Write the `kmer2pixel` attribute of a 12-mer FCGR object to a JSON file.
# A dedicated name is used here on purpose: reassigning the notebook-wide KMER
# would make every later `display_images(..., k=KMER)` cell run with k=12 on
# the 6-mer arrays and save its plots under "../plots/12mers/".
KMER_PRECOMPUTED = 12
with open(f"precomputed_fcgr_{KMER_PRECOMPUTED}mer.json", "w") as fp:
    json.dump(FCGR(KMER_PRECOMPUTED).kmer2pixel, fp, indent=4)
    

In [None]:
# Array shown by the next cells. The earlier load of the vibrio_cholerae sample
# was overwritten on the very next line, so that dead read has been dropped.
m = np.load("/data/bacteria/experiments/autoencoders/6mer/fcgr/burkholderia_pseudomallei__01/SAMEA949364.npy")
m

In [None]:
# Render the array loaded in the previous cell; the returned object is the cell output.
FCGR(6).plot(m)

In [None]:
# Save the same array as a JPEG for the paper figures.
FCGR(6).save_img(m, path="../plots/paper/burkholderia_pseudomallei__01-SAMEA949364.jpg")

In [None]:
# Replace `m` with the burkholderia_multivorans sample and render it.
m = np.load(
    "/data/bacteria/experiments/autoencoders/6mer/fcgr/burkholderia_multivorans__01/SAMEA3924732.npy"
)
FCGR(6).plot(m)

In [None]:
# Save the array currently held in `m` as a JPEG for the paper figures.
FCGR(6).save_img(m, path="../plots/paper/burkholderia_multivorans__01-SAMEA3924732.jpg")

___
Interpolation

In [None]:
# Decoder predictions whose file name starts with "chlamydia".
# rglob yields entries in a filesystem-dependent order, so the result is sorted
# to make `list_imgs[0]` (used by the next cell) the same file on every run.
list_imgs = sorted(pathlib.Path("../preds/decoder/").rglob("chlamydia*.npy"))
list_imgs[0]

In [None]:
# Load the first matching file only; the grid cell below iterates over its first axis.
all_inputs = np.load(list_imgs[0])

In [None]:
ncols = 5
# Ceiling division gives exactly the rows needed; the previous `int(n / ncols) + 1`
# added an empty row whenever n was a multiple of ncols.
# max(1, ...) keeps a single row when all_inputs is empty, as before.
nrows = max(1, -(-len(all_inputs) // ncols))

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid

# One grid cell per entry of all_inputs, laid out on an nrows x ncols grid.
fig = plt.figure(1, (12, 20))
grid = ImageGrid(fig, 111, nrows_ncols=(nrows, ncols), axes_pad=0.1)

for j in range(len(all_inputs)):
    inp = all_inputs[j]
    grid[j].imshow(FCGR(6).plot(inp), 'gray')
    

___

In [None]:
# Load the saved arrays compared in the cells below (this rebinds `all_inputs`).
# NOTE(review): "outpuss.npy" looks like a typo for "outputs.npy" — confirm the file name on disk before changing it.
all_inputs=np.load(Path("../preds/inputs.npy"))
all_outputs=np.load(Path("../preds/outpuss.npy"))

In [None]:
# Render the first entry of all_inputs.
FCGR(6).plot(all_inputs[0])

In [None]:
# Render the first entry of all_outputs and show the type of the object plot() returns.
img=FCGR(6).plot(all_outputs[0])
type(img)

Images from the autoencoder — `left`: inputs, `right`: outputs

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid

# Side-by-side comparison: one grid row per sample, input on the left and
# output on the right.
# The grid has a fixed number of rows, so at most `max_pairs` pairs are drawn;
# without the cap, a 21st pair indexed past the end of the grid (IndexError).
max_pairs = 20
fig = plt.figure(1, (20, 30))
grid = ImageGrid(fig, 111, nrows_ncols=(max_pairs, 2), axes_pad=0.1)

fcgr = FCGR(6)  # built once instead of twice per pair
for row, (inp, out) in enumerate(zip(all_inputs[:max_pairs], all_outputs[:max_pairs])):
    grid[2 * row].imshow(fcgr.plot(inp), 'gray')
    grid[2 * row + 1].imshow(fcgr.plot(out), 'gray')
    

___
### Visualize a set of images

In [None]:
import matplotlib.pyplot as plt
from PIL.Image import Image as PilImage
import textwrap, os

def display_images(
    images, k, 
    columns=5, width=25, height=8, max_images=30, 
    label_wrap_length=50, label_font_size=15,
    path_save=None):
    """Show a grid of arrays stored as ``.npy`` files and optionally save the figure.

    Parameters
    ----------
    images : sequence of str or Path
        Paths to ``.npy`` files; each one is read with ``np.load``.
    k : int
        Passed to ``FCGR(k)``, whose ``array2img`` turns each array into the
        image that is drawn.
    columns : int
        Number of subplot columns.
    width, height : float
        Figure size, forwarded as ``figsize=(width, height)``.
    max_images : int
        Only the first ``max_images`` entries are drawn; a message is printed
        when the input is truncated.
    label_wrap_length : int
        Titles are wrapped to this many characters per line.
    label_font_size : int
        Font size of the subplot titles.
    path_save : str or Path, optional
        When given, missing parent directories are created and the figure is
        saved there at 300 dpi.

    Returns
    -------
    None
    """
    if images is None or len(images) == 0:
        print("No images to display.")
        return

    if len(images) > max_images:
        print(f"Showing {max_images} images of {len(images)}:")
        images = images[:max_images]

    # Ceiling division: the previous `int(n / columns + 1)` reserved an empty
    # extra row whenever n was a multiple of `columns`, wasting figure height.
    n_rows = -(-len(images) // columns)

    # Build the converter once instead of once per image.
    encoder = FCGR(k)

    fig = plt.figure(figsize=(width, height))
    fig.subplots_adjust(hspace=.3)

    for position, image in enumerate(images, start=1):
        ax = fig.add_subplot(n_rows, columns, position)
        ax.imshow(encoder.array2img(np.load(image)), cmap="gray")

        # Title: "<parent folder name up to '__'>|<file stem>", wrapped to fit.
        image_path = Path(image)
        title = image_path.parent.stem.split("__")[0] + "|" + image_path.stem
        ax.set_title("\n".join(textwrap.wrap(title, label_wrap_length)),
                     fontsize=label_font_size)

    fig.tight_layout()

    if path_save:
        Path(path_save).parent.mkdir(exist_ok=True, parents=True)
        fig.savefig(path_save, dpi=300)
    # The figure is deliberately not closed here — presumably so the notebook
    # front-end can still render it after the cell finishes.

___
### Outliers Train
From `notebooks/clean-lab.ipynb`

In [None]:
PATH_EXP = Path("/data/bacteria/experiments/autoencoders/6mer/27122023-5")

# Paths of the flagged training outliers, ordered by the folder name up to "__".
imgs_train = (
    pd.read_csv(PATH_EXP / "test/outliers-train.csv")
    .assign(name=lambda frame: frame.path_npy.apply(
        lambda p: pathlib.Path(p).parent.stem.split("__")[0]))
    .sort_values(by="name")["path_npy"]
    .tolist()
)

In [None]:
# Draw the first 10 training outliers.
# NOTE(review): the same path template ("complexcgr-readme.png") is written again by the
# "one sample per species" cell further down; if KMER has the same value there, this file
# is overwritten — confirm that is intended.
display_images(imgs_train, k=KMER, 
            columns=5, width=17, height=8, max_images=10, 
            label_wrap_length=50, label_font_size=11,
            path_save=f"../plots/{KMER}mers/complexcgr-readme.png")

In [None]:
# Training outliers 31-60 of the sorted list.
display_images(
    imgs_train[30:],
    k=KMER,
    max_images=30,
    path_save=f"../plots/{KMER}mers/outliers-train_31-60.png",
)

In [None]:
# Training outliers 61-90 of the sorted list.
display_images(
    imgs_train[60:],
    k=KMER,
    max_images=30,
    path_save=f"../plots/{KMER}mers/outliers-train_61-90.png",
)

In [None]:
# Training outliers from the 91st onward (at most 30 are drawn).
display_images(
    imgs_train[90:],
    k=KMER,
    max_images=30,
    path_save=f"../plots/{KMER}mers/outliers-train_91-100.png",
)

### Plot outliers in the train set, including random examples from the same species

In [None]:
from collections import defaultdict

PATH_FCGR = Path("/data/bacteria/experiments/autoencoders/6mer/fcgr/")

# Group the outlier paths by species, i.e. the parent folder name up to "__".
paths_by_specie = defaultdict(list)
for path in imgs_train:
    name = Path(path).parent.stem.split("__")[0]
    paths_by_specie[name].append(path)

# One figure per species: its outliers first, then other files found in the
# species' "__01" folder.
for name, list_paths in paths_by_specie.items():
    # Set lookup instead of scanning the outlier list once per candidate file.
    outlier_paths = set(list_paths)
    imgs = [
        p for p in (PATH_FCGR / f"{name}__01").rglob("*.npy")
        if str(p) not in outlier_paths
    ]

    print(f"{name} | {len(list_paths)} outliers | {len(imgs)} availables" )

    imgs = list_paths + imgs

    display_images(imgs, k=KMER, max_images=20, path_save=f"../plots/{KMER}mers/outliers/train-{name}.png")
 

___
### Outliers Test
From `notebooks/clean-lab.ipynb`

In [None]:
PATH_EXP = Path("/data/bacteria/experiments/autoencoders/6mer/27122023-5")

# Paths of the flagged test outliers, ordered by the folder name up to "__".
imgs_test = (
    pd.read_csv(PATH_EXP / "test/outliers-test.csv")
    .assign(name=lambda frame: frame.path_npy.apply(
        lambda p: pathlib.Path(p).parent.stem.split("__")[0]))
    .sort_values(by="name")["path_npy"]
    .tolist()
)

In [None]:
# First 30 test outliers of the sorted list.
display_images(
    imgs_test,
    k=KMER,
    max_images=30,
    path_save=f"../plots/{KMER}mers/outliers-test.png",
)

### Plot outliers in the test set, including random examples from the same species

In [None]:
# Imported here as well so this cell does not depend on the train-outliers cell having run.
from collections import defaultdict

PATH_FCGR = Path("/data/bacteria/experiments/autoencoders/6mer/fcgr/")

# Group the outlier paths by species, i.e. the parent folder name up to "__".
paths_by_specie = defaultdict(list)
for path in imgs_test:
    name = Path(path).parent.stem.split("__")[0]
    paths_by_specie[name].append(path)

# One figure per species: its outliers first, then other files from every
# folder whose name starts with the species name.
for name, list_paths in paths_by_specie.items():
    # Set lookup instead of scanning the outlier list once per candidate file.
    outlier_paths = set(list_paths)
    imgs = [
        p for p in PATH_FCGR.rglob(f"{name}*/*.npy")
        if str(p) not in outlier_paths
    ]

    imgs = list_paths + imgs

    display_images(imgs, k=KMER, max_images=20, path_save=f"../plots/{KMER}mers/outliers/test-{name}.png")

___
### Plot for one species in particular

In [None]:
# Show up to 30 arrays from a single species folder.
name = "enterococcus_faecalis"
imgs = list(
    Path(f'/data/bacteria/experiments/autoencoders/6mer/fcgr/{name}__01/').rglob("*.npy")
)
display_images(
    imgs,
    k=KMER,
    max_images=30,
    path_save=f"../plots/{KMER}mers/{name}.png",
)

___
___
# Plot one sample per species

In [None]:
from pathlib import Path

KMER = 6

# Every .npy file under the fcgr folder, as strings.
# - "*.npy" (with the dot) matches the pattern used by the other cells; the
#   previous "*npy" also matched any name that merely ends in "npy".
# - sorted(): rglob order is filesystem-dependent, and later cells take the
#   first path per species and slice the list into numbered figures, so a
#   stable order is needed to get the same figures on every run.
imgs = sorted(
    str(img)
    for img in Path(f"/data/bacteria/autoencoder-experiments/{KMER}mers/fcgr/").rglob("*.npy")
)


In [None]:
from collections import defaultdict

# Map species (parent folder name up to "__") -> list of its file paths.
path_by_species = defaultdict(list)
for img in imgs:
    name_species = Path(img).parent.stem.partition("__")[0]
    path_by_species[name_species] += [img]
len(path_by_species)

In [None]:
# Keep the first path collected for each species.
img_display = [paths[0] for paths in path_by_species.values()]
len(img_display)

In [None]:
# Compact 10-image figure (first 10 species in img_display).
display_images(
    img_display,
    k=KMER,
    columns=5,
    width=17,
    height=8,
    max_images=10,
    label_wrap_length=50,
    label_font_size=10,
    path_save=f"../plots/{KMER}mers/complexcgr-readme.png",
)

In [None]:
# Species 1-30. The file name now follows the "bacteria_<range>.png" pattern of
# the following cells; the previous "outliers-" prefix did not fit this
# one-sample-per-species section.
display_images(img_display, k=KMER, max_images=30, path_save=f"../plots/{KMER}mers/bacteria_1-30.png")

In [None]:
# Species 31-60.
display_images(
    img_display[30:],
    k=KMER,
    max_images=30,
    path_save=f"../plots/{KMER}mers/bacteria_31-60.png",
)

In [None]:
# Species 61-90.
display_images(
    img_display[60:],
    k=KMER,
    max_images=30,
    path_save=f"../plots/{KMER}mers/bacteria_61-90.png",
)

In [None]:
# Species 91-120.
display_images(
    img_display[90:],
    k=KMER,
    max_images=30,
    path_save=f"../plots/{KMER}mers/bacteria_91-120.png",
)

In [None]:
# Species from the 121st onward (at most 30 are drawn).
display_images(
    img_display[120:],
    k=KMER,
    max_images=30,
    path_save=f"../plots/{KMER}mers/bacteria_121-end.png",
)