In [None]:
# Reload edited project modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import logging
# Configure the root logger so that messages at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)

# Data preprocessing for the neural network

This notebook illustrates the data preprocessing steps used to create the dataset.

In [None]:
# Input resolution of the panel. It is passed to the surface builder and to the
# dataset builder, and it is embedded in the output directory / file names.
res = 512

### 1. Projection of the mesh distance onto a plane

Alternatively, run the script `panel_obj_to_mat.py`, which can execute this code in parallel.

In [None]:
import os
from ddad.ml.preprocess import get_panel_names, build_surface_from_name
from ddad.ml.path import data_path
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Force the recomputation of the panel surfaces, even when the corresponding
# `_mat.npz` file already exists on disk.
rebuild = False

In [None]:
# Convert every panel into a surface array and cache it as a compressed .npz file.
names = get_panel_names()
path_data = os.path.join(data_path("processed"), "../panel_mat_{}".format(res))
os.makedirs(path_data, exist_ok=True)

for name in names:
    print("Converting panel: {}".format(name))
    try:
        outpath = os.path.join(path_data, name + "_mat.npz")

        # Cached result present and no rebuild requested: nothing to do.
        if not rebuild and os.path.exists(outpath):
            print("   Already availlable -> skip")
            continue

        if not rebuild:
            print("   Not availlable -> build")

        surface = build_surface_from_name(name, res=res)
        if surface is None:
            print("Something went wrong...")
        else:
            np.savez_compressed(outpath, surface=surface)
    except Exception:
        # Best effort: report the failure and carry on with the next panel.
        import traceback

        traceback.print_exc()
        print("Fail for panel {}".format(name))

In [None]:
# List the available panels and report how many were found.
names = get_panel_names()
print("Found {} walls".format(len(names)))

In [None]:
# Load the cached surface of every panel and stack them into a single array.
path_data = os.path.join(data_path("processed"), "../panel_mat_{}".format(res))
all_mat = []
# Sorted in place on purpose: later cells use the same position to index both
# `names` and `all_mat`.
names.sort()
for name in names:
    panel_path = os.path.join(path_data, name+"_mat.npz")
    # np.load returns a lazily-read archive that keeps the file open; the
    # context manager closes it as soon as the array has been read instead of
    # leaving one open handle per panel.
    with np.load(panel_path) as panel_archive:
        all_mat.append(panel_archive["surface"])
all_mat = np.array(all_mat)

In [None]:
# Histogram of every surface value across all panels, with log-scaled counts.
plt.hist(all_mat.ravel(), bins=100);
plt.yscale("log")

In [None]:
from ddad.ml.plot import draw_images

# Colour range symmetric around zero so the diverging colormap is centred.
vmax = all_mat.max()
vmin = -vmax

plt.figure(figsize=(30, 30))
# NOTE(review): the two 15s presumably describe a 15x15 grid of thumbnails — confirm in draw_images.
im = draw_images(all_mat, 15, 15, cmap=plt.cm.seismic, vmin=vmin, vmax=vmax)


In [None]:
# Value range of the stacked surfaces.
all_mat.min(), all_mat.max()

In [None]:
# Display one panel, addressed by its 1-based (col, line) position with 15 entries per column.
col = 13
line = 8
ind = 15 * (col - 1) + (line - 1)

# Colour range symmetric around zero, shared with the overview figure.
vmax = all_mat.max()
vmin = -vmax

plt.figure(figsize=(8, 8))
plt.imshow(all_mat[ind], cmap=plt.cm.seismic, vmin=vmin, vmax=vmax)
plt.title(names[ind])
plt.colorbar()

### 2. Creating a dataset from (geometry, impulse) pairs

In [None]:
import os
import numpy as np
from ddad.ml.preprocess import build_dataset


In [None]:
# Dataset configuration (forwarded as-is to build_dataset).
data_type = "cummulative_energy"
plane = 0  # Select the closest plane 6x6
use_chanels = True
use_time = False

# Name for our dataset: every configuration value is encoded in the file name.
name_file = "../data/data_{}_{}_{}_{}_{}.npz".format(
    res, data_type, use_time, use_chanels, plane
)

#### Preprocess

In [None]:
# Build the dataset for the current configuration and store it compressed.
mats, samples, sel_names, sel_sources = build_dataset(
    res, data_type, use_time, use_chanels, plane
)
np.savez_compressed(
    name_file,
    mats=mats,
    samples=samples,
    sel_names=sel_names,
    sel_sources=sel_sources,
)

# Drop the in-memory copies; the next cell reloads them from disk.
del mats, samples, sel_names, sel_sources

In [None]:
# Test data loading
# NOTE: allow_pickle=True executes pickled payloads on load; this is acceptable
# here only because the file was written by this notebook. Do not point it at
# untrusted files.
# The context manager closes the archive deterministically once all arrays
# have been read.
with np.load(name_file, allow_pickle=True) as dat:
    samples = dat["samples"]
    mats = dat["mats"]
    sel_names = dat["sel_names"]
    sel_sources = dat["sel_sources"]
# Sources come back as an array; convert each entry to a tuple.
sel_sources = [tuple(e) for e in sel_sources.tolist()]

del dat

#### Preprocess for a different configuration

In [None]:
# Second configuration: same as before but without the channel dimension.
data_type = "cummulative_energy"
plane = 0  # Select the closest plane 6x6
use_chanels = False
use_time = False

# Name for our dataset: every configuration value is encoded in the file name.
name_file = "../data/data_{}_{}_{}_{}_{}.npz".format(
    res, data_type, use_time, use_chanels, plane
)

In [None]:
# Build the dataset for the current configuration and store it compressed.
mats, samples, sel_names, sel_sources = build_dataset(
    res, data_type, use_time, use_chanels, plane
)
np.savez_compressed(
    name_file,
    mats=mats,
    samples=samples,
    sel_names=sel_names,
    sel_sources=sel_sources,
)

# Drop the in-memory copies; the next cell reloads them from disk.
del mats, samples, sel_names, sel_sources

In [None]:
# Test data loading
# NOTE: allow_pickle=True executes pickled payloads on load; this is acceptable
# here only because the file was written by this notebook. Do not point it at
# untrusted files.
# The context manager closes the archive deterministically once all arrays
# have been read.
with np.load(name_file, allow_pickle=True) as dat:
    samples = dat["samples"]
    sel_names = dat["sel_names"]
    sel_sources = dat["sel_sources"]
    mats = dat["mats"]
# Sources come back as an array; convert each entry to a tuple.
sel_sources = [tuple(e) for e in sel_sources.tolist()]

del dat

### 3. Look at a sample

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Take the first entry of the loaded dataset for inspection.
sample = samples[0]

In [None]:
# First part is the geometry input: sample[0] is used as an index into `mats`,
# and the selected entry is shown as an image.
plt.imshow(mats[sample[0]]);

In [None]:
# Outputs, in this case the reflected energy
if not use_chanels:
    # Single output map, reshaped to a (6-plane) x (6-plane) image.
    plt.imshow(sample[1].reshape(6 - plane, 6 - plane))
    plt.colorbar()
else:
    # One (6-plane) x (6-plane) image per column of sample[1], for the first five columns.
    plt.figure(figsize=(10, 6))
    for i in range(5):
        plt.subplot(2, 3, i + 1)
        plt.imshow(sample[1][:, i].reshape(6 - plane, 6 - plane))
        plt.colorbar()

In [None]:
# The mask used to tell which of the outputs should be used for the loss,
# shown as a (6-plane) x (6-plane) image.
plt.imshow(sample[2].reshape(6-plane, 6-plane))

In [None]:
# The mask indicating the position of the source (just the reverse of the previous element),
# shown as a (6-plane) x (6-plane) image.
plt.imshow(sample[3].reshape(6-plane, 6-plane))

In [None]:
# Shape of the output part of the sample.
sample[1].shape

## 4. Creating dataset for alternative problems

This section builds datasets for two alternative tasks: object classification and interpolation from impulse responses (IR).

In [None]:
# Imports for the classification / interpolation datasets.
# The original last line (`from ddad.ml.path import `) was an incomplete
# statement, i.e. a SyntaxError that prevented the whole cell from running;
# `data_path` is the name the following cells need from that module.
import os

import matplotlib.pyplot as plt
import numpy as np

from ddad.dataset import PanelData
from ddad.ml.path import data_path
from ddad.ml.preprocess import get_panel_names



In [None]:
from ddad.grid import Grid
from ddad.config import GRID_FILE
# Build the grid object from the project's JSON grid file.
grid = Grid.from_json(GRID_FILE)

In [None]:
# Cells returned by the grid for each of the 2951 indices; each entry is
# unpacked below as a pair of cells.
cells_source_reciever = [grid.cells_from_index(i) for i in range(2951)]

# Unique cells, each as a hashable 3-tuple.
cells = {
    tuple(c)
    for c in np.array(cells_source_reciever).reshape(-1, 3).tolist()
}

# Positions of the unique cells, centred on their median and divided by a fixed scale.
positions = np.array([np.array(grid.position(c)) for c in cells])
mp = np.median(positions, axis=0)
sp = 125
positions = (positions - mp) / sp

# Lookup from cell to normalised position (same iteration order as `positions`).
cells2positions = dict(zip(cells, positions))

# For every pair, the normalised positions of its two cells.
positions_source_reciever = np.array(
    [[cells2positions[c1], cells2positions[c2]] for c1, c2 in cells_source_reciever]
)

In [None]:
# Build the classification dataset: for every panel, keep the first `lt`
# columns of its matrix after direct-sound removal and label every row with
# the index of the panel.
lt = 256

name_file = os.path.join(data_path(), "../data_classification_{}.npz".format(lt))

names = get_panel_names()

# One label per row of `positions_source_reciever` (2951 with the current
# grid), so labels stay aligned with the positions appended to `srs`.
n_pairs = len(positions_source_reciever)

samples = []
labels = []
# Must be initialised here: it is appended to in the loop and concatenated
# below (it was previously commented out, which raised a NameError).
srs = []

for i, name in enumerate(names):
    p = PanelData(name)
    # Use `lt` for the truncation so it always matches the file name.
    samples.append(p.matrix_after_direct_sound_removal[:, :lt])
    labels.append(np.full(n_pairs, i, dtype=int))
    srs.append(positions_source_reciever)

labels = np.concatenate(labels)
samples = np.concatenate(samples)
srs = np.concatenate(srs)

np.savez(name_file, labels=labels, samples=samples, names=names)



In [None]:
# Sanity check: shapes of the assembled arrays and the number of panels.
samples.shape, labels.shape, len(names)

In [None]:
# Store everything needed for the interpolation task in a single archive.
name_file = os.path.join(data_path(), "../data_interpolation_{}.npz".format(lt))

# `cells2positions` is a dict, so NumPy stores it as a pickled object array;
# reading it back requires np.load(..., allow_pickle=True).
np.savez(
    name_file,
    srs=srs,
    cells2positions=cells2positions,
    cells_source_reciever=cells_source_reciever,
    samples=samples,
    names=names,
    mp=mp,
    sp=sp,
    positions_source_reciever=positions_source_reciever,
)


## 5. Check reprinted panels

In [None]:
# List the available panels and report how many were found.
names = get_panel_names()
print("Found {} walls".format(len(names)))

In [None]:
# Compare the dataset outputs of every reprinted panel with those of the
# original panel (same name without the "reprint" marker).
#
# `samples` and `name_file` are re-bound by the classification / interpolation
# cells earlier in the notebook, so the (geometry, impulse) dataset is reloaded
# here under dedicated names instead of relying on leftover kernel state.
reprint_dataset_file = "../data/data_{}_{}_{}_{}_{}.npz".format(
    res, data_type, use_time, use_chanels, plane
)
# allow_pickle=True is acceptable only because the file is written by this notebook.
with np.load(reprint_dataset_file, allow_pickle=True) as reprint_archive:
    dataset_samples = reprint_archive["samples"]
    dataset_names = reprint_archive["sel_names"]

for name in names:
    if "reprint" not in name:
        continue
    parts = name.split("reprint")
    name1 = parts[0] + parts[1]
    name2 = name
    try:
        out1 = np.array([s[1] for s in dataset_samples[dataset_names == name1]])
        print(name1, out1.shape)
        out2 = np.array([s[1] for s in dataset_samples[dataset_names == name2]])
        print(name2, out2.shape)
        # Relative L2 difference between reprint and original outputs.
        print(np.linalg.norm((out1-out2).flatten())/np.linalg.norm(out1))
    except Exception as err:
        # Stay best-effort, but report the failure instead of hiding it.
        print("Could not compare {} and {}: {!r}".format(name1, name2, err))

In [None]:
# Relative L2 difference of the normalised total energy bands between every
# reprinted panel and its original (same name without the "reprint" marker).
for name in names:
    if "reprint" not in name:
        continue
    parts = name.split("reprint")
    name1 = parts[0] + parts[1]
    name2 = name
    try:
        out1 = PanelData(name1).total_energy_bands_normalized.flatten()
        out2 = PanelData(name2).total_energy_bands_normalized.flatten()
        print("Rel error between {} and {} : {}".format(name1, name2, np.linalg.norm((out1-out2).flatten())/np.linalg.norm(out1)))
    except Exception as err:
        # Stay best-effort (a panel may be missing), but report the failure
        # instead of silently swallowing every error, KeyboardInterrupt included.
        print("Could not compare {} and {}: {!r}".format(name1, name2, err))

In [None]:
# Same comparison as for the panels themselves, but on the `.flat` data
# attached to each panel.
for name in names:
    if "reprint" not in name:
        continue
    parts = name.split("reprint")
    name1 = parts[0] + parts[1]
    name2 = name
    try:
        out1 = PanelData(name1).flat.total_energy_bands_normalized.flatten()
        out2 = PanelData(name2).flat.total_energy_bands_normalized.flatten()
        print("Rel error between flat for {} and flat for {} : {}".format(name1, name2, np.linalg.norm((out1-out2).flatten())/np.linalg.norm(out1)))
    except Exception as err:
        # Stay best-effort (a panel may be missing), but report the failure
        # instead of silently swallowing every error, KeyboardInterrupt included.
        print("Could not compare flat for {} and flat for {}: {!r}".format(name1, name2, err))

In [None]:
from ddad.dataset import PanelData

In [None]:
# Load one original panel and its reprint for a side-by-side comparison.
name1 = "panel_0003_1"
name2 = "panel_0003reprint_1"
# Alternative pair:
# name1 = "panel_0002_1"
# name2 = "panel_0002reprint_1"
p1 = PanelData(name1)
p2 = PanelData(name2)

In [None]:
# Print the panel information of both panels.
p1.print_panel_info() 
p2.print_panel_info() 

In [None]:
# Row of `matrix_after_direct_sound_removal` that is plotted for both panels.
index = 200

In [None]:
# Overlay row `index` of the matrix after direct-sound removal for both panels.
plt.plot(p1.matrix_after_direct_sound_removal[index], label =name1)
plt.plot(p2.matrix_after_direct_sound_removal[index], label =name2)
plt.legend()
# Optional zoom on the x axis:
# plt.xlim([20, 150])

In [None]:
# Overlay the mean over axis 0 of the matrix after direct-sound removal for both panels.
plt.plot(np.mean(p1.matrix_after_direct_sound_removal, axis=0), label =name1)
plt.plot(np.mean(p2.matrix_after_direct_sound_removal, axis=0), label =name2)
plt.legend()
