In [None]:
import zipfile
# Unpacks the archive with the training files (left disabled: in the author's setup the files are already extracted into the folder).
#with zipfile.ZipFile('/content/drive/MyDrive/hl2_blocks.zip', 'r') as zip_ref:
    #zip_ref.extractall('/content/hl2_blocks/')

In [None]:
import keras, os, random
import soundfile as sf
from keras import layers, regularizers, metrics, losses, activations

In [None]:
# Seed Python's `random` module without a fixed value, so every run differs.
# In this notebook `random` is only used to print an occasional file name
# while the dataset is loaded; it does not affect the training data itself.
random.seed()

In [None]:
import tensorflow as tf  #мы задаём наши собственные функции потерь, которые определяют, за что мы штрафуем нейронку при обучении
import math

def freq_loss(y_true, y_pred):
    """Penalise differences between the magnitude spectra of target and prediction.

    Unlike ``phase_loss`` (which penalises both magnitude and phase errors),
    this loss only looks at the magnitude response.

    Args:
        y_true: real-valued tensor of reference signals; the FFT is taken
            along the innermost axis.
        y_pred: real-valued tensor of predicted signals, same shape as ``y_true``.

    Returns:
        Tensor with the innermost axis reduced: the mean squared difference of
        the magnitude spectra, each magnitude difference being divided by the
        number of frequency bins first.
    """
    mag_t = tf.math.abs(tf.signal.rfft(y_true))
    mag_p = tf.math.abs(tf.signal.rfft(y_pred))
    # Normalise by the number of frequency bins (size of the last axis).
    # `len(tensor)` would return the batch size for batched input and raises
    # for symbolic tensors whose leading dimension is unknown, so the size is
    # taken from tf.shape, which works for both static and dynamic shapes.
    n_bins = tf.cast(tf.shape(mag_t)[-1], mag_t.dtype)
    sq_freq = tf.square((mag_t - mag_p) / n_bins)
    return tf.reduce_mean(sq_freq, axis=-1)

def phase_loss(y_true, y_pred):
    """Penalise spectral errors in both magnitude and phase.

    The real FFT of target and prediction is compared component-wise: the
    squared differences of the real parts and of the imaginary parts are each
    averaged over the innermost axis and then added together.

    Args:
        y_true: real-valued tensor of reference signals.
        y_pred: real-valued tensor of predicted signals, same shape as ``y_true``.

    Returns:
        Tensor with the innermost axis reduced, holding the summed mean
        squared errors of the imaginary and real spectrum components.
    """
    spec_true = tf.signal.rfft(y_true)
    spec_pred = tf.signal.rfft(y_pred)

    # Errors of the two spectrum components, kept separate on purpose.
    re_err = tf.math.real(spec_true) - tf.math.real(spec_pred)
    im_err = tf.math.imag(spec_true) - tf.math.imag(spec_pred)

    im_term = tf.reduce_mean(tf.square(im_err), axis=-1)
    re_term = tf.reduce_mean(tf.square(re_err), axis=-1)
    return im_term + re_term

def comb_loss(y_true, y_pred):
    """Loss function the model is compiled with.

    Currently this is just ``phase_loss``; it is kept as a separate entry
    point so that additional terms can be mixed in here without touching the
    ``compile`` call.

    Args:
        y_true: tensor of reference signals.
        y_pred: tensor of predicted signals.

    Returns:
        Whatever ``phase_loss(y_true, y_pred)`` returns.
    """
    return phase_loss(y_true, y_pred)

In [None]:
# Design the model, assemble it and print its text summary.
# There is a single layer only, and it uses a linear activation:
# 256 input samples are mapped to 1024 output samples.
input_sndblock = keras.Input(shape=(256,))
upsampled = layers.Dense(units=1024, activation='linear')(input_sndblock)
upsampler = keras.Model(inputs=input_sndblock, outputs=upsampled)

upsampler.compile(
    loss=comb_loss,
    optimizer='nadam',
    metrics=[
        metrics.MeanSquaredError(),
        metrics.MeanAbsoluteError(),
        metrics.CosineSimilarity(),
    ],
)
upsampler.summary()
# Author's note: if you try to change the activation function or stack more
# layers, you will find that training takes longer while the results either
# stay the same or get worse. The explanation given is that a plain dense
# layer with a linear activation is enough to gather statistics when the
# architecture itself does not model frequency dependencies; dependencies in
# the frequency domain are accounted for in the loss function instead.

In [None]:
import numpy as np    # Load the dataset into RAM

x_train = []
y_train = []

# NOTE(review): this is an absolute path at the filesystem root, while the
# test cell further down reads blocks from the relative "hl2_blocks/..."
# directory - confirm that both point at the same data.
blocks_dir = "/hl2_blocks"
block_len = 1024
for root, _subdirs, files in os.walk(blocks_dir):
    for file in files:
        # Skip (rather than stop at) anything that is not a WAV block, so one
        # stray file does not discard the rest of the directory.
        if not file.endswith(".wav"):
            continue
        block_sdata, _ = sf.read(os.path.join(root, file))
        # Likewise skip blocks of the wrong length and keep going.
        if len(block_sdata) != block_len:
            continue
        # Data augmentation: the dataset is enlarged 4x by also accepting the
        # block mirrored along the value axis (antiphase), along the time axis
        # (reversed), and along both axes at once.
        for target in (block_sdata,
                       -block_sdata,
                       block_sdata[::-1],
                       -block_sdata[::-1]):
            # The network input is the target decimated by 4: only every
            # fourth sample is kept, which simulates lowering the sample rate
            # 4x. Decimating each augmented target (instead of reversing an
            # already decimated block) keeps input sample i aligned with
            # target sample 4*i for every pair, matching the `[::4]` inputs
            # used at inference time.
            x_train.append(target[::4])
            # The target itself is stored unchanged.
            y_train.append(target)
        # Occasionally print a file name as a cheap progress indicator.
        if random.randint(1, 1000) == 1:
            print(file)

if not x_train:
    raise FileNotFoundError(
        "no %d-sample .wav blocks found under %r" % (block_len, blocks_dir))

x_train = np.array(x_train)
y_train = np.array(y_train)
print(x_train.shape)
# Author's note: the original run produced about 83 thousand blocks.

In [None]:
# Train for 100 epochs on mini-batches of 16 samples each;
# one tenth of the data is set aside as validation data.
upsampler.fit(
    x=x_train,
    y=y_train,
    batch_size=16,
    epochs=100,
    validation_split=0.1,
    shuffle=True,
)

In [None]:
# Upsample the first 81 consecutive blocks (00000..00080) of one recording and
# write the concatenated prediction to b.wav.
# NOTE(review): these blocks are read from a directory named like the training
# set - presumably the model has already seen them; confirm if this is meant
# to be a held-out test.
n_test_blocks = 81
test_blocks = []
for block_idx in range(n_test_blocks):
    block_path = "hl2_blocks/ba_juicedup/ba_juicedup_block_%05d.wav" % block_idx
    block_data, _ = sf.read(block_path)
    test_blocks.append(block_data)

# Same 4x decimation as used for the training inputs.
x_test = np.array([block[::4] for block in test_blocks])
upped = upsampler.predict(x_test)
# Join the predicted blocks back-to-back into one continuous signal.
exp_b = np.concatenate(upped)
print(exp_b.shape)
sf.write('b.wav', exp_b, 44100)

In [None]:
import soundfile as sf  # target practice
# Read the input file in consecutive, non-overlapping blocks of 256 samples
# (fill_value=0 is passed so that a short final block is filled with zeros),
# run every block through the model and write the joined output to hl1.wav.
blks = list(sf.blocks('/content/targetpractice.wav',
                      blocksize=256,
                      overlap=0,
                      fill_value=0))
upped = upsampler.predict(np.array(blks))
upped_concat = np.concatenate(upped)
sf.write('hl1.wav', upped_concat, 44100)