System information
- I used an example from the TensorFlow documentation
- Ubuntu 18.04.4
- TensorFlow 2.1.0 installed via pip
- Python version: 3.7.3
Following the official example from the `inverse_stft` documentation to reconstruct a signal, I get either a totally different signal or, when `frame_step >= frame_length`, NaNs every `frame_step` elements starting at position 0. For clarity, the lengths from the official example were divided by 10.
import tensorflow as tf
# Reproduction 1: STFT -> inverse STFT round trip with overlapping frames
# (frame_step < frame_length) on a random 100-sample signal.
frame_length = 40
frame_step = 16
waveform = tf.random.normal(shape=[100], dtype=tf.float32)

# Forward transform with the library's default analysis window.
stft = tf.signal.stft(
    waveform, frame_length=frame_length, frame_step=frame_step)

# Synthesis window derived from the same hop size, then the inverse transform.
synthesis_window_fn = tf.signal.inverse_stft_window_fn(frame_step)
inverse_stft = tf.signal.inverse_stft(
    stft,
    frame_length=frame_length,
    frame_step=frame_step,
    window_fn=synthesis_window_fn,
)

# Print the reconstruction first and the original second for comparison.
# NOTE(review): the two tensors may differ in length and near the edges,
# presumably because trailing samples that do not fill a frame are dropped
# and the outermost frames lack full overlap -- confirm against the
# tf.signal.stft / inverse_stft documentation.
print(inverse_stft)
print(waveform)
tf.Tensor(
[ 0.00000000e+00 3.99959536e-05 -2.40500085e-04 -1.59982394e-03
5.19763958e-03 -4.16988507e-03 2.64100209e-02 -9.08877850e-02
3.40112969e-02 3.86196673e-01 5.13033606e-02 -2.86520272e-01
6.11472845e-01 4.98466581e-01 1.13349378e+00 1.09166965e-01
-9.97731745e-01 -5.43629766e-01 -2.73563933e+00 7.85581648e-01
-8.19584429e-01 1.79670918e+00 9.02783334e-01 -1.48530865e+00
-2.47491312e+00 -1.24023890e+00 1.15495408e+00 1.42492950e+00
-7.37325311e-01 -1.20595813e+00 -3.62768292e-01 1.35240197e+00
-3.68225783e-01 -7.91088283e-01 1.89298892e+00 5.07700205e-01
6.15092158e-01 -1.92212605e+00 -8.31669629e-01 7.79488802e-01
-3.24547052e-01 -5.50925493e-01 -7.81439543e-01 8.75341117e-01
-6.82320118e-01 7.01450050e-01 -4.50847566e-01 6.66174769e-01
-1.23529518e+00 -1.24680352e+00 3.43645632e-01 1.09950686e+00
5.97586334e-01 -3.48894447e-01 6.68327957e-02 -1.43280745e+00
-6.64948940e-01 -7.94504046e-01 1.14265656e+00 -9.80610073e-01
-4.60786790e-01 -3.97007465e-01 -9.27713037e-01 1.07594097e+00
5.27661502e-01 -1.61613131e+00 -2.59635901e+00 -3.46627653e-01
3.27228665e-01 -1.07391989e+00 9.76892650e-01 -2.26863116e-01
-1.79977849e-01 -7.51911581e-01 -1.52214825e-01 -2.73942560e-01
-5.64359844e-01 5.01259208e-01 4.91654649e-02 -1.36436284e-01
1.09956011e-01 7.98072666e-02 -1.27519906e-01 1.10613126e-02
-3.04599851e-03 1.31062159e-04 -4.15140385e-05 3.23910135e-05], shape=(88,), dtype=float32)
tf.Tensor(
[ 1.4038084 1.0222757 -0.39947665 -0.54522014 0.5803976 -0.19679223
0.61842763 -1.1810968 0.26712915 1.9677812 0.18064204 -0.73945624
1.2229462 0.81378937 1.5831046 0.13582362 -1.1432989 -0.58895135
-2.8576777 0.8025885 -0.8269907 1.8019977 0.9033268 -1.4853681
-2.4749134 -1.2402394 1.1549553 1.4249299 -0.73732626 -1.205959
-0.36276835 1.3524033 -0.36822623 -0.7910883 1.8929895 0.50770015
0.615092 -1.922127 -0.8316696 0.77948934 -0.32454705 -0.5509257
-0.7814399 0.87534165 -0.6823204 0.7014507 -0.45084816 0.6661754
-1.2352954 -1.2468035 0.34364572 1.0995075 0.59758615 -0.34889427
0.06683233 -1.4328083 -0.6649485 -0.7945043 1.1426575 -0.98061115
-0.46078658 -0.39700815 -0.9277131 1.0759422 0.527661 -1.616195
-2.5979242 -0.34764734 0.33018574 -1.0971678 1.0204725 -0.24577658
-0.2062361 -0.9355164 -0.21259227 -0.44723493 -1.1287199 1.293658
0.1731153 -0.6951827 0.8636062 1.0371045 -2.9860606 0.5220258
-0.34013367 0.04466997 -0.06895867 0.82789123 0.43297967 0.7684806
-0.6908023 0.57721597 0.9199321 1.3768114 1.2351048 -0.9184835
0.5188774 0.88726586 1.4416382 -0.40344936], shape=(100,), dtype=float32)
import tensorflow as tf
# Reproduction 2: same round trip, but with frame_step == frame_length, so
# consecutive frames do not overlap at all.
frame_length = 40
frame_step = 40
waveform = tf.random.normal(shape=[100], dtype=tf.float32)

# Analysis: forward STFT with the library's default window.
stft = tf.signal.stft(
    waveform, frame_length=frame_length, frame_step=frame_step)

# Synthesis: inverse STFT using the window built for this hop size.
# NOTE(review): without overlap, any sample where the analysis window is zero
# presumably cannot be recovered (division by zero in the synthesis window),
# which would explain a NaN once per frame -- confirm against the
# tf.signal.inverse_stft_window_fn documentation.
synthesis_window_fn = tf.signal.inverse_stft_window_fn(frame_step)
inverse_stft = tf.signal.inverse_stft(
    stft,
    frame_length=frame_length,
    frame_step=frame_step,
    window_fn=synthesis_window_fn,
)

# Print the reconstruction first and the original second for comparison.
print(inverse_stft)
print(waveform)
tf.Tensor(
[ nan -0.5289995 -0.42322898 1.7522525 0.34771994 -0.7660091
1.6534201 -0.23190129 -0.64468837 -1.1712008 -2.6483274 1.3840579
-1.0050658 0.87601507 -0.5170545 0.6623281 -1.0479059 -0.5475797
-1.2517245 -1.1959579 0.58061975 -0.1989309 0.18141915 -1.7858055
-0.7680144 -0.21199739 0.2686664 0.06079084 -0.68268234 0.9945366
2.2236376 -0.56432855 1.281768 0.8328386 -0.6435259 0.85617316
-0.09510866 0.7118371 2.0645704 0.63955176 nan 0.15469487
1.0494236 0.3661145 -1.4089363 -1.3391382 1.0011977 -0.07683202
-1.4500219 -0.0392501 1.1920362 1.8719866 0.7821921 -0.45498323
-1.890665 0.67334163 -2.023159 -0.29750296 -0.2908026 0.38893116
-0.10589531 0.7326803 1.2091097 -0.34784627 1.311726 0.79103297
-0.91987884 0.51282054 -0.20457014 0.50253487 -0.46842676 2.5228522
-0.07464705 1.7176666 -0.40536413 -0.06539667 1.0447518 -0.42340115
0.1290549 0.53456116], shape=(80,), dtype=float32)
tf.Tensor(
[-2.005882 -0.5290042 -0.42322478 1.7522544 0.34772 -0.76600945
1.6534219 -0.23190151 -0.64468867 -1.1712015 -2.6483283 1.3840592
-1.0050665 0.8760156 -0.5170555 0.66232944 -1.0479063 -0.54757965
-1.2517256 -1.1959581 0.5806202 -0.19893074 0.1814189 -1.7858069
-0.76801383 -0.21199743 0.2686672 0.06079032 -0.6826823 0.9945371
2.2236395 -0.56433004 1.2817682 0.8328381 -0.64352524 0.8561739
-0.09510913 0.7118365 2.064576 0.6395979 0.7826336 0.15469465
1.0494274 0.36611095 -1.4089363 -1.339139 1.0011992 -0.07683332
-1.4500217 -0.03925065 1.1920377 1.8719873 0.78219193 -0.4549838
-1.8906664 0.67334247 -2.0231593 -0.2975028 -0.2908029 0.3889313
-0.1058953 0.7326806 1.2091101 -0.34784687 1.3117266 0.79103285
-0.91987914 0.5128205 -0.20457047 0.5025352 -0.46842635 2.5228539
-0.07464796 1.7176673 -0.40536594 -0.06539585 1.0447516 -0.42340097
0.12904675 0.5345574 -0.12362233 -1.9486434 0.53776556 -0.32944998
0.48096088 -1.0506314 -1.2964804 -0.34489468 -0.38139907 0.49959585
-0.25842932 -0.8132113 1.0629432 -0.5591399 0.11014029 -0.4576707
-0.15766484 -0.07155921 0.2819425 0.40777263], shape=(100,), dtype=float32)
System information
Following the official example from the `inverse_stft` documentation to reconstruct a signal, I get either a totally different signal or, when `frame_step >= frame_length`, NaNs every `frame_step` elements starting at position 0. For clarity, the lengths from the official example were divided by 10.