From a70a6007f0e5ceb3371d86565bc4492a26c589d6 Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Mon, 12 Dec 2016 14:01:15 -0500 Subject: [PATCH 1/7] WIP: adding 24-bit support to io.wavfile Based on the code at: https://gist.github.com/josephernest/3f22c5ed5dabf1815f16efa8fa53d476 --- scipy/io/wavfile.py | 154 ++++++++++++++++++++++++++++++-------------- 1 file changed, 104 insertions(+), 50 deletions(-) diff --git a/scipy/io/wavfile.py b/scipy/io/wavfile.py index 4ae943602260..a7972b916517 100644 --- a/scipy/io/wavfile.py +++ b/scipy/io/wavfile.py @@ -61,13 +61,12 @@ def _read_fmt_chunk(fid, is_big_endian): fmt = '<' size = res = struct.unpack(fmt+'I', fid.read(4))[0] - bytes_read = 0 if size < 16: raise ValueError("Binary structure of wave file is not compliant") res = struct.unpack(fmt+'HHIIHH', fid.read(16)) - bytes_read += 16 + bytes_read = 16 format_tag, channels, fs, bytes_per_second, block_align, bit_depth = res @@ -94,7 +93,7 @@ def _read_fmt_chunk(fid, is_big_endian): raise ValueError("Unknown wave file format") # move file pointer to next chunk - if size > (bytes_read): + if size > bytes_read: fid.read(size - bytes_read) return (size, format_tag, channels, fs, bytes_per_second, block_align, @@ -105,26 +104,23 @@ def _read_fmt_chunk(fid, is_big_endian): def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian, mmap=False): if is_big_endian: - fmt = '>I' + fmt = '>' else: - fmt = '> 7) * 255 + data = a.view(' 1: data = data.reshape(-1, channels) return data @@ -151,6 +157,10 @@ def _skip_unknown_chunk(fid, is_big_endian): # in case data equals somehow to 0, there is no need for seek() anyway if data: size = struct.unpack(fmt, data)[0] + # if odd number of bytes, move 1 byte further + # (data chunk is word-aligned) + if size % 2 == 1: + size += 1 fid.seek(size, 1) @@ -164,8 +174,7 @@ def _read_riff_chunk(fid): fmt = '>I' else: # There are also .wav files with "FFIR" or "XFIR" signatures? - raise ValueError("File format {}... not " - "understood.".format(repr(str1))) + raise ValueError("File format %r not understood." % str1) # Size of entire file file_size = struct.unpack(fmt, fid.read(4))[0] + 8 @@ -177,11 +186,11 @@ def _read_riff_chunk(fid): return file_size, is_big_endian -def read(filename, mmap=False): +def read(filename, mmap=False, return_cues=False, return_pitch=False): """ Open a WAV file - Return the sample rate (in samples/sec) and data from a WAV file. + Return the sample rate (in samples/sec) and data from a WAV file Parameters ---------- @@ -203,7 +212,7 @@ def read(filename, mmap=False): Notes ----- - This function cannot read wav files with 24-bit data. + The returned sample rate is a Python integer. Common data types: [1]_ @@ -224,7 +233,6 @@ def read(filename, mmap=False): Interface and Data Specifications 1.0", section "Data Format of the Samples", August 1991 http://www-mmsp.ece.mcgill.ca/documents/audioformats/wave/Docs/riffmci.pdf - """ if hasattr(filename, 'read'): fid = filename @@ -238,6 +246,9 @@ def read(filename, mmap=False): channels = 1 bit_depth = 8 format_tag = WAVE_FORMAT_PCM + cue = [] + cuelabels = [] + pitch = 0.0 while fid.tell() < file_size: # read the next chunk chunk_id = fid.read(4) @@ -252,21 +263,33 @@ def read(filename, mmap=False): fmt_chunk = _read_fmt_chunk(fid, is_big_endian) format_tag, channels, fs = fmt_chunk[1:4] bit_depth = fmt_chunk[6] - if bit_depth not in (8, 16, 32, 64, 96, 128): - raise ValueError("Unsupported bit depth: the wav file " - "has {}-bit data.".format(bit_depth)) - elif chunk_id == b'fact': - _skip_unknown_chunk(fid, is_big_endian) elif chunk_id == b'data': if not fmt_chunk_received: raise ValueError("No fmt chunk before data") data = _read_data_chunk(fid, format_tag, channels, bit_depth, - is_big_endian, mmap) - elif chunk_id == b'LIST': - # Someday this could be handled properly but for now skip it + is_big_endian, mmap=mmap) + elif chunk_id == b'cue ': + str1 = fid.read(8) + _, numcue = struct.unpack(' 0xFFFFFFFF: + if len(header_data) + data.nbytes > 0xFFFFFFFF: raise ValueError("Data exceeds wave file size limit") fid.write(header_data) # data chunk + if bitrate == 24: + a32 = numpy.asarray(data, dtype=numpy.int32) + if a32.ndim == 1: + # Convert to a 2D array with a single column. + a32.shape = a32.shape + (1,) + # By shifting first 0 bits, then 8, then 16, + # the resulting output is 24 bit little-endian. + a8 = (a32.reshape(a32.shape + (1,)) >> numpy.array([0,8,16])) & 255 + data = a8.astype(numpy.uint8) + fid.write(b'data') fid.write(struct.pack('' or (data.dtype.byteorder == '=' and @@ -383,12 +419,30 @@ def write(filename, rate, data): data = data.byteswap() _array_tofile(fid, data) + # cue chunk + if cue: + fid.write(b'cue ') + size = 4 + len(cue) * 24 + fid.write(struct.pack(' Date: Mon, 12 Dec 2016 15:36:41 -0500 Subject: [PATCH 2/7] PEP8 fixes, merging cues w/ cuelabels --- scipy/io/wavfile.py | 86 ++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/scipy/io/wavfile.py b/scipy/io/wavfile.py index a7972b916517..ea336f93c1b2 100644 --- a/scipy/io/wavfile.py +++ b/scipy/io/wavfile.py @@ -14,6 +14,7 @@ import numpy import struct import warnings +from collections import defaultdict __all__ = [ @@ -40,8 +41,6 @@ def _read_fmt_chunk(fid, is_big_endian): """ Returns ------- - size : int - size of format subchunk in bytes (minus 8 for "fmt " and itself) format_tag : int PCM, float, or compressed format channels : int @@ -60,23 +59,22 @@ def _read_fmt_chunk(fid, is_big_endian): else: fmt = '<' - size = res = struct.unpack(fmt+'I', fid.read(4))[0] + size = struct.unpack(fmt+'I', fid.read(4))[0] if size < 16: raise ValueError("Binary structure of wave file is not compliant") - res = struct.unpack(fmt+'HHIIHH', fid.read(16)) + (format_tag, channels, fs, bytes_per_second, block_align, bit_depth + ) = struct.unpack(fmt+'HHIIHH', fid.read(16)) bytes_read = 16 - format_tag, channels, fs, bytes_per_second, block_align, bit_depth = res - - if format_tag == WAVE_FORMAT_EXTENSIBLE and size >= (16+2): + if format_tag == WAVE_FORMAT_EXTENSIBLE and size >= 18: ext_chunk_size = struct.unpack(fmt+'H', fid.read(2))[0] bytes_read += 2 if ext_chunk_size >= 22: extensible_chunk_data = fid.read(22) bytes_read += 22 - raw_guid = extensible_chunk_data[2+4:2+4+16] + raw_guid = extensible_chunk_data[6:22] # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361) # MS GUID byte order: first three groups are native byte order, # rest is Big Endian @@ -96,8 +94,7 @@ def _read_fmt_chunk(fid, is_big_endian): if size > bytes_read: fid.read(size - bytes_read) - return (size, format_tag, channels, fs, bytes_per_second, block_align, - bit_depth) + return format_tag, channels, fs, bytes_per_second, block_align, bit_depth # assumes file pointer is immediately after the 'data' id @@ -131,7 +128,7 @@ def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian, # if odd number of bytes, move 1 byte further (data chunk is word-aligned) if size % 2 == 1: - fid.seek(1, 1) + fid.seek(1, 1) if bit_depth == 24: a = numpy.empty((len(data)/3, 4), dtype='u1') @@ -160,7 +157,7 @@ def _skip_unknown_chunk(fid, is_big_endian): # if odd number of bytes, move 1 byte further # (data chunk is word-aligned) if size % 2 == 1: - size += 1 + size += 1 fid.seek(size, 1) @@ -246,8 +243,7 @@ def read(filename, mmap=False, return_cues=False, return_pitch=False): channels = 1 bit_depth = 8 format_tag = WAVE_FORMAT_PCM - cue = [] - cuelabels = [] + cues = defaultdict(dict) pitch = 0.0 while fid.tell() < file_size: # read the next chunk @@ -261,29 +257,27 @@ def read(filename, mmap=False, return_cues=False, return_pitch=False): if chunk_id == b'fmt ': fmt_chunk_received = True fmt_chunk = _read_fmt_chunk(fid, is_big_endian) - format_tag, channels, fs = fmt_chunk[1:4] - bit_depth = fmt_chunk[6] + format_tag, channels, fs, _, __, bit_depth = fmt_chunk elif chunk_id == b'data': if not fmt_chunk_received: raise ValueError("No fmt chunk before data") data = _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian, mmap=mmap) elif chunk_id == b'cue ': - str1 = fid.read(8) - _, numcue = struct.unpack(' Date: Mon, 12 Dec 2016 15:38:58 -0500 Subject: [PATCH 3/7] Add link to reference page [ci skip] --- scipy/io/wavfile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scipy/io/wavfile.py b/scipy/io/wavfile.py index ea336f93c1b2..6d6df4a4083e 100644 --- a/scipy/io/wavfile.py +++ b/scipy/io/wavfile.py @@ -28,15 +28,17 @@ class WavFileWarning(UserWarning): pass +# see the Wave File Format reference for details of chunk layouts: +# https://web.archive.org/web/20141226210234/http://www.sonicspot.com/guide/wavefiles.html + WAVE_FORMAT_PCM = 0x0001 WAVE_FORMAT_IEEE_FLOAT = 0x0003 WAVE_FORMAT_EXTENSIBLE = 0xfffe KNOWN_WAVE_FORMATS = (WAVE_FORMAT_PCM, WAVE_FORMAT_IEEE_FLOAT) + # assumes file pointer is immediately # after the 'fmt ' id - - def _read_fmt_chunk(fid, is_big_endian): """ Returns From 350838be73386975a6930704ca53a7286f54c97c Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Mon, 12 Dec 2016 15:47:09 -0500 Subject: [PATCH 4/7] Fixing signedness of pitch fraction [ci skip] --- scipy/io/wavfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/io/wavfile.py b/scipy/io/wavfile.py index 6d6df4a4083e..5c7ac108d991 100644 --- a/scipy/io/wavfile.py +++ b/scipy/io/wavfile.py @@ -279,7 +279,7 @@ def read(filename, mmap=False, return_cues=False, return_pitch=False): elif chunk_id == b'smpl': size = struct.unpack(' Date: Mon, 12 Dec 2016 15:59:19 -0500 Subject: [PATCH 5/7] Adding 24bit test fixture, fixing bugs --- scipy/io/tests/data/test-44100Hz-24bit.wav | Bin 0 -> 66448 bytes scipy/io/tests/test_wavfile.py | 13 +++++++++++++ scipy/io/wavfile.py | 4 ++-- 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 scipy/io/tests/data/test-44100Hz-24bit.wav diff --git a/scipy/io/tests/data/test-44100Hz-24bit.wav b/scipy/io/tests/data/test-44100Hz-24bit.wav new file mode 100644 index 0000000000000000000000000000000000000000..305540fef8a4e99a5a8d9ec4a0a6ec55d67df111 GIT binary patch literal 66448 zcmeI*=TlS%!1sNsA}C4+0Rbt}dxzacRP4sCvG-mOTZ}O=_7Z!E-PlX4u~&AN-aAND z1VlhU5v&N$_gv3B^IZ3TaKAWbKFgc}JG^2BhVT4F4vdJ{rmm_ouJ_n}a~7|#?xv!m zqN<{%()XH*%8^bQDjifzRc1|JF@27jDo=!?R6_%|HMJ1crUSdWuEFUxVOtC(tU zUGayr6aG_i!1Y@_5i7C6(tphn@xuf^{0%X6Qa21))*1KLXyMoz4YaLL!Mmlch+Ogo zdsj4L$d>mwdg&b|l)S|Z$12R*Q;yC?Wk`Kmgbq6j(DX+xPMyiZyASEuw>%ZPPRY1s zn23FW@z`}Z7K1!v;MGxyg%%1_ManTmQ=n56jh`9?YW?I$D-v|SrGS2{5@(EJ5p^yO zEejJcb#W4|Ur50j{dA<>&cx33InennA4AR*!nVEy;gep$yRHJ$4_Cu-LLE+eH$vUw zBc|zp#zVdD=wRFqsku7FS$4z#Qyu7Ob;Xmn`j~#-7#@qeV?n1LShCdyk1Oo(uDc@= zom>#o#T}ECo>&#(gVw`-IB`1wK?j4-5E=sAtD$fylE5HUik#nNxc@;g$Sw>MjRjv4 zWLO#`7`{-71ydwQF$jhHuV7>+2O>VhAIDDmVxzw|>P~oIS&l2pOPmn!)B#6U*kX7m zYhY&JB4r&JFc$9BQO>77tDLlezYRB`s_ANV%*D>NszAa4E#cy6tS$AcRD z{i6y_Q(j|hNg0BFE5^mJ7wGJkhfRI6@qK#+JXF$f{Yo-2w3B?jAB$PP#h`yb zCBlX%FmS&dmZk##vS?&h3MyRW=ygY6{Hp@he<-mpCI(r3;$YVvkCcyz$goO*>Yr(7 zHOz!#VK$y8=ON+E3)niBVEew8*lkvUbI;$vYF#ZPBN|{A(1f?npRm{Q8%$l>u+mEn zr+hnLhnF_|?R0TjOCO($j4=3bGgJgw!YJAr<8AHGXJ#+#UF!_H`EKa!7xiH(--FbZ*jo0}s_KG|XFaT^%*=z;Ycx+6T*7{=}T$mr1(qdau*w{J(B^j610 zw{~=K`i@;rpYh)NBYXxnqF-bkI$W!URHYK9BVXZ}N-4VDEkxz6e9YgMgOiF(jI~LJ z|D6=vT9X8o)d_IB9tX7^vFMhf#Ep9jG?mG5r@vt5`)Fhq3XZDF@ocdm(?kI$RV9iY zWAJQOEEc%OBeYW@##rL^uxW-C#!cvq=l!}N!P*e}s!cHBusKFLS)uhr zPvqCzV~42|YRz2H*x-)ngI>6!<%v8DoD`^ zL$QNkj)q|O85th5NwLX7Q1m?%#V14XMm-ouJOYu}(;u5(`oLtK7rs1qN8TG3?0oHr zp7-pLHoYe*>UyAjtU0D$G=X860n${vVTn;^R9k2vz(NCaOjNK=zZF~czF?AJxh)|ZP6HBC|K4k`2Ld|2kQhU;}r;brNpR!Sp1b7ho6olAnjBV zT8mR~VrV)7K4iinHV6A+^0De&AuffKBIEiitd>W zZJzl3cOUpC`yuyb0D3a9yW!b_ncsKJspVJQ~t=G>x(X}UTB}}fuVa{k#xujQOg_z4z@6PXoY+y3-tcO z6rG+L;&Vkey#3Z0DH@${LQ@k{RaIfr+KRfbUl7~Yj9a=NaMrCJ8>ZI4{&E%i>Ac3) zzsj)KqZkuD7hp$qF7(y2Q8Xk2rxH?8wK5qR6B1#uDjxc&vDiO323=j1cpRv}woP() zb`#i_MB~I;!FE46PALUL_b8BbObLz37~Gv7hvCi%FtSTR$1y4R@Hh<%Mr9(;JO^|2 z@?q~)h`QA!Sp51WLKjyc!LS-L^J+2R-v%7t--O7mpOCWo8!m5e!;jr+knih&>RsAc z_J=O!&d|q8Z)2#}o55(8C6;ux!Inrne7)8Sn-$LJ_OBbR&+){G58n7`pdS`23_#+T zAgt>Ug50&C7<*TOq#IIrFOVUpO5kY|23-q5RK5&nh6`+WO7VP)1grao;$U2`Kqm

cpX6bUXsCB_cgD8LyqwVDc~nMJuzhV0s=3R=+^U`^C6#`V#9- zyhgU<4Z41jp-l*ER|ety?f~?Q^g}NTADlht ziQXk{kheHPr?MCPui9b5U>l4`v4oRbcU)g#j1w31ksPCo9y!|BQ_ulcd1}bdXv6cA zZ>UK9g#HChc-zo`J0^A5Jf<2B*DBE0>J_5?D}l}YLi`hv52Il@_Z<>@1wWlZzwr0(AIPgnPrw&@ZhVKJ%(jZdrrZQNJz_dIN)TxG)4YZW7$r(c}Ju^S4aAzb{OCLj+=Ks6f*1*%(>*!*)WY?|T(aS6DQnS{sssZiOT z4o$Bt^w7#hq)q{v0*bI=M=4IVzrv>jm8j_R7B_U>VP|zCj^unqWy)tPP5lnvtado& zt0SzSBUWYVAo*EW3_NatZnI4=+{9dPqX$+x^hEe?_Lz9h5j*d=AZfQdd_ufncHaj- zTK#aSOCZv!f{?N?1n;Usk)bWckPm`ikH~PRU9j6B3{{;4ng7Y~uc@G$Op0O$3GA~% z5FrajPE;U{F80S+FJHJRyl}mT2a<=lV*D^COmc95>WiK@KF13GirBkh_J@Ey{&o<-no%aaMeTNQOHK-j~g|iRKkrrBphSx=y zaiah;&*nl^nT4L+GVtr3RO}8<#!Z(*d>RsuzL#SWE{nkg10^2WEAZ25Ia0I)CyJu+ z;ho@tOpX)T0__V5c-&Xwvqmfo_Qs)QOagL8C*kIf6bxxegJgdu%BSRD{`h?4tt~`b zTnP^Pyn=(G0@e$wk?U56)tZg)XlQ~?d_zce8~%8whMa~DaIe+I?jl{ZJ<-SC zJB+a@ygRgBSz`TE8_a!Xhl!thp+`p-9DnDA>T{kL;Oqn6b$%FqECAd72tu`U2;z=~ z;%1rz%al?W{vyNcdciE)FoCI{K2?Sxy#-xXNzuAc0z11nb(^wu9EANu0Z zC~t&3^}x$!R}4{c#;@fL*nQj~un2~aChU@6^j;~ZkCx&2 z3qhqt7S%pRd{3AI8U59=7w&! zoN@42FN9CF!<6sV81{=Ljun`p#=;oi2kYa)JY8h3(#D!!JK*?gHS~#U!`da^FeLI5 zo~&(x@?ZmoM%N;!{S5|M;jKi->=>Bk#p0Ls z@whfS5vd!J@h~qHW|J~7+aMdSzvSXVrx!RHUX1nsl;MljYy9@C3iDUiK&@{*qU=8) z+o%O@dSCIU!4Jfls-n?C6D{VQkfqlJ+rM>#YqlX?9W+Ie)B=lAt)Lcc3ztm}`1_I* ztj@S1X0ZqEYI|eEN?*AC>kpl)fv8^^j85M}kTp<(PZOk=GxuB~-7-rcE{65QY zdz+wtkrWr-N$^}Ail^g3pnWF@BXR=}^~?`jB7IQS=!tfTJ5Ek^!Ko3B(6_XQPmB$w z^zQ-V>)r9~voT^_3=lD-D`rpDf$_|a_<6cI0w%TN;kfUp8TT3K(>`L(sz#hXQHP%k zsxi&E60?uKLVK@LoG&cI<=gr2zmtPe<(bHlrejKa3g+%lg4?bHJeJ3y*gY2Zg-X1a zD=?;74$q+i^N-PpDijP=kz>nT!ChSiyjlc?-DBYQTP%FK$75kr0@T%$Q9UpfX|d@D zSeAvALvnFpQ~{oEEW*yhQrsF*E_hps_s8Di!K`=i4}Xsx9?g(De!&Z;R;0SCV4sf$ zEPS;v&$TmFns&p;W&>D0HG%dbbIee&!e1+ULO0PKdz&5c`m+nZr@KRCwHG4leK69) zA1{Lg1zm!%_F@Rk)FqhMQwl3}88)65RB48x(m^n}qagB<41aVKI5|o&Tw4Nzn<2^WV z)_I`rwkxJRa>C2Q4%j`^7EfPUp=gu^)?PKm(6@##)ziZx$1bq+>4b1!O*nh1qTKlh zmN|aKYu6U|hkn4U3H2DYvjzp(Rk-Q<8o%EyL-Lqnw3xp@n06kbZL=|DVFn5-Qt@O@ zGA1ocMCp!r^eT%*-J%#A4^(1NxB}yM%3)+Euqchjg*X4hQ+~lyp8Z&GW|IN~b}8{B zCkA)M#^GPX1eEI}VRCQ^>Q1J?!8a4jzh$FiLmuAg6ynG55;Q$}iOGE`aHHl8Lax+8 z`g;RPW;Ma-=TC4Q_YE&6v>|Ak8oXw9z|*PPC>WuOzx?#^p@T8pm1fvD(-MQKt?_lB z9h|oH!r@cSaM?F>4?iT03c!=FAec4>!(~w@ez_z;?0G44XUcH2Tu^Bh zhFg{b)nXaU#tVKsBt^pk36_lxMgJGUXs`}~wPygbI{Lxwsy7apdE(f3H)t+!M&j^Z zQ0-=i=Bw7|Y;TFYKh5wd)(GBBdYI8g7m?kyG1{^N^t-F!u3;Pc=zT-7!6yv1Zh}0x z0Y-Cc(euh1T+^t)#C0$6%(w)S+!yHiC=Y)nW+PHF6NjSGuwOp~$<>Ki)EqAej>GmR zF$iCyMCl3zrrwlex09f$A{v?elS5gwTX#9q4+#QBC~$SK60VzKu<>gw0Y`rbhyEMbq}AZ)$$IQr{{d!EEeKok z73#}=VEigo`2DJhy}xwA?8RO1aI_vyIvXLV&J?lxEpWin8b;e}QJn68qYX}QsdUBR zYaZy@&l_K_`l7DbAGh)XG30PCTFpZdFk6BJbMSVlkah8XEUyC{(?bUTQT!56@1vMf$F_l z2;0^fZBgCeIn)qe^i8o{ZVs=JR`@x#Cw3S(U}>lmib7nGr0aoM|9L^f&KEw@{c(G4 zAXfMV<9cEU;;bZ?86?FbLm4!03HIoO;g*A7sH$N5aT(HDqGm8~+MHX=4yR zng-%zCx0}=_`q|J7s^h#<7%u6BA+`VbdNp4gL|S_*#nzg&GF(_6YRNdfM0UEVr8`s z)--g)#(H%ed)tnritjM1_>BFvAK}y1h<7&caOUT?u)0zSOT%*boGL}xpdzfaEP#ty zF0^`Qq2}*&thG)>U}+NOWhS6|YaHxn#KJ)%2ESD(Fi=$xHARkN-=kq)DDe9%_;-vP z&dmb7Yz6*oP-4=6SoAN8!<8EexcMLnLqDgW_ri4O>1DzAO%7I7=fhdQ2pTg>aU=f~ zZqBd7Qq#8xdRd3?$Bnph;Uj)K`5Dhoe#gAC?TEajj?~K?vEr-_=I!l@drJ(kI>-d2 z@4CZda}Q*+*+4SZ9#-2NA=&4G9ZTF{XX%BXkNMzog&$%+1>kU25c))gz`h_91#J>| zzLlcSei`Pr3OpUcu(GRQ=tCKFZ3H6+N%7uSg0}JydMP;W>y*OIu#=)y8v~uxmfo)3(MRy@cX~1aG8{h zl<-8XnH~@G$FZ0&EC%PTlyLG;pm?1ei#rR37f0iMtzb{E9DgMX%#SIs=&}+^nq!dr zTO16+5-=w;3294Hp!XsT2~nB&Ns@z7-+V-kEkxa^5`58q1*g9&P%o>-A=Nree9?dz z@+LgF_X&&ce#5edZHRuRhQaa<=<-w>8rO9ZxJMs~DaJ@K>5jwqEwMhp2C9GC;a*lR zOs;gs>l8PP+w2JyT^|ge>4%050eG+=2ujlsOx_s^1BC=P9!lZ6OoqA|!Ni_n7}Q;$ z$dW;8py0}SDHgAhV2W=jl>Y_8@>3w(RRYkE?TcNJ-iRvpfS<7&{;_q&l@7fSrL;xg z3D($^Wr0E$GvqBYLf<2LQ2nn9zC7y$Te&7SKUIa(gC8)x`xTQOwBT#(2UNYPhj!Om zgpYcI!M9%{-Ss8LBo$-8ju)tn%ES0A+1QYf0oR~3RF@{h;9(*PlH%cD6o>oAW1v1& zi6PSz*!zzh`>X|-ucEQDToBt+j)3!mS+f<`I$Md;=VGv^M;w;r#-k-B5gqH2@mv2i zH0NiaYU1-(ZBvz0DBy#saICSfe!07C~mc5bo!U7Y=UFsPjPk@80lw zCPeo_{6;&hv9&?L8B5Gp>yE4e#>o6d zA0dZz@%_9uI$Y_1DHqky`Ai$kPkzIpQ=hQzN)s{_4Om-Oi=UmV;kBUxw?4hZyqzUT z>Qe|S+kEVC&4JO(Osq>v!<-o@2=+{ZyI%srmdC;GT?`JNQR3D?1@^_up%o~&P!o;& z{KzVf9;7RW+n<7Z9|iL4m1r6h1O1p-BrJ@_tYL}Jn4gRtk5X|?l7YCUEClA~!mz3U z5f;VR|4SJTe=0}T{wkavSc85h^=SP39uKRVapmtwsd{8Z_`vy%v5{bjHlI zZm76mh|}{;k#1;#=jW_Ys&9+CQykE;(FrU6aE0$M4@|B1!l((pIJ4g$ItK%>dr~l_ z)`p-VM1nUzN#SEJL)&A)?QUUcu@~I^CWHAN!J!H%GQLP~KO+>&=Z9cvLJ)@61)wS4 z4@>{@fuXJ!W=wI1?s^w|Tj>bjKK5v?wZW0d9ynFd9T#m)Fn7EG?yl*I$elXayRRb* z_o_o}XFKL>`Hn$bKI880k2rI#5$Z{Gc&zpo`$ktHRQU>{ek#SV_Coxbmk;y29N4I4 zVd127oT*7c@V`m;ye zrElRD_zoq8@3C9889#md4BPMDA=Wx#ts~YtVyz?AI%2IO);eOXBi1@%ts~YtVyz?A zI%2IO);eOXBi1@%ts~YtVyz?AI%2IO);eOXBi1@%ts~YtVyz?AI%2IO);eOXBi1@% zts~YtVyz?AI%2IO);eOXBi1@%ts~YtVyz?AI%2IO);eOXBi1@%ts~YtVyz=?>xkPr z;sts`#hh}$~iwvM>1BW~-6+dAU5j<~HOZtIBKI^wpDxUC~@>xkPr;sts`#h zh}$~iwvM>1BW~-6+dAU5j<~HOZtIBKI^wpDxUC~@>xkPr;sts`#hh}$~iwvM>1 zBW~-6+dAU5j<~HOZtIBKI^wpDxUC~@>xkPr;@p%5Hs4#h=dDI&fJ_MVcVR4ok2 z4gzm&f!B2z9J&dv`$%!oR06Tq5o;Z>))8wRvDOi59kJFCYaOxH5o;Z>))8wRvDOi5 z9kJFCYaOxH5o;Z>))8wRvDOi59kJFCYaOxH5o;Z>))8wRvDOi59kJFCYaOxH5o;Z> z))8wRvDOi59kJFCYaOxH5o;Z>))8wRvDOi59kJFCYaOxH5o;Z>))8x+ray9V>P!~i zeMrZ?<*CqhO2#e2MC=QU$F9S%7~~lPuZ~JAv{0ZbQVy}!5o;Z>))8wRvDOi59kJFC zYaOxH5o;Z>))8wRvDOi59kJFCYaOxH5o;Z>))8wRvDOi59kJFCYaOxH5o;Z>))8wR zvDOi59kJFCYaOxH5o;Z>))8wRvDOi59kJFCYaOxH5o;Z>))8wRvDOi59kJFCYaOxH z`M<5!nYnVdwXcebiW)CfUcq`QDzm1qn9k|{9KyBmY(>xi^S*FBWZ{|KGLnwVvMp@8=HUeBiK%p9c0B*|#(2-kx6mZeHHte!pS;b-33% zyiZ?i;RA0qa8$%-WA2Swx@3*@sHH2H%(9MH8Z~#eb)dVaipuiEk&8lk`oE8U|NDY- T;k)_Y$4M;}m3*`R|IYsdM&-mi literal 0 HcmV?d00001 diff --git a/scipy/io/tests/test_wavfile.py b/scipy/io/tests/test_wavfile.py index 21df845d5a69..56a6ebffc6e6 100644 --- a/scipy/io/tests/test_wavfile.py +++ b/scipy/io/tests/test_wavfile.py @@ -78,6 +78,19 @@ def test_read_5(): del data +def test_read_24bit(): + for mmap in [False, True]: + rate, data, cues, pitch = wavfile.read( + datafile('test-44100Hz-24bit.wav'), mmap=mmap, return_cues=True, + return_pitch=True) + assert_equal(rate, 44100) + assert_(np.issubdtype(data.dtype, np.int32)) + assert_equal(data.shape, (11025, 2)) + assert_equal(pitch, 440.0) + assert_equal(cues, {1: {'pos': 4410}, 2: {'pos': 8820}}) + del data + + def test_read_fail(): for mmap in [False, True]: fp = open(datafile('example_1.nc')) diff --git a/scipy/io/wavfile.py b/scipy/io/wavfile.py index 5c7ac108d991..c06f1452a3bb 100644 --- a/scipy/io/wavfile.py +++ b/scipy/io/wavfile.py @@ -133,7 +133,7 @@ def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian, fid.seek(1, 1) if bit_depth == 24: - a = numpy.empty((len(data)/3, 4), dtype='u1') + a = numpy.empty((len(data)//3, 4), dtype='u1') a[:, :3] = data.reshape((-1, 3)) a[:, 3:] = (a[:, 3 - 1:3] >> 7) * 255 data = a.view(' Date: Thu, 5 Jan 2017 14:19:57 -0500 Subject: [PATCH 6/7] Simplify metadata return, use MSB for 24bit read --- scipy/io/tests/test_wavfile.py | 9 +++---- scipy/io/wavfile.py | 47 +++++++++++++++++----------------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/scipy/io/tests/test_wavfile.py b/scipy/io/tests/test_wavfile.py index 56a6ebffc6e6..fbbc67506c15 100644 --- a/scipy/io/tests/test_wavfile.py +++ b/scipy/io/tests/test_wavfile.py @@ -80,14 +80,13 @@ def test_read_5(): def test_read_24bit(): for mmap in [False, True]: - rate, data, cues, pitch = wavfile.read( - datafile('test-44100Hz-24bit.wav'), mmap=mmap, return_cues=True, - return_pitch=True) + rate, data, meta = wavfile.read(datafile('test-44100Hz-24bit.wav'), + mmap=mmap, return_metadata=True) assert_equal(rate, 44100) assert_(np.issubdtype(data.dtype, np.int32)) assert_equal(data.shape, (11025, 2)) - assert_equal(pitch, 440.0) - assert_equal(cues, {1: {'pos': 4410}, 2: {'pos': 8820}}) + assert_equal(meta['pitch'], 440.0) + assert_equal(meta['cues'], [(4410, None), (8820, None)]) del data diff --git a/scipy/io/wavfile.py b/scipy/io/wavfile.py index c06f1452a3bb..3753ed4cb348 100644 --- a/scipy/io/wavfile.py +++ b/scipy/io/wavfile.py @@ -113,7 +113,7 @@ def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian, # Number of bytes per sample bytes_per_sample = bit_depth//8 if bit_depth in (8, 24): - dtype = 'u1' + dtype = numpy.uint8 bytes_per_sample = 1 elif format_tag == WAVE_FORMAT_PCM: dtype = '%si%d' % (fmt, bytes_per_sample) @@ -133,10 +133,9 @@ def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian, fid.seek(1, 1) if bit_depth == 24: - a = numpy.empty((len(data)//3, 4), dtype='u1') - a[:, :3] = data.reshape((-1, 3)) - a[:, 3:] = (a[:, 3 - 1:3] >> 7) * 255 - data = a.view(' 1: data = data.reshape(-1, channels) @@ -185,7 +184,7 @@ def _read_riff_chunk(fid): return file_size, is_big_endian -def read(filename, mmap=False, return_cues=False, return_pitch=False): +def read(filename, mmap=False, return_metadata=False): """ Open a WAV file @@ -200,6 +199,8 @@ def read(filename, mmap=False, return_cues=False, return_pitch=False): Only to be used on real files (Default: False). .. versionadded:: 0.12.0 + return_metadata : bool, optional + Whether to return additional metadata (i.e., cues, pitch). Returns ------- @@ -246,7 +247,7 @@ def read(filename, mmap=False, return_cues=False, return_pitch=False): bit_depth = 8 format_tag = WAVE_FORMAT_PCM cues = defaultdict(dict) - pitch = 0.0 + pitch = None while fid.tell() < file_size: # read the next chunk chunk_id = fid.read(4) @@ -297,15 +298,15 @@ def read(filename, mmap=False, return_cues=False, return_pitch=False): else: fid.seek(0) - result = [fs, data] - if return_cues: - result.append(dict(cues)) - if return_pitch: - result.append(pitch) - return tuple(result) + if not return_metadata: + return fs, data + cues = sorted([(c['pos'], c.get('label', None)) for c in cues.values()]) + metadata = dict(cues=cues, pitch=pitch) + return fs, data, metadata -def write(filename, rate, data, cues=None, loops=None, bitrate=None): + +def write(filename, rate, data, cues=None, loops=None, bit_depth=None): """ Write a numpy array as a WAV file. @@ -321,9 +322,9 @@ def write(filename, rate, data, cues=None, loops=None, bitrate=None): Play order positions of cues. loops : sequence of (int,int) pairs, optional Pairs of (unity note, pitch fraction) values. - bitrate : int, optional + bit_depth : int, optional The number of bits per sample. - If None, bitrate is determined by the data-type. + If None, bit_depth is determined by the data-type. Notes ----- @@ -374,12 +375,10 @@ def write(filename, rate, data, cues=None, loops=None, bitrate=None): channels = 1 else: channels = data.shape[1] - if bitrate is None: + if bit_depth is None: bit_depth = data.dtype.itemsize * 8 - elif bitrate != 24 and bitrate != data.dtype.itemsize * 8: - raise ValueError("Unsupported bitrate for dtype: %s" % data.dtype) - else: - bit_depth = bitrate + elif bit_depth != 24 and bit_depth != data.dtype.itemsize * 8: + raise ValueError("Unsupported bit_depth for dtype: %s" % data.dtype) bytes_per_second = rate * (bit_depth // 8) * channels block_align = channels * (bit_depth // 8) @@ -404,14 +403,14 @@ def write(filename, rate, data, cues=None, loops=None, bitrate=None): fid.write(header_data) # data chunk - if bitrate == 24: + if bit_depth == 24: a32 = numpy.asarray(data, dtype=numpy.int32) if a32.ndim == 1: # Convert to a 2D array with a single column. - a32.shape = a32.shape + (1,) + a32 = a32[:, None] # By shifting first 0 bits, then 8, then 16, # the resulting output is 24 bit little-endian. - a8 = (a32.reshape(a32.shape + (1,)) >> numpy.array([0,8,16])) & 255 + a8 = (a32[:, None] >> numpy.array([0,8,16])) & 255 data = a8.astype(numpy.uint8) fid.write(b'data') From 205ab3edab7eebed7475da4303f83f0b9bc3c40f Mon Sep 17 00:00:00 2001 From: CJ Carey Date: Thu, 5 Jan 2017 14:23:35 -0500 Subject: [PATCH 7/7] Also return bit_depth in metadata --- scipy/io/tests/test_wavfile.py | 1 + scipy/io/wavfile.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/scipy/io/tests/test_wavfile.py b/scipy/io/tests/test_wavfile.py index fbbc67506c15..426495ae0b5c 100644 --- a/scipy/io/tests/test_wavfile.py +++ b/scipy/io/tests/test_wavfile.py @@ -86,6 +86,7 @@ def test_read_24bit(): assert_(np.issubdtype(data.dtype, np.int32)) assert_equal(data.shape, (11025, 2)) assert_equal(meta['pitch'], 440.0) + assert_equal(meta['bit_depth'], 24) assert_equal(meta['cues'], [(4410, None), (8820, None)]) del data diff --git a/scipy/io/wavfile.py b/scipy/io/wavfile.py index 3753ed4cb348..d533cd7e4702 100644 --- a/scipy/io/wavfile.py +++ b/scipy/io/wavfile.py @@ -200,7 +200,7 @@ def read(filename, mmap=False, return_metadata=False): .. versionadded:: 0.12.0 return_metadata : bool, optional - Whether to return additional metadata (i.e., cues, pitch). + Whether to return additional metadata (cues, pitch, bit_depth). Returns ------- @@ -302,7 +302,7 @@ def read(filename, mmap=False, return_metadata=False): return fs, data cues = sorted([(c['pos'], c.get('label', None)) for c in cues.values()]) - metadata = dict(cues=cues, pitch=pitch) + metadata = dict(cues=cues, pitch=pitch, bit_depth=bit_depth) return fs, data, metadata