In [1]:
import wave as we
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pywt
import math

In [2]:
class Wave:
    """Load a PCM WAV file and demonstrate framing, windowing, DFT, DCT and DWT.

    Constructing an instance reads the whole file into memory, splits it into
    channels, frames and Hamming-windows every channel (``hammingWav``) and, as
    a side effect of ``hamming``, opens two matplotlib figures.

    Attributes set by ``__init__``:
        audio       -- the ``wave`` reader (already closed; kept for compatibility)
        nchannels   -- number of interleaved channels in the file
        sampwidth   -- sample width in bytes
        framerate   -- sampling rate in Hz
        nframes     -- number of sample frames in the file
        params      -- the tuple returned by ``getparams()``
        dataWav     -- raw interleaved PCM bytes
        secs        -- duration in seconds (nframes / framerate)
        hammingWav  -- array of shape (nchannels, n_frames * wlen) holding the
                       windowed frames of each channel laid end to end
    """

    # Number of samples per channel shown in the demo figures; equal to the
    # default frame length of ``framing``/``hamming``.
    _FRAME_LEN = 1024

    # Sample width in bytes -> NumPy dtype string. WAV PCM is little-endian and
    # 8-bit samples are unsigned.
    _SAMPLE_DTYPES = {1: 'u1', 2: '<i2', 4: '<i4'}

    def __init__(self, filePath):
        """Read ``filePath`` and precompute the windowed frames.

        Raises whatever ``wave.open`` raises for a missing or malformed file,
        and ``ValueError`` for a sample width this class cannot decode.
        """
        # The context manager closes the file even if reading fails part-way.
        with we.open(filePath, 'rb') as audio:
            self.audio = audio
            self.nchannels = audio.getnchannels()
            self.sampwidth = audio.getsampwidth()
            self.framerate = audio.getframerate()
            self.nframes = audio.getnframes()
            self.params = audio.getparams()
            self.dataWav = audio.readframes(self.nframes)
        self.secs = self.nframes / self.framerate
        self.hammingWav = self.hamming()

    def _channelData(self):
        """Decode ``dataWav`` into an array of shape (nchannels, nframes).

        Raises ``ValueError`` when ``sampwidth`` has no matching NumPy dtype.
        """
        dtype = self._SAMPLE_DTYPES.get(self.sampwidth)
        if dtype is None:
            raise ValueError('unsupported sample width: %r bytes' %
                             (self.sampwidth, ))
        # Drop a trailing partial frame so the reshape below cannot fail.
        frame_bytes = self.sampwidth * self.nchannels
        usable = len(self.dataWav) - len(self.dataWav) % frame_bytes
        samples = np.frombuffer(self.dataWav[:usable], dtype=dtype)
        # Samples are interleaved channel by channel; transpose to one row
        # per channel.
        return samples.reshape((-1, self.nchannels)).T

    def _scatterRows(self, name, xs, ys):
        """Open figure ``name`` and scatter ``ys[i]`` against ``xs[i]``, one subplot per row."""
        fig = plt.figure(name)
        rows = len(ys)
        for idx in range(rows):
            plt.subplot(rows, 1, idx + 1)
            # The first row is drawn in green, the rest in the default colour.
            style = {'color': 'green'} if idx == 0 else {}
            plt.scatter(xs[idx], ys[idx], s=0.5, **style)
        fig.show()

    def drawReg(self):
        """Plot the first (up to) 1024 raw samples of every channel."""
        wave = self._channelData()
        # Clips shorter than 1024 frames are plotted in full.
        count = min(self._FRAME_LEN, wave.shape[1])
        self.draw(wave[:, :count], np.arange(count), '原始图像前1024个数据')

    def draw(self, wave, x, name):
        """Scatter every row of ``wave`` against ``x`` in a figure called ``name``."""
        self._scatterRows(name, [x] * len(wave), wave)

    def framing(self, wave_data, wlen=1024, inc=256):
        """Split a 1-D signal into overlapping frames.

        Args:
            wave_data: 1-D sequence of samples.
            wlen: frame length in samples.
            inc: hop between the starts of consecutive frames.

        Returns:
            Float array of shape (n_frames, wlen). The signal is zero-padded so
            the last frame is complete; at least one frame is always returned.

        Raises:
            ValueError: if ``wlen`` or ``inc`` is not positive.
        """
        if wlen <= 0 or inc <= 0:
            raise ValueError('wlen and inc must be positive')

        wave_data = np.asarray(wave_data)
        # Use the length of the signal actually passed in, not the file's
        # frame count, so the padding always matches the data.
        signal_length = len(wave_data)
        # A signal shorter than one frame still produces a single padded frame.
        nf = max(1, int(np.ceil((1.0 * signal_length - wlen + inc) / inc)))
        # Total length covered by all frames once laid out with hop ``inc``.
        pad_length = (nf - 1) * inc + wlen
        zeros = np.zeros((pad_length - signal_length, ))
        # The zero-padded signal.
        pad_signal = np.concatenate((wave_data, zeros))

        # indices[f, s] = f * inc + s: sample s of frame f.
        offsets = np.arange(wlen)[np.newaxis, :]
        starts = inc * np.arange(nf)[:, np.newaxis]
        return pad_signal[starts + offsets]

    def hamming(self, wlen=1024, inc=256):
        """Frame every channel and apply a Hamming window to each frame.

        Opens two figures showing the first frame of each channel before and
        after windowing.

        Returns:
            Array of shape (nchannels, n_frames * wlen).
        """
        channels = self._channelData()
        # Shape (nchannels, n_frames, wlen).
        framed = np.stack([self.framing(ch, wlen, inc) for ch in channels])

        x = np.arange(wlen)
        self.draw(framed[:, 0, :], x, '分帧之后两个声道的第一帧')

        # The window broadcasts over the last axis; frames of one channel are
        # then concatenated into a single row.
        frames = (framed * np.hamming(wlen)).reshape(len(framed), -1)

        self.draw(frames[:, :wlen], x, '加窗之后两个声道的第一帧')

        return frames

    def mainDFT(self):
        """Plot the DFT of the first windowed frame and its IDFT reconstruction."""
        res_little = self.hammingWav[:, :self._FRAME_LEN].copy()

        # DFT: scatter the spectrum in the complex plane (real vs. imaginary).
        res_dft = self.dft(res_little)
        self._scatterRows('经过DFT之后两个声道的第一帧', np.real(res_dft),
                          np.imag(res_dft))

        # IDFT
        res_idft = self.idft(res_dft)
        x = np.arange(res_idft.shape[1])
        self.draw(res_idft, x, '经过IDFT之后两个声道的第一帧')

    @staticmethod
    def _dftKernel(size):
        """Return the (size, size) matrix W with W[n, k] = exp(-2j*pi*n*k/size)."""
        idx = np.arange(size)
        # Reduce n*k modulo size before scaling so the angle stays in
        # [0, 2*pi) and the trigonometric functions keep full precision.
        angles = 2 * np.pi * (np.outer(idx, idx) % size) / size
        return np.cos(angles) - 1j * np.sin(angles)

    def dft(self, res):
        """Discrete Fourier transform of every row of the 2-D array ``res``.

        Returns a complex array with the same shape as ``res``.
        """
        res = np.asarray(res)
        return np.dot(res, self._dftKernel(res.shape[1]))

    def idft(self, res):
        """Inverse DFT of every row of the 2-D array ``res``.

        Only the real part is returned, so the result equals the original
        signal when that signal was real-valued.
        """
        res = np.asarray(res)
        column = res.shape[1]
        # The inverse kernel is the complex conjugate of the forward one,
        # scaled by 1/N.
        kernel = np.conj(self._dftKernel(column))
        return np.real(np.dot(res, kernel) / column).copy()

    def mainDCT(self):
        """Plot the DCT of the first windowed frame and its IDCT reconstruction."""
        res_little = self.hammingWav[:, :self._FRAME_LEN].copy()

        res_dct = self.dct(res_little)
        x = np.arange(res_dct.shape[1])
        self.draw(res_dct, x, '经过DCT之后两个声道的第一帧')

        # Use the inverse transform here; applying the forward DCT a second
        # time does not recover the signal.
        res_idct = self.idct(res_dct)
        self.draw(res_idct, x, '经过IDCT之后两个声道的第一帧')

    @staticmethod
    def _dctTerms(size):
        """Return ``(cosines, scale)`` for the orthonormal DCT-II of length ``size``.

        ``cosines[n, k] = cos(pi * (n + 0.5) * k / size)`` and ``scale`` is the
        (1, size) row of normalisation factors: sqrt(1/size) for k = 0 and
        sqrt(2/size) otherwise.
        """
        scale = np.sqrt(2 / size) * np.ones((1, size))
        scale[0][0] = np.sqrt(1 / size)

        sample_line = (np.arange(size) + 0.5).reshape((-1, 1))
        freq_line = np.arange(size).reshape((1, -1))
        cosines = np.cos((np.pi / size) * sample_line * freq_line)
        return cosines, scale

    def dct(self, res):
        """Orthonormal DCT-II of every row of the 2-D array ``res``."""
        res = np.asarray(res)
        cosines, scale = self._dctTerms(res.shape[1])
        return np.dot(res, cosines) * scale

    def idct(self, res):
        """Inverse of ``dct`` for every row of the 2-D array ``res``.

        The scaled DCT-II matrix is orthogonal, so its inverse is its
        transpose.
        """
        res = np.asarray(res)
        cosines, scale = self._dctTerms(res.shape[1])
        return np.dot(res * scale, cosines.T)

    def mainDWT(self):
        """Run a single-level 'db2' DWT on each channel and plot the reconstruction.

        Each channel is rebuilt as the sum of the signals reconstructed from
        its approximation and detail coefficients.
        """
        data = self.hammingWav.copy()
        length = data.shape[1]

        rebuilt = []
        for channel in data:
            cA, cD = pywt.dwt(channel, 'db2', 'smooth')
            # ``take`` trims each partial reconstruction to the input length.
            upcoef_cA = pywt.upcoef('a', cA, 'db2', take=length)
            upcoef_cD = pywt.upcoef('d', cD, 'db2', take=length)
            rebuilt.append(upcoef_cA + upcoef_cD)

        res = np.vstack(rebuilt)

        count = min(self._FRAME_LEN, res.shape[1])
        self.draw(res[:, :count], np.arange(count), '经过DWT之后两个声道的第一帧')

    def getInfo(self):
        """Print the WAV header parameters."""
        print(self.params)

In [3]:
# Relative path of the WAV clip analysed by the cells below.
filePath = 'wav/fav.wav'
# Constructing Wave reads the file and runs hamming(), which opens two figures.
wav = Wave(filePath)



In [4]:
# Print the WAV header parameters, then open the demo figures in order:
# raw samples, DFT/IDFT, DCT/IDCT and the DWT reconstruction.
for demo_step in (wav.getInfo, wav.drawReg, wav.mainDFT, wav.mainDCT,
                  wav.mainDWT):
    demo_step()

_wave_params(nchannels=2, sampwidth=2, framerate=44100, nframes=26460, comptype='NONE', compname='not compressed')


