In [2]:
import wave  
import struct
import os
from scipy import signal
import numpy as np
import tensorflow as tf
# from tensorflow.python.ops import variable_scope as vs
tf.reset_default_graph()

# Folder whose entries AudioRNN.train feeds to init_handle one by one.
path = r'C:\Users\xujiahao\Desktop\MIR-1K_for_MIREX\trainwav' # training-data directory

# Output files written by out_handle (first mask -> fname1, second mask -> fname2).
fname1 = 'left.wav'
fname2 = 'right.wav'

# Every clip is cropped / zero-padded to nframes samples in init_handle;
# at framerate = 16000 that is 96000 / 16000 = 6 seconds.
nframes = 96000
framerate = 16000
# Sample width (bytes) passed to Generate_Wav for the separated outputs.
sampwidth = 2

batch_size = 1
# Time steps per clip; the recorded run below printed a (189, 1026) prediction.
num_steps = 189
# Features per time step; nperseg = 1024 in the STFT gives 1024 / 2 + 1 = 513 bins.
step_num = 513
rnn_hidden1_size = 200 # original note: size may be chosen freely (it also says hidden1_size==step_num, which these values do not satisfy)
rnn_hidden2_size = 100
rnn_hiddenL_size = 200
state_size = 200 # must equal rnn_hiddenL_size, the width of the last hidden layer
y_size = step_num*2 # twice step_num: two masks are predicted side by side
# Small constant added in the soft time-frequency mask of out_handle.
soft=1e-4

# Write an audio file
def Generate_Wav(fname, wave_data, width, rate):
    """Write ``wave_data`` to ``fname`` as a single-channel WAV file.

    Samples are truncated toward zero (as ``int()`` did in the previous
    implementation) and then clipped to the signed 16-bit range, so values
    that overshoot [-32768, 32767] saturate instead of raising
    ``struct.error``.

    Args:
        fname: Destination path (or file object) accepted by ``wave.open``.
        wave_data: Iterable or NumPy array of real-valued samples.
        width: Sample width in bytes stored in the WAV header.  The payload
            is always packed as little-endian 16-bit integers, so callers
            are expected to pass 2.
        rate: Sampling rate in Hz stored in the WAV header.

    Raises:
        ValueError: If ``wave_data`` contains NaN or infinite values.
    """
    # Generators and other one-shot iterables are materialised first so that
    # any iterable accepted by the old per-sample loop still works.
    if not isinstance(wave_data, np.ndarray):
        wave_data = list(wave_data)
    samples = np.asarray(wave_data, dtype=np.float64)
    if not np.all(np.isfinite(samples)):
        raise ValueError("wave_data contains NaN or infinite samples")

    pcm = np.clip(np.trunc(samples), -32768, 32767).astype('<i2')

    # The context manager closes the file (and patches the header) even if
    # writing fails; a single writeframes call replaces one call per sample.
    with wave.open(fname, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(width)
        wf.setframerate(rate)
        wf.writeframes(pcm.tobytes())

# Pre-process an input file
def init_handle(filename):
    """Load a stereo 16-bit WAV file and build the network input and labels.

    Both channels are cropped or zero-padded to ``nframes`` samples and
    transformed with an STFT (``nperseg=1024``, ``noverlap=512``).

    Args:
        filename: Path of a 2-channel, 16-bit PCM WAV file.

    Returns:
        A tuple ``(theta, X, Y)``:
            theta: Phase of the summed channel spectra, laid out as returned
                by ``signal.stft`` (frequency bins x frames).
            X: Magnitude of the summed channel spectra, transposed to
                frames x frequency bins.
            Y: ``np.column_stack`` of the two ratio masks (channel 0 first,
                then channel 1), frames x (2 * frequency bins).

    Raises:
        ValueError: If the file is not 2-channel with 2-byte samples.  The
            de-interleaving below would otherwise silently produce garbage.
    """
    # getparams() returns (nchannels, sampwidth, framerate, nframes,
    # comptype, compname); the wave module only supports uncompressed data,
    # so the last two entries are ignored.
    with wave.open(filename, 'rb') as wav_file:
        n_channels, sample_width, _, total_frames = wav_file.getparams()[:4]
        if n_channels != 2 or sample_width != 2:
            raise ValueError(
                "expected a 2-channel 16-bit WAV file, got %d channel(s) "
                "with %d-byte samples: %r" % (n_channels, sample_width, filename)
            )
        # Read every frame in one go (length is given in sample frames).
        raw = wav_file.readframes(total_frames)

    # WAV PCM is little-endian.  Reshape to two columns (one per channel)
    # and transpose so that row 0 is the left channel and row 1 the right
    # channel (per the original author's note: accompaniment and vocals).
    samples = np.frombuffer(raw, dtype='<i2').reshape(-1, 2).T

    # Force every clip to exactly nframes samples per channel.
    available = samples.shape[1]
    if available >= nframes:
        wave_left = samples[0, :nframes]
        wave_right = samples[1, :nframes]
    else:
        padding = np.zeros(nframes - available)
        wave_left = np.append(samples[0], padding)
        wave_right = np.append(samples[1], padding)

    # Short-time Fourier transform of each channel.
    _, _, Z_left1 = signal.stft(wave_left, fs = framerate, nperseg = 1024, noverlap = 512)
    _, _, Z_right1 = signal.stft(wave_right, fs = framerate, nperseg = 1024, noverlap = 512)
    theta = np.angle(Z_left1 + Z_right1)  # phase spectrum of the mixture
    Z_left = Z_left1.T
    Z_right = Z_right1.T
    left = abs(Z_left)    # magnitude spectrum of channel 0
    right = abs(Z_right)  # magnitude spectrum of channel 1
    # Ratio-mask labels; the 0.1 keeps the quotient defined when both
    # magnitudes are zero.
    y1 = (left+0.1)/(left + right + 0.1)
    y2 = (right+0.1)/(left + right +0.1)
    X = abs(Z_left+Z_right)  # magnitude spectrum of the mixture
    Y = np.column_stack((y1,y2))
    return theta, X, Y

# Post-process the network prediction
def out_handle(theta, X, prediction):
    """Turn predicted masks into two waveforms and write them to disk.

    ``X`` and ``prediction`` are transposed, the prediction is split at row
    ``step_num`` into two masks, each mask is normalised into a soft
    time-frequency mask, applied to the mixture magnitude, recombined with
    the phase ``theta`` and inverted with an ISTFT.  The first result is
    written to ``fname1`` and the second to ``fname2``.

    Args:
        theta: Phase of the mixture spectrum, same layout as ``X.T``.
        X: Mixture magnitude spectrum (as returned by ``init_handle``).
        prediction: Network output; after transposition its first
            ``step_num`` rows form the first mask, the remainder the second.
    """
    mixture = X.T
    masks = prediction.T
    raw_masks = (masks[:step_num, :], masks[step_num:, :])

    # Shared denominator of the soft time-frequency masks.
    denominator = raw_masks[0] + raw_masks[1] + soft
    cos_theta = np.cos(theta)
    sin_theta = np.sin(theta)

    waveforms = []
    for raw_mask in raw_masks:
        magnitude = mixture * ((raw_mask + soft) / denominator)
        # Re-attach the mixture phase: magnitude * (cos(theta) + j*sin(theta)).
        spectrum = magnitude * cos_theta + 1j * magnitude * sin_theta
        _, waveform = signal.istft(spectrum, framerate, nperseg = 1024, noverlap = 512)
        waveforms.append(waveform)

    # Both inversions are finished before either file is written.
    for target, waveform in zip((fname1, fname2), waveforms):
        Generate_Wav(target, waveform, sampwidth, framerate)

class AudioRNN:
    """Stacked vanilla RNN (TensorFlow 1.x graph mode) that maps a mixture
    magnitude spectrogram to two concatenated time-frequency masks.

    The graph input is one clip of shape ``[num_steps, step_num]``; the
    training target has shape ``[num_steps, y_size]``.
    """

    def __init__(self):
        # One clip per feed; reshaped to the [batch, time, features] layout
        # that dynamic_rnn expects.
        self.x1 = tf.placeholder(tf.float32, [num_steps, step_num], name='input_placeholder')
        self.x = tf.reshape(self.x1, [batch_size, num_steps, step_num])
        self.y = tf.placeholder(tf.float32, [num_steps,y_size], name='output_placeholder')
        # Learning rate lives in a variable so train() can overwrite it.
        self.lr = tf.Variable(0.01,dtype=tf.float32)

    def RNNLayer(self):
        """Build (or reuse) the RNN stack and the sigmoid output layer.

        Returns:
            ``(logits, predictions)`` where ``predictions`` is
            ``sigmoid(logits)``; both have ``y_size`` columns.
        """
        rnn_layers = [tf.nn.rnn_cell.BasicRNNCell(num_units=size, reuse=tf.AUTO_REUSE) for size in [rnn_hidden1_size, rnn_hidden2_size, rnn_hiddenL_size]]
        rnn_multi = tf.contrib.rnn.MultiRNNCell(rnn_layers)
        initial_state = rnn_multi.zero_state(batch_size, dtype=tf.float32)
        outputs, last_states = tf.nn.dynamic_rnn(cell=rnn_multi,inputs=self.x, initial_state=initial_state, dtype=tf.float32)
        # Flatten batch and time so one matmul covers every time step.
        outputs = tf.reshape(outputs, [-1, state_size])

        with tf.variable_scope("sigmoid1", reuse=tf.AUTO_REUSE):
            # No initializer is passed for W, so get_variable's default is
            # used (not a stddev-0.1 normal as an earlier comment claimed).
            W1 = tf.get_variable('W', [state_size, y_size])
            b1 = tf.get_variable('b', [y_size], initializer=tf.constant_initializer(0.0))
        logits = tf.matmul(outputs, W1) + b1
        predictions = tf.nn.sigmoid(logits)
        return logits, predictions

    # Create the session, train the network and save a checkpoint
    def train(self, tf_save_path):
        """Train on every file in the module-level ``path`` folder.

        Runs 3 epochs with Adadelta on the mean sigmoid cross-entropy, one
        file per step, prints the last loss of each epoch and saves the
        model to ``tf_save_path``.

        Args:
            tf_save_path: Checkpoint path handed to ``tf.train.Saver.save``.

        Raises:
            ValueError: If the training folder contains no entries
                (previously this surfaced as a NameError on the loss print).
        """
        epoch = 3

        train_files = [os.path.join(path, name) for name in os.listdir(path)]
        if not train_files:
            raise ValueError("no training files found in %r" % (path,))

        logits,_ = self.RNNLayer()

        losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,labels=self.y)
        total_loss = tf.reduce_mean(losses)
        # Optimizer plus the op that minimises the loss w.r.t. the parameters.
        train_step = tf.train.AdadeltaOptimizer(self.lr).minimize(total_loss)
        # Built once: calling tf.assign inside the epoch loop would add a new
        # op to the graph on every epoch.
        set_learning_rate = tf.assign(self.lr, 0.5)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())  # initialise variables
            for i in range(epoch):
                # The rate could be varied per epoch, e.g. 0.5 * (0.9 ** i).
                sess.run(set_learning_rate)
                for filename in train_files:
                    _, train_X, train_Y = init_handle(filename)
                    to, _ = sess.run([total_loss,train_step],feed_dict={self.x1:train_X, self.y:train_Y})
                print("step"+str(i)+"\t"+"loss:",to)

            save_path = saver.save(sess, tf_save_path)
            print("Model saved in file: %s" %save_path)

    def test(self, testfile, tfsavepath):
        """Separate ``testfile`` with the checkpoint stored at ``tfsavepath``.

        The prediction is passed to ``out_handle`` (which writes the two
        output WAV files) and its shape is printed.

        Args:
            testfile: Path of the WAV file to separate.
            tfsavepath: Checkpoint path handed to ``tf.train.Saver.restore``.
        """
        _, predictions = self.RNNLayer()

        test_theta, test_X, _ = init_handle(testfile)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, tfsavepath)
            prediction = sess.run(predictions,feed_dict={self.x1:test_X})
            out_handle(test_theta, test_X, prediction)
            print(prediction.shape)

import pyaudio
import tkinter as tk
from PIL import ImageTk, Image
from tkinter import filedialog
class Control:
    """Tkinter control panel for training the model, choosing an audio
    file, separating it and playing back the two results.

    Args:
        RNN: Object providing ``train(savepath)`` and
            ``test(testfile, savepath)``.
        savepath: Checkpoint path forwarded to the model.
        testfile: Initially selected audio file, shown in the text box.
    """

    def __init__(self, RNN, savepath, testfile):
        self.RNN = RNN
        self.savepath = savepath
        self.testfile = testfile
        self.root = tk.Tk()
        self.root.title("控制面板")  # main-window title
        self.root.geometry('500x352')
        self.canvas = tk.Canvas(self.root, height=352, width=500)  # drawing surface for all widgets
        self.imgpath = 'kehuan.jpg'
        self.img = Image.open(self.imgpath)
        # Kept on self so the image is not garbage-collected while displayed.
        self.image_file = ImageTk.PhotoImage(self.img)
        self.canvas.create_image(0,0, anchor='nw', image=self.image_file)  # background image
        self.canvas.pack(side='top')

        self.trainBtn = tk.Button(self.canvas, command = self.TrainBtn, text = "开始训练")
        self.getfileBtn = tk.Button(self.canvas, command = self.SelectFile, text = "选择音频文件")
        self.separateBtn = tk.Button(self.canvas, command = self.SeparateBtn, text = "开始分离")
        self.leftBtn = tk.Button(self.canvas, command = self.LeftBtn, text = "播放背景音乐")
        self.rightBtn = tk.Button(self.canvas, command = self.RightBtn, text = "播放人声")

        self.trainlabel = tk.Label(self.canvas)
        self.text = tk.Text(self.canvas)  # shows the selected audio file
        self.separatelabel = tk.Label(self.canvas, text="请先确认分离默认音频或已经选好的！", foreground="red")
        self.playlabel = tk.Label(self.canvas, text="播放提示！", foreground="red")

    def gui_arrang(self):
        """Place every widget on the canvas and enter the Tk main loop."""
        self.canvas.create_window(100, 50, width=80, height=30,window=self.trainBtn)
        self.canvas.create_window(100, 130, width=80, height=30,window=self.getfileBtn)
        self.canvas.create_window(100, 210, width=80, height=30,window=self.separateBtn)
        self.canvas.create_window(150, 290, width=80, height=30,window=self.leftBtn)
        self.canvas.create_window(350, 290, width=80, height=30,window=self.rightBtn)

        self.canvas.create_window(300, 50, width=300, height=30, window=self.trainlabel)
        self.canvas.create_window(300, 130, width=300, height=30, window=self.text)
        self.canvas.create_window(300, 210, width=300, height=30, window=self.separatelabel)
        self.canvas.create_window(250, 300, width=80, height=30, window=self.playlabel)

        # A '.meta' file next to the checkpoint means a model was saved before.
        if os.path.exists(self.savepath + '.meta'):
            self.trainlabel.config(text="已有模型，可直接分离！", foreground="red")
        else:
            self.trainlabel.config(text="请先训练模型！", foreground="red")

        self.text.tag_config("tag1", foreground="red", offset=-7)
        self.text.insert(tk.INSERT, self.testfile, "tag1")
        self.text.config(state=tk.DISABLED)
        self.root.mainloop()

    def TrainBtn(self):
        """Train the model, reporting progress in the training label."""
        self.trainlabel.config(text="正在训练，请稍等！", foreground="red")
        # The call below blocks the event loop, so force the label to be
        # redrawn first; otherwise the "in progress" text never appears.
        self.root.update_idletasks()
        self.RNN.train(self.savepath)
        self.trainlabel.config(text="完成训练！", foreground="red")

    def SelectFile(self):
        """Ask the user for an audio file and show the choice in the text box."""
        self.testfile = tk.filedialog.askopenfilename()
        # A cancelled dialog yields an empty value; show a notice instead.
        shown = self.testfile if self.testfile else "您未选择文件！"
        self.text.tag_config("tag1", foreground="red")
        # The text box is read-only except while it is being rewritten.
        self.text.config(state=tk.NORMAL)
        self.text.delete(1.0, tk.END)
        self.text.insert(tk.INSERT, shown, "tag1")
        self.text.config(state=tk.DISABLED)

    def SeparateBtn(self):
        """Run separation on the selected file and report the status."""
        if not self.testfile:
            # Nothing selected (dialog cancelled): report it instead of
            # failing inside the model with an empty path.
            self.separatelabel.config(text="您未选择文件！", foreground="red")
            return
        self.separatelabel.config(text="正在分离，请稍等！", foreground="red")
        self.root.update_idletasks()  # redraw before the blocking call
        self.RNN.test(self.testfile, self.savepath)
        self.separatelabel.config(text="完成分离！", foreground="red")

    def LeftBtn(self):
        """Play 'left.wav' (button label: background music)."""
        filename = 'left.wav'
        self.PlayMusic(filename)

    def RightBtn(self):
        """Play 'right.wav' (button label: vocals)."""
        filename = 'right.wav'
        self.PlayMusic(filename)

    def PlayMusic(self, filename):
        """Play a WAV file synchronously through PyAudio.

        Args:
            filename: Path of the WAV file to play.
        """
        # Read the whole file up front; the context manager closes it even
        # if reading fails (the handle used to be leaked).
        with wave.open(filename,'rb') as wf:
            nchannels1, sampwidth1, framerate1, nframes1 = wf.getparams()[:4]
            data = wf.readframes(nframes1)

        self.playlabel.config(text="正在播放！", foreground="red")
        self.root.update_idletasks()  # redraw before the blocking playback

        p = pyaudio.PyAudio()
        try:
            stream = p.open(format = p.get_format_from_width(sampwidth1), channels = nchannels1, rate = framerate1, output = True)
            try:
                stream.write(data)
                stream.stop_stream()   # stop the data stream
            finally:
                stream.close()
        finally:
            p.terminate()  # shut PyAudio down
        self.playlabel.config(text="播放结束！", foreground="red")
        print("播放结束！")
            
def main(_):
    """Create the model and the control panel with the default checkpoint
    and test-file paths, then start the GUI.  The argument is ignored."""
    checkpoint_path = 'AudioRNN.ckpt'
    default_audio = r'C:\Users\xujiahao\Desktop\MIR-1K_for_MIREX\Wavfile\amy_7_03.wav'
    panel = Control(AudioRNN(), checkpoint_path, default_audio)
    panel.gui_arrang()

#     RNN.train(savepath)
#     RNN.test(testfile, savepath)

# Launch the GUI only when run as a script; main ignores its parameter,
# so a dummy 0 is passed.
if __name__ == '__main__':
    main(0)

INFO:tensorflow:Restoring parameters from AudioRNN.ckpt
(189, 1026)
播放结束！
播放结束！


In [None]:
import tkinter as tk
from PIL import ImageTk, Image
from tkinter import filedialog
import wave
import pyaudio
import pygame

# path = r'C:\Users\xujiahao\Desktop\MIR-1K_for_MIREX\trainwav' #文件夹目录 
# print(len(os.listdir(path)))

class Control:
    """Prototype Tkinter control panel: the train and separate buttons only
    print their stored arguments, while file selection and playback work.

    Args:
        RNN: Value printed by the train button.
        savepath: Value printed by the separate button.
        testfile: Initial text shown in the file box.
    """

    def __init__(self, RNN, savepath, testfile):
        self.RNN = RNN
        self.savepath = savepath
        self.testfile = testfile
        self.root = tk.Tk()
        self.root.title("控制面板")  # main-window title
        self.root.geometry('500x352')
        self.canvas = tk.Canvas(self.root, height=352, width=500)  # drawing surface for all widgets
        self.imgpath = 'kehuan.jpg'
        self.img = Image.open(self.imgpath)
        # Kept on self so the image is not garbage-collected while displayed.
        self.image_file = ImageTk.PhotoImage(self.img)
        self.canvas.create_image(0,0, anchor='nw', image=self.image_file)  # background image
        self.canvas.pack(side='top')

        self.trainBtn = tk.Button(self.canvas, command = self.TrainBtn, text = "开始训练")
        self.getfileBtn = tk.Button(self.canvas, command = self.SelectFile, text = "选择语频文件")
        self.testBtn = tk.Button(self.canvas, command = self.TestBtn, text = "开始分离")
        self.leftBtn = tk.Button(self.canvas, command = self.LeftBtn, text = "播放背景音乐")
        self.rightBtn = tk.Button(self.canvas, command = self.RightBtn, text = "播放人声")

        self.label = tk.Label(self.canvas)
        self.text = tk.Text(self.canvas)

    def gui_arrang(self):
        """Place every widget on the canvas and enter the Tk main loop."""
        self.canvas.create_window(100, 50, width=80, height=30,window=self.trainBtn)
        self.canvas.create_window(100, 130, width=80, height=30,window=self.getfileBtn)
        self.canvas.create_window(100, 210, width=80, height=30,window=self.testBtn)
        self.canvas.create_window(150, 290, width=80, height=30,window=self.leftBtn)
        self.canvas.create_window(350, 290, width=80, height=30,window=self.rightBtn)

        self.canvas.create_window(300, 50, width=300, height=30, window=self.label)
        self.canvas.create_window(300, 130, width=300, height=30, window=self.text)

        self.text.tag_config("tag1", foreground="red", offset=-7)
        self.text.insert(tk.INSERT, self.testfile, "tag1")
        self.text.config(state=tk.DISABLED)
        self.root.mainloop()

    def TrainBtn(self):
        """Placeholder: print the stored model object."""
        print(self.RNN)

    def SelectFile(self):
        """Ask the user for an audio file and show the choice in the text box."""
        self.testfile = tk.filedialog.askopenfilename()
        # A cancelled dialog yields an empty value; show a notice instead.
        shown = self.testfile if self.testfile else "您未选择文件"
        self.text.tag_config("tag1", foreground="red")
        # The text box is read-only except while it is being rewritten.
        self.text.config(state=tk.NORMAL)
        self.text.delete(1.0, tk.END)
        self.text.insert(tk.INSERT, shown, "tag1")
        self.text.config(state=tk.DISABLED)

    def TestBtn(self):
        """Placeholder: print the stored checkpoint path."""
        print(self.savepath)

    def LeftBtn(self):
        """Play 'left.wav', then print 1."""
        fname1 = 'left.wav'
        self.PlayMusic(fname1)
        print(1)

    def RightBtn(self):
        """Play 'right.wav', then print 2."""
        fname2 = 'right.wav'
        self.PlayMusic(fname2)
        print(2)

    def PlayMusic(self, filename):
        """Play a WAV file synchronously through PyAudio.

        Args:
            filename: Path of the WAV file to play.
        """
        # Read the whole file up front; the context manager closes it even
        # if reading fails (the handle used to be leaked).
        with wave.open(filename,'rb') as wf:
            nchannels1, sampwidth1, framerate1, nframes1 = wf.getparams()[:4]
            data = wf.readframes(nframes1)

        p = pyaudio.PyAudio()
        try:
            stream = p.open(format = p.get_format_from_width(sampwidth1), channels = nchannels1, rate = framerate1, output = True)
            try:
                stream.write(data)
                stream.stop_stream()   # stop the data stream
            finally:
                stream.close()
        finally:
            p.terminate()  # shut PyAudio down
        print("播放结束！")
        
#     def PlayMusic(self, filename, loops=0, start=0.0, value=0.5):
#         flag = True
#         pygame.mixer.init()  # 音乐模块初始化
#         track = pygame.mixer.music.load(filename)
#         pygame.mixer.music.play()
#         pygame.mixer.music.set_volume(value)  # 来设置播放的音量，音量value的范围为0.0到1.0。
#         pygame.mixer.music.play(loops=loops, start=start)  #loops和start分别代表重复的次数和开始播放的位置。
# #         pygame.mixer.music.stop()  # 停止播放
# #         music = pygame.mixer.Sound(filename)
# #         music.play()
# #         music.stop()

# Smoke-run the prototype panel with placeholder arguments (no real model,
# checkpoint path or audio file); gui_arrang blocks in the Tk main loop.
C = Control(1,2,"wer")
C.gui_arrang()