In [19]:
import torch
import torch.nn as nn

import os
from glob import glob
import numpy as np

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


In [20]:
class ConditionalGenerator(nn.Module):
    """Fully connected conditional generator.

    A class label is embedded into a vector of size ``input_dim``, concatenated
    with the noise vector ``z`` and passed through three
    Linear -> BatchNorm1d -> ReLU stages followed by a Linear -> Tanh output
    layer. The flat output is reshaped to ``output_shape``.

    Args:
        input_dim: Size of the noise vector; also used as the embedding size.
        hidden_dim: Width of the first hidden layer (the next two layers use
            ``2 * hidden_dim`` and ``4 * hidden_dim``).
        output_shape: Shape of one generated sample, without the batch axis.
            Any number of dimensions is accepted.
        num_classes: Number of distinct class labels the embedding supports.
    """

    def __init__(self, input_dim, hidden_dim, output_shape, num_classes):
        super().__init__()
        # Keep the shape as a tuple so a list argument works as well.
        self.output_shape = tuple(output_shape)
        # Number of values in one sample: the product of all output dimensions
        # (previously only the first two dimensions were multiplied).
        output_dim = 1
        for dim_size in self.output_shape:
            output_dim *= dim_size
        self.output_dim = output_dim
        # Map a class label to a vector with the same dimension as z.
        self.label_embedding = nn.Embedding(num_classes, input_dim)
        # Layer order and attribute names are kept unchanged so that the
        # state_dict keys stay compatible with previously saved checkpoints.
        self.model = nn.Sequential(
            nn.Linear(input_dim * 2, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim * 2),
            nn.BatchNorm1d(hidden_dim * 2),
            nn.ReLU(),
            nn.Linear(hidden_dim * 2, hidden_dim * 4),
            nn.BatchNorm1d(hidden_dim * 4),
            nn.ReLU(),
            nn.Linear(hidden_dim * 4, self.output_dim),
            nn.Tanh()
        )

    def forward(self, z, labels):
        """Generate one sample per (noise, label) pair.

        Args:
            z: Noise tensor of shape ``(batch, input_dim)``.
            labels: Integer class indices of shape ``(batch,)``.

        Returns:
            Tensor of shape ``(batch, *output_shape)``; values lie in
            ``[-1, 1]`` because the last layer is ``Tanh``.
        """
        label_embedding = self.label_embedding(labels)
        # Concatenate the noise z with the label condition along the feature axis.
        input_data = torch.cat([z, label_embedding], dim=1)
        x = self.model(input_data)
        return x.view(x.size(0), *self.output_shape)
    
# Hyperparameters. They determine the layer shapes, so they have to agree with
# the tensors stored in the checkpoint loaded below.
input_dim = 2048
hidden_dim = 128
mfcc_shape = (13, 44)
num_classes = 1467

# Build the generator on the selected device.
generator = ConditionalGenerator(input_dim, hidden_dim, mfcc_shape, num_classes).to(device)
# Load the saved generator weights.
# - map_location=device lets a checkpoint saved on a GPU load on a CPU-only machine.
# - weights_only=True restricts unpickling to tensors/plain containers, which is
#   all a state_dict needs and avoids executing arbitrary pickled code.
state_dict = torch.load('cGAN_generator_model.pth', map_location=device, weights_only=True)
generator.load_state_dict(state_dict)
# Switch to evaluation mode so BatchNorm uses its running statistics.
generator.eval()

  generator.load_state_dict(torch.load('cGAN_generator_model.pth'))


ConditionalGenerator(
  (label_embedding): Embedding(1467, 2048)
  (model): Sequential(
    (0): Linear(in_features=4096, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=128, out_features=256, bias=True)
    (4): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Linear(in_features=256, out_features=512, bias=True)
    (7): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Linear(in_features=512, out_features=572, bias=True)
    (10): Tanh()
  )
)

In [21]:
# Create the 'fakedata' output folder in the working directory if it is missing.
# os.path.join keeps the path portable (the previous "\\" separator only worked
# on Windows) and exist_ok=True removes the check-then-create race.
os.makedirs(os.path.join(os.getcwd(), "fakedata"), exist_ok=True)

與之前的用法相反:這裡的字典以整數索引為鍵、標籤字串為值(索引 → 標籤),例如:
{0: 'a',
1: 'a2',
2: 'a4'}

In [22]:
def extract_label(wav_path):
    """Return the label encoded in a sample file name.

    The label is the part of the file name after the first underscore, with
    the extension removed. Only the base name is inspected, so underscores in
    parent directory names do not affect the result. If the file name has no
    underscore, the whole name without its extension is returned.
    """
    stem = os.path.splitext(os.path.basename(wav_path))[0]
    return stem.partition("_")[2] if "_" in stem else stem


sample_list = glob(os.path.join(os.getcwd(), "samplePinyin", "Male", "*.wav"))

# Index -> label lookup, in the order the files were returned by glob.
# NOTE(review): glob's ordering is not guaranteed; presumably these indices must
# match the class indices used during training -- confirm against the training code.
label_dic: dict = {index: extract_label(path) for index, path in enumerate(sample_list)}

print(f"type(label_dic): {type(label_dic)}")
print(f"len(label_dic): {len(label_dic)}")
# label_dic

type(label_dic): <class 'dict'>
len(label_dic): 1467


In [23]:
batch_size = 10
# One fixed batch of noise vectors, reused for every label.
z = torch.randn(batch_size, input_dim).to(device)

# Portable output location (the previous "\\" separators only worked on Windows).
output_dir = os.path.join(os.getcwd(), "fakedata")

# Inference only: no_grad skips building the autograd graph.
with torch.no_grad():
    for i in range(len(label_dic)):
        # Condition the whole batch on class index i.
        labels = torch.full((batch_size,), i, dtype=torch.long, device=device)
        fake_data = generator(z, labels)
        # Move the batch to host memory once, then write one file per sample.
        for j, sample in enumerate(fake_data.cpu().numpy()):
            path = os.path.join(output_dir, f'fakeData-{j}_{label_dic[i]}.npy')
            # Save sample j (the previous code always saved sample 0, so all
            # files for a label held identical data).
            np.save(path, arr=sample)