In [1]:
import torch
import torch.nn as nn
from torchsummary import summary
import librosa

import math
import sys
import time
from pathlib import Path
from IPython.display import Audio

In [5]:
class Generator(nn.Module):
    """Map a latent vector to a single-channel 1-D signal via transposed convolutions.

    The latent input is projected by a linear layer, reshaped into a feature
    map with ``32 * model_size`` channels and 16 time steps, passed through a
    ReLU, and then upsampled by six transposed-convolution stages. Every stage
    uses kernel 25, stride 4, padding 11 and output_padding 1, which multiplies
    the temporal length by exactly 4 (16 -> 64 -> ... -> 65536). The first five
    stages halve the channel count and end in ReLU; the last stage emits one
    channel and ends in Tanh, so outputs lie in [-1, 1].

    Args:
        model_size: Channel multiplier; the widest feature map has
            ``32 * model_size`` channels.
        z_dim: Size of the latent vector fed to the linear projection.
    """

    def __init__(self, model_size=32, z_dim=100):
        super().__init__()
        self.model_size = model_size

        def upsample_x4(channels_in, channels_out, activation):
            # One upsampling stage: transposed conv (length x4) + activation.
            deconv = nn.ConvTranspose1d(channels_in,
                                        channels_out,
                                        kernel_size=25,
                                        stride=4,
                                        padding=11,
                                        output_padding=1)
            return nn.Sequential(deconv, activation)

        # 32 * model_size channels times 16 time steps, flattened.
        projected_features = 32 * model_size * 16
        self.full_connection_1 = nn.Linear(z_dim, projected_features)

        # Channel widths shrink by a factor of two per stage.
        self.layer_1 = upsample_x4(32 * model_size, 16 * model_size,
                                   nn.ReLU(inplace=True))
        self.layer_2 = upsample_x4(16 * model_size, 8 * model_size,
                                   nn.ReLU(inplace=True))
        self.layer_3 = upsample_x4(8 * model_size, 4 * model_size,
                                   nn.ReLU(inplace=True))
        self.layer_4 = upsample_x4(4 * model_size, 2 * model_size,
                                   nn.ReLU(inplace=True))
        self.layer_5 = upsample_x4(2 * model_size, model_size,
                                   nn.ReLU(inplace=True))
        # Final stage collapses to one channel and squashes with Tanh.
        self.layer_6 = upsample_x4(model_size, 1, nn.Tanh())

    def forward(self, x):
        """Run the generator.

        Args:
            x: Tensor whose last dimension is ``z_dim``.

        Returns:
            Tensor of shape ``(batch, 1, 65536)`` produced by the Tanh stage.
        """
        feature_map = self.full_connection_1(x)
        # Unflatten the projection into (batch, channels, time).
        feature_map = feature_map.view(-1, 32 * self.model_size, 16)
        feature_map = torch.relu(feature_map)

        stages = (self.layer_1, self.layer_2, self.layer_3,
                  self.layer_4, self.layer_5, self.layer_6)
        for stage in stages:
            feature_map = stage(feature_map)
        return feature_map

In [6]:
# Build the generator with its default arguments (model_size=32, z_dim=100).
model = Generator()

In [7]:
# Print a per-layer table of output shapes and parameter counts for one
# 100-dimensional latent input (the tuple excludes the batch dimension).
# NOTE(review): torchsummary picks the device for its dummy input itself;
# confirm it matches the device `model` lives on when CUDA is available.
summary(model, (100, ))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 16384]       1,654,784
   ConvTranspose1d-2              [-1, 512, 64]      13,107,712
              ReLU-3              [-1, 512, 64]               0
   ConvTranspose1d-4             [-1, 256, 256]       3,277,056
              ReLU-5             [-1, 256, 256]               0
   ConvTranspose1d-6            [-1, 128, 1024]         819,328
              ReLU-7            [-1, 128, 1024]               0
   ConvTranspose1d-8             [-1, 64, 4096]         204,864
              ReLU-9             [-1, 64, 4096]               0
  ConvTranspose1d-10            [-1, 32, 16384]          51,232
             ReLU-11            [-1, 32, 16384]               0
  ConvTranspose1d-12             [-1, 1, 65536]             801
             Tanh-13             [-1, 1, 65536]               0
Total params: 19,115,777
Trainable para