# TGAN Audio

## Time-series Generative Adversarial Networks

GRU based TimeGAN implementation with 4-Channel Audio Data

Experiment Settings (Import necessary packages and functions)

In [1]:
import numpy as np
import pandas as pd
import sys
import IPython.display as ipd
#%% Functions
# 1. Models
from tgan import tgan

# 2. Data Loading
from data_loading_2 import audio_data_loading, sine_data_generation

# 3. Metrics
sys.path.append('metrics')
from discriminative_score_metrics import discriminative_score_metrics
from visualization_metrics import PCA_Analysis, tSNE_Analysis
from predictive_score_metrics import predictive_score_metrics

print('Finish importing necessary packages and functions')


Finish importing necessary packages and functions


Set main parameters and datasets

In [2]:
#%% Main Parameters
# Data
data_set = ['audio','sine']
data_name = data_set[0]

# Experiments iterations
Iteration = 2
Sub_Iteration = 3

#%% Data Loading
seq_length = 24

if data_name == 'audio':
    dataX = audio_data_loading(seq_length)
elif data_name == 'sine':
    No = 10000
    F_No = 5
    dataX = sine_data_generation(No, seq_length, F_No)

print(data_name + ' dataset is ready.')


audio dataset is ready.


In [3]:
np.array(dataX).shape

(3935, 24, 4)

In [4]:
origData = np.loadtxt('data/AUDIO_DATA.csv', delimiter = ",",skiprows = 1, usecols=range(1,5))
minData=np.min(origData, 0)
maxData=np.max(origData, 0)

In [5]:
ipd.Audio(np.array(origData).T, rate=22050, autoplay=False)

Set network parameters & Output initialization

In [6]:
#%% Network Parameters
parameters = dict()

parameters['hidden_dim'] = len(dataX[0][0,:]) * 4
parameters['num_layers'] = 3
parameters['iterations'] = 50000
parameters['batch_size'] = 128
parameters['module_name'] = 'gru'   # Other options: 'lstm' or 'lstmLN'
parameters['z_dim'] = len(dataX[0][0,:])

print('Parameters are ' + str(parameters))

#%% Experiments
# Output Initialization
Discriminative_Score = list()
Predictive_Score = list()

Parameters are {'z_dim': 4, 'batch_size': 128, 'hidden_dim': 16, 'iterations': 50000, 'num_layers': 3, 'module_name': 'gru'}


Run TGAN & Evaluate discriminative and predictive scores

In [7]:
print('Start iterations')

# Each Iteration
for it in range(Iteration):


    # Synthetic Data Generation
    dataX_hat = tgan(dataX, parameters)

    print('Finish Synthetic Data Generation')

    #%% Performance Metrics

    # 1. Discriminative Score
    Acc = list()
    for tt in range(Sub_Iteration):
        Temp_Disc = discriminative_score_metrics (dataX, dataX_hat)
        Acc.append(Temp_Disc)

    Discriminative_Score.append(np.mean(Acc))

    # 2. Predictive Performance
    MAE_All = list()
    for tt in range(Sub_Iteration):
        MAE_All.append(predictive_score_metrics (dataX, dataX_hat))

    Predictive_Score.append(np.mean(MAE_All))

print('Finish TGAN iterations')

Start iterations
Start Embedding Network Training
step: 0, e_loss: 0.0848
step: 1000, e_loss: 0.0291
step: 2000, e_loss: 0.0214
step: 3000, e_loss: 0.0109
step: 4000, e_loss: 0.0064
step: 5000, e_loss: 0.0049
step: 6000, e_loss: 0.0055
step: 7000, e_loss: 0.0039
step: 8000, e_loss: 0.0041
step: 9000, e_loss: 0.0047
step: 10000, e_loss: 0.0045
step: 11000, e_loss: 0.0049
step: 12000, e_loss: 0.0039
step: 13000, e_loss: 0.0025
step: 14000, e_loss: 0.0043
step: 15000, e_loss: 0.0028
step: 16000, e_loss: 0.0028
step: 17000, e_loss: 0.0038
step: 18000, e_loss: 0.003
step: 19000, e_loss: 0.0053
step: 20000, e_loss: 0.0038
step: 21000, e_loss: 0.0021
step: 22000, e_loss: 0.0026
step: 23000, e_loss: 0.0031
step: 24000, e_loss: 0.003
step: 25000, e_loss: 0.0022
step: 26000, e_loss: 0.0034
step: 27000, e_loss: 0.0017
step: 28000, e_loss: 0.0027
step: 29000, e_loss: 0.002
step: 30000, e_loss: 0.0043
step: 31000, e_loss: 0.0036
step: 32000, e_loss: 0.0043
step: 33000, e_loss: 0.0027
step: 34000, e

Start Embedding Network Training
step: 0, e_loss: 0.0824
step: 1000, e_loss: 0.0133
step: 2000, e_loss: 0.0066
step: 3000, e_loss: 0.0042
step: 4000, e_loss: 0.0058
step: 5000, e_loss: 0.0052
step: 6000, e_loss: 0.0069
step: 7000, e_loss: 0.0059
step: 8000, e_loss: 0.0033
step: 9000, e_loss: 0.0039
step: 10000, e_loss: 0.0055
step: 11000, e_loss: 0.0023
step: 12000, e_loss: 0.0046
step: 13000, e_loss: 0.0039
step: 14000, e_loss: 0.0034
step: 15000, e_loss: 0.0034
step: 16000, e_loss: 0.003
step: 17000, e_loss: 0.0029
step: 18000, e_loss: 0.003
step: 19000, e_loss: 0.0028
step: 20000, e_loss: 0.0038
step: 21000, e_loss: 0.0018
step: 22000, e_loss: 0.0016
step: 23000, e_loss: 0.0017
step: 24000, e_loss: 0.0027
step: 25000, e_loss: 0.0024
step: 26000, e_loss: 0.0032
step: 27000, e_loss: 0.0032
step: 28000, e_loss: 0.001
step: 29000, e_loss: 0.0033
step: 30000, e_loss: 0.0033
step: 31000, e_loss: 0.0027
step: 32000, e_loss: 0.0024
step: 33000, e_loss: 0.0017
step: 34000, e_loss: 0.0031
ste

Visualization (PCA Analysis)

In [8]:
PCA_Analysis (dataX, dataX_hat)

  if self._edgecolors == 'face':


Visualization (t-SNE Analysis)

In [9]:
tSNE_Analysis (dataX, dataX_hat)

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2000 samples in 0.011s...
[t-SNE] Computed neighbors for 2000 samples in 0.155s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2000
[t-SNE] Computed conditional probabilities for sample 2000 / 2000
[t-SNE] Mean sigma: 0.008089


  if np.rank(self.data) != 1 or np.rank(self.indices) != 1 or np.rank(self.indptr) != 1:


[t-SNE] KL divergence after 250 iterations with early exaggeration: 62.789719
[t-SNE] KL divergence after 300 iterations: 0.868337


Print Discriminative and Predictive Scores

In [10]:
# Print Results
print('Discriminative Score - Mean: ' + str(np.round(np.mean(Discriminative_Score),4)) + ', Std: ' + str(np.round(np.std(Discriminative_Score),4)))
print('Predictive Score - Mean: ' + str(np.round(np.mean(Predictive_Score),4)) + ', Std: ' + str(np.round(np.std(Predictive_Score),4)))


Discriminative Score - Mean: 0.0539, Std: 0.0113
Predictive Score - Mean: 0.013, Std: 0.0032


In [11]:
import pandas as pd
#df = pd.DataFrame(dataX_hat)
#dataX_hat.to_csv("file.csv")
#Open,High,Low,Close,Adj_Close
output_array = np.array(dataX_hat)

# Reshape the array to 2D. The new shape will have 10000 * 24 rows and 5 columns.
array_reshaped = output_array.reshape(-1, output_array.shape[-1])

# Create a DataFrame from the 2D array
df = pd.DataFrame(array_reshaped)

# Write the DataFrame to a CSV file
df.to_csv('data/SytheticAudioData.csv', index=False)

In [12]:

output_array = np.array(dataX_hat)
#ipd.Audio(array_reshaped, rate=44100, autoplay=True) # load a NumPy array

In [13]:
array_reshaped

array([[0.55529225, 0.6015346 , 0.47972688, 0.54958385],
       [0.56833684, 0.6398578 , 0.48225757, 0.580899  ],
       [0.56271535, 0.66058046, 0.46104455, 0.5866006 ],
       ...,
       [0.4984278 , 0.47094068, 0.50095576, 0.4857442 ],
       [0.49848357, 0.47096965, 0.5012413 , 0.48602936],
       [0.49853447, 0.4710003 , 0.50153655, 0.48633233]], dtype=float32)

In [14]:
generatedData = dataX_hat
inverted = generatedData * (maxData - minData) + minData

In [15]:
#inverted

In [16]:
np.min(inverted, 0)

array([[-0.16337442, -0.20518963, -0.17272172, -0.27128257],
       [-0.17014107, -0.19788465, -0.2772517 , -0.25516364],
       [-0.18435227, -0.18327065, -0.25486999, -0.2267248 ],
       [-0.15352295, -0.18251723, -0.25695827, -0.22335802],
       [-0.13863136, -0.17811738, -0.26374168, -0.20641998],
       [-0.13399375, -0.17319654, -0.22288563, -0.19239285],
       [-0.13791454, -0.17234729, -0.22765365, -0.19235224],
       [-0.13913641, -0.17028766, -0.2231286 , -0.19635728],
       [-0.14188773, -0.16913909, -0.22364764, -0.20191067],
       [-0.14403334, -0.16872706, -0.21975991, -0.20071864],
       [-0.14613069, -0.16765529, -0.222601  , -0.20115589],
       [-0.14797337, -0.16602803, -0.22190058, -0.20144961],
       [-0.1497215 , -0.16480595, -0.22240402, -0.20221946],
       [-0.15145694, -0.16945705, -0.22272971, -0.20200036],
       [-0.15301339, -0.17126937, -0.22302495, -0.2018672 ],
       [-0.15431839, -0.1719984 , -0.22298405, -0.20162245],
       [-0.15572542, -0.

In [17]:
np.min(generatedData, 0)

array([[0.32685173, 0.2656848 , 0.31991613, 0.16852833],
       [0.3196702 , 0.27312535, 0.21213983, 0.18733701],
       [0.3045877 , 0.28801054, 0.23521663, 0.22052139],
       [0.3373072 , 0.28877795, 0.2330635 , 0.22444998],
       [0.35311183, 0.29325944, 0.22606942, 0.24421445],
       [0.35803378, 0.2982716 , 0.26819432, 0.26058227],
       [0.3538726 , 0.2991366 , 0.26327822, 0.26062965],
       [0.3525758 , 0.30123445, 0.2679438 , 0.2559563 ],
       [0.3496558 , 0.30240434, 0.26740864, 0.24947622],
       [0.34737864, 0.30282402, 0.2714171 , 0.25086716],
       [0.3451527 , 0.30391568, 0.26848778, 0.25035694],
       [0.34319705, 0.30557314, 0.26920995, 0.25001422],
       [0.34134173, 0.3068179 , 0.26869088, 0.2491159 ],
       [0.3394999 , 0.30208048, 0.26835507, 0.24937156],
       [0.337848  , 0.30023453, 0.26805067, 0.24952695],
       [0.336463  , 0.29949197, 0.26809284, 0.24981253],
       [0.3349697 , 0.29863894, 0.26844278, 0.24983333],
       [0.33364996, 0.29916996,

In [18]:
output_array = np.array(inverted)

# Reshape the array to 2D. The new shape will have 10000 * 24 rows and 5 columns.
array_reshaped = output_array.reshape(-1, output_array.shape[-1])

# Create a DataFrame from the 2D array
df = pd.DataFrame(array_reshaped)

# Write the DataFrame to a CSV file
df.to_csv('data/SytheticAudioData2.csv', index=False)

In [19]:
ipd.Audio(np.array(array_reshaped).T, rate=44100, autoplay=False) #

In [20]:
np.array(origData).shape

(94464, 4)

In [21]:
np.array(generatedData).T.shape

(4, 24, 3935)

In [22]:
np.array(dataX).shape

(3935, 24, 4)

In [23]:
merged = []
for sequence in np.array(generatedData):
    merged.extend(sequence)

In [24]:
np.array(merged).shape

(94440, 4)

In [25]:
ipd.Audio(np.array(merged).T, rate=22050, autoplay=False) #

In [26]:
inverted = merged * (maxData - minData) + minData

In [27]:
ipd.Audio(np.array(inverted).T, rate=22050, autoplay=False)

In [28]:
np.array(inverted).shape

(94440, 4)

In [29]:
flipped = origData[::-1]
ipd.Audio(np.array(flipped).T, rate=22050, autoplay=False)

In [30]:
ipd.Audio(np.array(origData[1:1 + 24]),  rate=22050, autoplay=False)

In [32]:
newmerged = []
for sequence in np.array(generatedData):
    newmerged.append(sequence[0])

In [33]:
np.array(generatedData)[0][0]

array([0.55529225, 0.6015346 , 0.47972688, 0.54958385], dtype=float32)

In [34]:
np.array(newmerged).shape

(3935, 4)

In [35]:
newmerged

[array([0.55529225, 0.6015346 , 0.47972688, 0.54958385], dtype=float32),
 array([0.49827868, 0.46640244, 0.5005554 , 0.48077846], dtype=float32),
 array([0.49638468, 0.46482667, 0.4982419 , 0.47855   ], dtype=float32),
 array([0.46617985, 0.44320244, 0.49636734, 0.4842424 ], dtype=float32),
 array([0.5006721 , 0.4672416 , 0.5036162 , 0.48240194], dtype=float32),
 array([0.49689588, 0.46681628, 0.49910593, 0.48119512], dtype=float32),
 array([0.49741727, 0.46510863, 0.4997537 , 0.47939527], dtype=float32),
 array([0.48334175, 0.4966934 , 0.5033952 , 0.5342474 ], dtype=float32),
 array([0.49036744, 0.4856615 , 0.5010766 , 0.5116449 ], dtype=float32),
 array([0.57289237, 0.6020383 , 0.46585396, 0.5168028 ], dtype=float32),
 array([0.44683972, 0.4149478 , 0.48471674, 0.4612985 ], dtype=float32),
 array([0.346284 , 0.3419788, 0.4929102, 0.4909094], dtype=float32),
 array([0.4979692 , 0.46631667, 0.49848375, 0.47874072], dtype=float32),
 array([0.50795716, 0.46894687, 0.51039666, 0.48385423]

In [36]:
ipd.Audio(np.array(newmerged).T,  rate=22050, autoplay=False)

In [37]:
newInverted = newmerged * (maxData - minData) + minData

In [38]:
ipd.Audio(np.array(newInverted).T,  rate=22050, autoplay=False)

In [39]:
generatedData

[array([[0.55529225, 0.6015346 , 0.47972688, 0.54958385],
        [0.56833684, 0.6398578 , 0.48225757, 0.580899  ],
        [0.56271535, 0.66058046, 0.46104455, 0.5866006 ],
        [0.5648783 , 0.6527689 , 0.48959163, 0.60154885],
        [0.5598274 , 0.66048974, 0.4713343 , 0.6003872 ],
        [0.5599987 , 0.6590564 , 0.48114654, 0.6078767 ],
        [0.55478865, 0.66054016, 0.46967933, 0.60429674],
        [0.556124  , 0.6579169 , 0.4748835 , 0.60506827],
        [0.55338377, 0.65721774, 0.46949616, 0.60210043],
        [0.5541995 , 0.6566861 , 0.47042313, 0.6018513 ],
        [0.55115217, 0.65492713, 0.46770892, 0.6006123 ],
        [0.55017734, 0.6542297 , 0.46700427, 0.6007209 ],
        [0.54776466, 0.6526301 , 0.46609956, 0.60067284],
        [0.54639673, 0.6523552 , 0.4650393 , 0.60106564],
        [0.54428536, 0.65120816, 0.46472582, 0.6017394 ],
        [0.54258025, 0.6508877 , 0.46371448, 0.6024065 ],
        [0.54066974, 0.65003765, 0.46358994, 0.603415  ],
        [0.539

In [40]:

newInverted

array([[ 0.05186914,  0.12454133, -0.01772461,  0.05527996],
       [-0.00185077, -0.00812887,  0.00247654, -0.0036859 ],
       [-0.00363535, -0.00967593,  0.00023273, -0.00559568],
       ...,
       [-0.00090998,  0.02320492,  0.00019587,  0.0242228 ],
       [-0.00522404, -0.00750927, -0.064179  , -0.06511002],
       [-0.00305228, -0.00929661,  0.00054704, -0.00549674]])