In [1]:
import matplotlib.pyplot as plt
import numpy as np
import timeautoencoder as tae
import timediffusion as tdf
import DP_Sliding as dp
import pandas as pd
import torch
import os
import time 
import process_edited as pce
import random
from torch.utils.data import random_split
import timeautoencoder_impute as taei
import CSDI

In [None]:
# Load the raw table and build the windowed tensors consumed by the model.
# NOTE(review): the original header pointed at ETTh-small
# (https://github.com/zhouhaoyi/ETDataset/tree/main) but the path below loads
# `stock_data` -- confirm which dataset is intended.
time_duration = []

data = 'C:/Users/namjo/OneDrive/Desktop/TimeAutoDiff/Dataset/stock_data'
filename = f'{data}.csv'

### Read dataframe
print(filename)
real_df = pd.read_csv(filename)
# real_df1: first 2000 rows without the 'date' column (model features)
# real_df2: first 2000 rows including 'date' (passed to dp.splitTimeData)
real_df1 = real_df.drop(columns='date').iloc[:2000, :]
real_df2 = real_df.iloc[:2000, :]

##############################################################################################################################
### Pre-processing Data
threshold = 1
# Fall back to the CPU when no CUDA device is present instead of crashing on `.to('cuda')`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
processed_data = dp.splitData(real_df1, 48, 1).to(device)
time_info = dp.splitTimeData(real_df2, 48).to(device)

In [None]:
# Parameter settings : See Appendix D in the paper
##############################################################################################################################
# Auto-encoder Training
n_epochs = 20000
eps = 1e-5
weight_decay = 1e-6
lr = 2e-4
hidden_dim = 200
num_layers = 2
batch_size = 100
channels = 64
min_beta = 1e-5
max_beta = 0.1
emb_dim = 128
time_dim = 8
lat_dim = processed_data.shape[2]
threshold = 1
ds = tae.train_autoencoder(real_df1, processed_data, channels, hidden_dim, num_layers, lr, weight_decay, n_epochs,
                           batch_size, threshold, min_beta, max_beta, emb_dim, time_dim, lat_dim, device)

##############################################################################################################################
# Diffusion Training
# ds[1] is used as the diffusion training target; ds[0] exposes the `.decoder` used below.
latent_features = ds[1]
n_epochs = 20000
hidden_dim = 200
num_layers = 2
diffusion_steps = 100
lambd = 1
diff = tdf.train_diffusion(latent_features, time_info.to(device), hidden_dim, num_layers, diffusion_steps, lambd, n_epochs)

##############################################################################################################################
# Sampling new data
N, T, F = processed_data.shape
t_grid = torch.linspace(0, 1, T).view(1, -1, 1).to(device)  # Note that we can use different sequence length here without any issues
samples = tdf.sample(t_grid.repeat(N, 1, 1), N, T, lat_dim, diff, time_info.to(device))

# Post-process the generated data
# Decoding is inference only: skip autograd bookkeeping so no graph is kept alive for the N x T outputs.
with torch.no_grad():
    gen_output = ds[0].decoder(samples[0].to(device))  # Apply decoder to generated latent vector

# Slice instead of unpacking into `_`, which would shadow IPython's last-output variable.
data_size, seq_len = processed_data.shape[:2]
synth_data = pce.convert_to_tensor(real_df1, gen_output, threshold, N, T)
_synth_data = pce.convert_to_table(real_df1, synth_data, threshold)
_real_data = pce.convert_to_table(real_df1, processed_data, threshold)

B, L, K = _synth_data.shape

# Flatten (sample, time, feature) -> (sample * time, feature)
pd_reshaped = _real_data.reshape(B * L, K)
sd_reshaped = _synth_data.cpu().reshape(B * L, K)

# NOTE: `real_df` is rebound here from the raw CSV frame to the flattened real table;
# later cells rely on this binding.
# `.detach()` keeps the NumPy conversion valid even if a tensor still carries grad.
real_df = pd.DataFrame(pd_reshaped.detach().cpu().numpy())
synth_df = pd.DataFrame(sd_reshaped.detach().cpu().numpy())

In [None]:
import matplotlib.pyplot as plt
import torch
import numpy as np

# Column names used as per-panel titles
parser = pce.DataFrameParser().fit(real_df1, threshold)
col_name = parser.column_name()

# Create subplots
# squeeze=False always yields a 2-D array of Axes, so indexing also works when K == 1
# (plt.subplots otherwise returns a bare Axes object that cannot be subscripted).
fig, axes = plt.subplots(nrows=1, ncols=K, figsize=(33.1, 23.4/5), squeeze=False)
axes = axes[0]

for k in range(K):
    # Hand matplotlib plain host-side NumPy arrays rather than torch tensors
    real_values = pd_reshaped[:, k].detach().cpu().numpy()
    synth_values = sd_reshaped[:, k].detach().cpu().numpy()

    axes[k].hist(real_values, bins=50, color='blue', alpha=0.5, label='Real')
    axes[k].hist(synth_values, bins=50, color='red', alpha=0.5, label='Synthetic')

    # Adding legends
    axes[k].legend()
    axes[k].set_title(col_name[k], fontsize=15)

# Adjust layout to prevent overlapping
plt.tight_layout()
plt.subplots_adjust(wspace=0.4, hspace=0.4)  # Add space between histograms
plt.savefig('hurricane.png', dpi=500)  # Adjust dpi as needed for quality
plt.show()

In [None]:
import numpy as np
import timeautoencoder as tae
import timediffusion as tdf
import pandas as pd
import os
import time
import process_edited as pce
import correl as correl
import Metrics as mt
import matplotlib.pyplot as plt
import random
import predictive_metrics as pdm

In [None]:
#################### Evaluate Metrics #################### 
# Each pass scores the synthetic data with four metrics and stores one value per metric.
iterations = 2000
result_disc, result_pred, result_tmp, result_corr = [], [], [], []

for i in range(1):
    # 2000 row positions drawn with replacement from the flattened real table
    last_row = len(real_df) - 1
    random_integers = [random.randint(0, last_row) for _ in range(2000)]

    disc_score = mt.discriminative_score_metrics(_real_data, _synth_data, iterations)
    pred_score = pdm.predictive_score_metrics(_real_data, _synth_data, 7)
    temp_score = mt.temp_disc_score(_real_data, _synth_data, iterations)
    # The same sampled rows are taken from both frames for the correlation score
    corr_score = correl.final_correlation(real_df.iloc[random_integers], synth_df.iloc[random_integers])

    # Record only after every metric has been computed successfully
    for bucket, score in (
        (result_disc, disc_score),
        (result_pred, pred_score),
        (result_tmp, temp_score),
        (result_corr, corr_score),
    ):
        bucket.append(score)

In [None]:
# Mean and standard deviation per metric, in the order:
# discriminative, predictive, temporal-discriminative, correlation.
for metric_scores in (result_disc, result_pred, result_tmp, result_corr):
    print(np.mean(metric_scores), np.std(metric_scores))

In [None]:
# Persist the synthetic table tensor. It is detached and moved to the CPU first so the
# file can be loaded on machines without CUDA and carries no autograd state.
# (The former bare `_synth_data` expression was not the cell's last statement, so it displayed nothing.)
torch.save(_synth_data.detach().cpu(), 'stock_data_for_volatility_synthetic.pt')


In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# _real_data and _synth_data are assumed to be tensors with size [1953, 48, 8]

# Draw one synthetic sample at random
random_index = int(torch.randint(0, _synth_data.shape[0], (1,)))
chosen_synth_data = _synth_data[random_index]

# Draw two distinct feature positions at random from the third dimension
feature_indices = torch.randperm(_synth_data.shape[2])[:2]
feature_1_idx, feature_2_idx = (int(idx) for idx in feature_indices)

# Frobenius distance from the chosen synthetic sample to every real sample
frobenius_distances = (_real_data - chosen_synth_data).norm(dim=[1, 2])

# The real sample with the smallest distance is the nearest neighbour
closest_real_data_index = int(frobenius_distances.argmin())
closest_real_data = _real_data[closest_real_data_index]

# Pull the two selected feature columns out of both samples
selected_features = (feature_1_idx, feature_2_idx)
real_feature_1, real_feature_2 = (closest_real_data[:, col] for col in selected_features)
synth_feature_1, synth_feature_2 = (chosen_synth_data[:, col] for col in selected_features)

# Function to calculate volatility and moving averages
def calculate_metrics(values):
    """Build a DataFrame of rolling statistics for a 1-D series.

    Parameters
    ----------
    values : torch.Tensor or array-like
        One-dimensional sequence of observations. Tensors are detached and
        moved to the CPU before conversion, so tensors that require grad or
        live on an accelerator are accepted as well.

    Returns
    -------
    pandas.DataFrame
        One row per observation with the columns:
        ``Values`` (the input), ``Volatility`` (rolling 5-step standard
        deviation of the percentage change), ``SMA_5`` (5-step simple moving
        average) and ``EMA_5`` (exponential moving average, span 5,
        ``adjust=False``).
    """
    if isinstance(values, torch.Tensor):
        # A plain `.numpy()` raises for grad-tracking or non-CPU tensors
        series_values = values.detach().cpu().numpy()
    else:
        series_values = np.asarray(values)

    values_df = pd.DataFrame({"Values": series_values})
    values_df["Volatility"] = values_df["Values"].pct_change().rolling(window=5).std()  # Rolling std as volatility
    values_df["SMA_5"] = values_df["Values"].rolling(window=5).mean()  # Simple moving average (5 steps)
    values_df["EMA_5"] = values_df["Values"].ewm(span=5, adjust=False).mean()  # Exponential moving average
    return values_df

# Calculate metrics for real and synthetic features
real_feature_1_metrics = calculate_metrics(real_feature_1)
real_feature_2_metrics = calculate_metrics(real_feature_2)
synth_feature_1_metrics = calculate_metrics(synth_feature_1)
synth_feature_2_metrics = calculate_metrics(synth_feature_2)


def _plot_feature_panel(ax, metrics, title, value_color, ema_color):
    """Draw one panel: the series with its SMA/EMA overlays plus volatility on a twin y-axis.

    `metrics` is a DataFrame as returned by `calculate_metrics`. The raw series is
    taken from its "Values" column, so no separate tensor-to-NumPy conversion is
    needed here. Returns the secondary (volatility) axis.
    """
    ax.plot(metrics["Values"], label=title, color=value_color)
    ax.plot(metrics["SMA_5"], label='SMA (5)', linestyle='--', color='orange')
    ax.plot(metrics["EMA_5"], label='EMA (5)', linestyle='--', color=ema_color)
    ax.set_ylabel('Value')
    ax.set_xlabel('Time step')
    ax.set_title(title)
    ax.legend(loc="upper left")

    # Secondary y-axis for volatility
    vol_axis = ax.twinx()
    vol_axis.plot(metrics["Volatility"], label='Volatility', color='red', linestyle='--', linewidth=0.8)
    vol_axis.set_ylabel('Volatility')
    vol_axis.legend(loc="upper right")
    return vol_axis


# Plot the data: synthetic panels on the left, their closest real counterparts on the right
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

panel_specs = [
    (synth_feature_1_metrics, f'Synth Feature {feature_1_idx}', 'blue', 'green'),
    (real_feature_1_metrics, f'Real Feature {feature_1_idx}', 'green', 'blue'),
    (synth_feature_2_metrics, f'Synth Feature {feature_2_idx}', 'blue', 'green'),
    (real_feature_2_metrics, f'Real Feature {feature_2_idx}', 'green', 'blue'),
]

# axes.flat walks the grid row by row, matching subplot positions 1..4
for panel_ax, (panel_metrics, panel_title, value_color, ema_color) in zip(axes.flat, panel_specs):
    ax_vol = _plot_feature_panel(panel_ax, panel_metrics, panel_title, value_color, ema_color)

plt.tight_layout()
plt.show()


In [None]:
#import tsgm
#import torch
import numpy as np

# Load samples produced by the baseline generators for comparison.
# NOTE(review): `data` is rebound here to "Hurricane", while the cells above load
# `stock_data` -- confirm the baselines match the dataset being evaluated.
data = "Hurricane"
fake_data_dir = f"C:/Users/namjo/Dropbox/Time Series Tabular/Fake Data/{data}"

# NOTE(security): torch.load unpickles file contents; only load files from a trusted source.
# map_location="cpu" lets tensors that were saved from a GPU load on hosts without CUDA.
file_path = f"{fake_data_dir}/TimeGAN_hurricane.pt"
TimeGAN = torch.load(file_path, map_location="cpu")

file_path = f"{fake_data_dir}/DiffusionTS_{data}.pt"
Diffusion_TS = torch.load(file_path, map_location="cpu")

file_path = f"{fake_data_dir}/Doppel_Hurricane.pt"
DoppelGANer_TS = torch.load(file_path, map_location="cpu")

file_path = f"{fake_data_dir}/TSGM/tsgm_hurricane_0324.npy"
TSGM = torch.from_numpy(np.load(file_path))

In [None]:
# tsgm must be imported before its first use; without this the cell raises NameError
# on a fresh kernel.
import tsgm

# Entropy metric for each sample set, printed in the order:
# ours, TimeGAN, DoppelGANger, Diffusion-TS, TSGM, real data.
spec_entropy = tsgm.metrics.EntropyMetric()
for sample_set in (synth_data, TimeGAN, DoppelGANer_TS, Diffusion_TS, TSGM, processed_data):
    print(spec_entropy(sample_set.cpu().numpy()))

In [None]:
import tsgm

# Pass host-side NumPy arrays, as the other tsgm calls in this notebook do;
# `processed_data` was moved to `device` and may not be on the CPU.
tsgm.utils.visualize_tsne_unlabeled(
    processed_data.detach().cpu().numpy(),
    synth_data.detach().cpu().numpy(),
    perplexity=5,
    markersize=75,
    alpha=0.25,
)

In [None]:
# MMD between the real data and each generated sample set; the final line compares
# the real data with itself.
mmd_metric = tsgm.metrics.MMDMetric()
real_np = processed_data.cpu().numpy()

print(mmd_metric(real_np, synth_data.cpu().numpy()))
print(mmd_metric(real_np, TimeGAN.cpu().numpy()))
# The DoppelGANger comparison uses the first 1977 samples and the first 7 features of the real data
print(mmd_metric(processed_data[0:1977, :, 0:7].cpu().numpy(),
                 DoppelGANer_TS[0:1977, :, :].cpu().numpy()))
print(mmd_metric(real_np, Diffusion_TS.cpu().float().numpy()))
print(mmd_metric(real_np, TSGM.cpu().numpy()))
print(mmd_metric(real_np, processed_data.cpu().numpy()))
