# Metric Presentation and Visualization

In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import matplotlib.pyplot as plt
import scienceplots
plt.style.use('science')

from discriminative_metrics import discriminative_score_metrics
from predictive_metrics import predictive_score_metrics
from metric_utils import generate_ks_results, visualization, display_scores

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed NumPy's global RNG and PyTorch's default generator with the same value
# so the stochastic parts of the notebook start from a fixed state.
seed = 0
np.random.seed(seed)
# torch.manual_seed returns the Generator object, which is what the cell displays.
torch.manual_seed(seed)

<torch._C.Generator at 0x11a40d1d0>

In [None]:
# Disabled preprocessing snippet, kept as a bare string so nothing in it runs.
# The quoted code reads AMZN.csv (indexed by 'Date'), takes log returns via
# np.log(df2 / df2.shift(1)), drops the resulting NaN rows and saves them as
# 'amzn_returns' with np.save.
# NOTE(review): the paths are absolute and machine-specific, and `df2` is only
# ever assigned inside this string, so any cell that reads `df2` needs this
# snippet enabled first.
'''
import pandas as pd 
df = pd.read_csv('/Users/timot/Desktop/Sig_Diffusions_DH/data/AMZN.csv').set_index('Date')
df2 = df
df2_ret = np.log(df2 / df2.shift(1)).dropna()
np.save('/Users/timot/Desktop/Sig_Diffusions_DH/data/amzn_returns', df2_ret.to_numpy())
'''

In [74]:
# Rows remaining after the first int(0.7 * n) rows are set aside
# (presumably a 70/30 train/test split — TODO confirm).
# `df2` is only created by the disabled preprocessing snippet above, so on a
# fresh kernel the bare expression raised NameError and broke "Run All".
# Report the missing input instead of failing.
if 'df2' in globals():
    n_rows = df2.shape[0]
    print(n_rows - int(n_rows * 0.7))
else:
    print('df2 is not defined: enable the preprocessing snippet above to compute the held-out row count.')

1228

## Data Loading

In [3]:
# Name of the experiment to evaluate. It is interpolated into the .npy file
# names read from ../data/real_paths and ../data/generated_paths.
# presumably this must match the --name argument used when the paths were
# generated — TODO confirm
experiment_name = "amzn_returns"

In [4]:
# How many times each score loop further down repeats its metric.
iterations = 5

# Real and generated paths are stored under the same file name in sibling folders.
real_path = f'../data/real_paths/{experiment_name}.npy'
generated_path = f'../data/generated_paths/{experiment_name}.npy'

real_data = np.load(real_path)
generated_data = np.load(generated_path)

In [5]:
# Sanity check: dimensions of the generated sample array.
np.shape(generated_data)

(20000, 30, 1)

In [6]:
# Smallest value in the real data before any scaling is applied.
np.min(real_data)

np.float64(-0.15139791287665308)

In [7]:
# Min-max scale the inputs for a fair comparison. The statistics come from the
# real data only (reduced over axes 0 and 1, keeping dims so they broadcast),
# and the same affine map is then applied to the generated data.
data_min = np.min(real_data, axis=(0,1), keepdims=True)
data_max = np.max(real_data, axis=(0,1), keepdims=True)

# A constant feature has max == min; dividing by that zero range would fill
# both arrays with NaN/inf. Use a unit divisor there so such a feature maps to 0.
# data_range is a fresh array, so the in-place fix-up does not touch data_max/data_min.
data_range = data_max - data_min
data_range[data_range == 0] = 1

real_data = (real_data - data_min) / data_range
generated_data = (generated_data - data_min) / data_range

In [8]:
# test set: keep only the trailing block of real paths for evaluation
n_test_paths = 1000
real_data = real_data[-n_test_paths:]

In [9]:
# Unpack the three dimensions of the real test data (`dim` is passed to the
# KS test further down), then display both array shapes side by side.
num_samples, seq_len, dim = np.shape(real_data)
np.shape(real_data), np.shape(generated_data)

((1000, 30, 1), (20000, 30, 1))

## Discriminative and Predictive Scores

In [87]:
# Run the discriminative metric `iterations` times on the real vs. generated
# paths, keeping the score of every run, then summarise the collected scores.
discriminative_score = []

for i in range(iterations):
    run_result = discriminative_score_metrics(real_data, generated_data)
    # The metric returns three values; only the score itself is collected here.
    temp_disc, fake_acc, real_acc = run_result
    discriminative_score += [temp_disc]
    print(f'Iter {i}: ', temp_disc, '\n')

print(f'{experiment_name}:')
display_scores(discriminative_score)
print()

training: 100%|██████████| 2000/2000 [00:10<00:00, 192.75it/s]


Iter 0:  0.4571428571428572 



training: 100%|██████████| 2000/2000 [00:10<00:00, 195.36it/s]


Iter 1:  0.4519047619047619 



training: 100%|██████████| 2000/2000 [00:10<00:00, 196.90it/s]


Iter 2:  0.4066666666666666 



training: 100%|██████████| 2000/2000 [00:10<00:00, 194.40it/s]


Iter 3:  0.38976190476190475 



training: 100%|██████████| 2000/2000 [00:10<00:00, 192.42it/s]

Iter 4:  0.4073809523809524 

aapl_returns:
Final Score:  0.4225714285714286 ± 0.026891652937899285






In [42]:
# Run the predictive metric `iterations` times on the real vs. generated
# paths, keeping the score of every run, then summarise the collected scores.
predictive_score = []

for i in range(iterations):
    temp_pred = predictive_score_metrics(real_data, generated_data)
    predictive_score += [temp_pred]
    print(i, ' epoch: ', temp_pred, '\n')

print(f'{experiment_name}:')
display_scores(predictive_score)
print()

training: 100%|██████████| 5000/5000 [00:13<00:00, 371.48it/s]


0  epoch:  0.05431245403364301 



training: 100%|██████████| 5000/5000 [00:13<00:00, 369.08it/s]


1  epoch:  0.05981762884836644 



training: 100%|██████████| 5000/5000 [00:13<00:00, 370.57it/s]


2  epoch:  0.06689297918649391 



training: 100%|██████████| 5000/5000 [00:13<00:00, 369.62it/s]


3  epoch:  0.04993116842582822 



training: 100%|██████████| 5000/5000 [00:13<00:00, 368.47it/s]

4  epoch:  0.05064315940253437 

stocks_paths:
Final Score:  0.056319477979373186 ± 0.006341848688444645






## KS Test Scores on Marginals

In [10]:
pct_marginals = (0.3, 0.5, 0.7, 0.9)


def _endless_batches(loader):
    """Yield batches from ``loader`` forever, restarting it each time it runs out."""
    while True:
        yield from loader


# Shuffled mini-batch loaders over the real and generated paths.
real_dataloader = DataLoader(TensorDataset(torch.tensor(real_data)), batch_size=64, shuffle=True)
generated_dataloader = DataLoader(TensorDataset(torch.tensor(generated_data)), batch_size=64, shuffle=True)

# generate_ks_results receives never-ending batch streams rather than the loaders themselves.
infinite_real_dataloader = _endless_batches(real_dataloader)
infinite_generated_dataloader = _endless_batches(generated_dataloader)

# NOTE(review): 1000 is presumably the number of KS trials — confirm against generate_ks_results.
ks_results = generate_ks_results(infinite_real_dataloader, infinite_generated_dataloader, pct_marginals, 1000, dims=dim)

# The last axis is split by index: 0 is kept as the KS statistic, 1 as the p-value.
ks_stats = ks_results[:, :, :, 0]
ks_pvals = ks_results[:, :, :, 1]

In [11]:
# First reduction: collapse the leading axis of the KS outputs. A test counts
# as a rejection when its p-value is at or below 0.05.
mean_score = np.mean(ks_stats, axis=0)
std_score = np.std(ks_stats, axis=0)
percent_reject = np.mean(ks_pvals <= 0.05, axis=0)

# Second reduction: average over the next axis, leaving one value per entry
# of pct_marginals.
mean_score_per_marginal, std_score_per_marginal, mean_pct_reject_per_marginal = (
    np.mean(stat, axis=0) for stat in (mean_score, std_score, percent_reject)
)

# print as a table
print('KS Test:')
print('Marginal\tMean KS\t% Reject')
for i, pct in enumerate(pct_marginals):
    print(f'{pct}\t{mean_score_per_marginal[i]:.2f}\t{mean_pct_reject_per_marginal[i]*100:.2f}')

KS Test:
Marginal	Mean KS	% Reject
0.3	0.32	91.70
0.5	0.29	78.90
0.7	0.30	80.70
0.9	0.45	99.80
