In [None]:
# Clone the project repository into the current working directory (notebook `!` shell escape).
!git clone https://github.com/priyank96/idl-spring-22-project-deepar

Cloning into 'idl-spring-22-project-deepar'...
remote: Enumerating objects: 1677, done.[K
remote: Counting objects: 100% (1677/1677), done.[K
remote: Compressing objects: 100% (914/914), done.[K
remote: Total 1677 (delta 786), reused 1621 (delta 757), pack-reused 0[K
Receiving objects: 100% (1677/1677), 81.12 MiB | 22.42 MiB/s, done.
Resolving deltas: 100% (786/786), done.
Checking out files: 100% (1934/1934), done.


In [None]:
# Mount Google Drive at /content/drive; the training cell writes its checkpoints
# under /content/drive/MyDrive/DeepARExperiments.
from google.colab import drive

# force_remount=True asks Colab to mount again even if the drive is already mounted.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import logging
from sklearn import preprocessing
import torch.optim as optim
from tqdm import tqdm
import os
device = 'cuda' if torch.cuda.is_available() else 'cpu'


In [None]:
# Run the repository's data script.
# NOTE(review): presumably this produces the .npy / .pkl files loaded by the following cells — confirm against company_data.py.
!python3 /content/idl-spring-22-project-deepar/src/data/company_data.py

  return (a - mns) / sstd
 12% 134/1151 [00:41<04:30,  3.77it/s]

In [None]:
import pickle
# Load the two company lookup tables stored next to the data files.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('/content/idl-spring-22-project-deepar/data/index_to_company.pkl','rb') as f:
  index_to_company = pickle.load(f)  # integer index -> ticker symbol

with open('/content/idl-spring-22-project-deepar/data/company_to_index.pkl','rb') as f:
  company_to_index = pickle.load(f)  # ticker symbol -> integer index

In [None]:
# Show both lookup tables to confirm they are inverses of each other.
print(index_to_company)
print(company_to_index)

{0: 'MCD', 1: 'BEEM', 2: 'XXII', 3: 'RWB', 4: 'PSTV', 5: 'OPAD', 6: 'XHB', 7: 'DBMM', 8: 'V', 9: 'AMT'}
{'MCD': 0, 'BEEM': 1, 'XXII': 2, 'RWB': 3, 'PSTV': 4, 'OPAD': 5, 'XHB': 6, 'DBMM': 7, 'V': 8, 'AMT': 9}


In [None]:
# Directory holding the pre-windowed train/test arrays.
DATA_PATH = '/content/idl-spring-22-project-deepar/data'



# Training windows and their labels.
# NOTE: allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code; only use it on trusted files.
stock_inputs = np.load(DATA_PATH + '/stock_inputs.npy', allow_pickle=True)
stock_labels = np.load(DATA_PATH + '/stock_labels.npy', allow_pickle=True)

# Test windows and their labels.
stock_test_inputs = np.load(DATA_PATH + '/stock_test_inputs.npy',allow_pickle=True)
stock_test_labels = np.load(DATA_PATH + '/stock_test_labels.npy',allow_pickle=True)

In [None]:
# Upper bounds on how many windows are kept from each split.
# Slicing silently returns fewer rows if an array is shorter than the bound.
NUM_TRAIN_SAMPLES = 10000
NUM_TEST_SAMPLES  = 3800

# Keep only the first NUM_TRAIN_SAMPLES windows (inputs are 3-D, labels 2-D).
stock_inputs_trimmed = stock_inputs[:NUM_TRAIN_SAMPLES,:,:]
stock_labels_trimmed = stock_labels[:NUM_TRAIN_SAMPLES,:]

# Keep only the first NUM_TEST_SAMPLES windows.
stock_test_inputs_trimmed = stock_test_inputs[:NUM_TEST_SAMPLES,:,:]
stock_test_labels_trimmed = stock_test_labels[:NUM_TEST_SAMPLES,:]

In [None]:
# Report the shapes of the trimmed arrays: inputs are 3-D, labels are 2-D.
print('stock_inputs_trimmed shape', stock_inputs_trimmed.shape)
print('stock_labels_trimmed shape', stock_labels_trimmed.shape)
print('stock_test_inputs_trimmed shape', stock_test_inputs_trimmed.shape)
print('stock_test_labels_trimmed shape', stock_test_labels_trimmed.shape)

stock_inputs_trimmed shape (10000, 192, 5)
stock_labels_trimmed shape (10000, 192)
stock_test_inputs_trimmed shape (3800, 192, 5)
stock_test_labels_trimmed shape (3800, 192)


In [None]:
# On the trimmed input, collect the distinct company ids that were retained.

# Every time step in a window belongs to the same company, so reading the id
# from the first step (seq_id = 0) is sufficient.
# The company id is stored as the last feature (cov_id = -1).
seq_id = 0
cov_id = -1

# Slice over the rows that actually exist instead of looping up to
# NUM_TRAIN_SAMPLES / NUM_TEST_SAMPLES: the trimmed arrays may hold fewer rows
# than those constants, in which case an index loop raises IndexError.
train_comp_ids = set(stock_inputs_trimmed[:, seq_id, cov_id])
test_comp_ids  = set(stock_test_inputs_trimmed[:, seq_id, cov_id])




In [None]:
# List the distinct company ids found in each split and how many there are.
print(sorted(train_comp_ids))
print(len(train_comp_ids))
print(sorted(test_comp_ids))
print(len(test_comp_ids))


[0.0, 1.0, 2.0, 3.0, 4.0]
5
[0.0, 1.0, 2.0, 4.0]
4


In [None]:

# Hyper-parameters shared by the model, the data loaders and the training loop.
params = {
    # Size of the company embedding table.
    # NOTE(review): this is the count of distinct ids; the embedding lookup assumes
    # ids are contiguous in [0, num_classes) — confirm for other data subsets.
    'num_classes': len(train_comp_ids),
    'embedding_dim':5,
    # Number of per-step features excluding the trailing company id column.
    'cov_dim': 4,
    'lstm_hidden_dim': 64,
    'lstm_layers':4 ,
    # Must equal conditioning_period + prediction_period (asserted later in the notebook).
    'window_size':192,
    # Used by both DataLoaders; Network.forward asserts batches are no larger than this.
    'batch_size': 96,
    'learning_rate': 1e-3,
    'epochs':20,
    'num_test_samples': NUM_TEST_SAMPLES,
    'num_train_samples': NUM_TRAIN_SAMPLES,
    # Steps fed with ground-truth inputs before autoregressive prediction starts.
    'conditioning_period': 168,
    # Steps predicted autoregressively at evaluation time.
    'prediction_period': 24
}

print(params)

{'num_classes': 5, 'embedding_dim': 5, 'cov_dim': 4, 'lstm_hidden_dim': 64, 'lstm_layers': 4, 'window_size': 192, 'batch_size': 96, 'learning_rate': 0.001, 'epochs': 20, 'num_test_samples': 3800, 'num_train_samples': 10000, 'conditioning_period': 168, 'prediction_period': 24}


In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset, Sampler
from pathlib import Path
import sys

DATA_PATH = '/content/idl-spring-22-project-deepar/data'


class TrainDataset(Dataset):
    """Map-style dataset over windowed training inputs and labels.

    Args:
        data: array indexed as ``data[i]`` -> one window of features. Defaults to
            the notebook-level ``stock_inputs_trimmed``.
        label: array indexed as ``label[i]`` -> labels for window ``i``. Defaults
            to the notebook-level ``stock_labels_trimmed``.

    Raises:
        ValueError: if ``data`` and ``label`` hold a different number of windows.
    """

    def __init__(self, data=None, label=None):
        # Fall back to the notebook globals so `TrainDataset()` keeps working.
        self.data = stock_inputs_trimmed if data is None else data
        self.label = stock_labels_trimmed if label is None else label
        if len(self.data) != len(self.label):
            raise ValueError(
                "data and label must contain the same number of windows "
                f"(got {len(self.data)} and {len(self.label)})")
        self.train_len = self.data.shape[0]

    def __len__(self):
        return self.train_len

    def __getitem__(self, index):
        """Return ``(inputs, labels)`` for one window as float32 tensors."""
        # astype copies, so the tensors never alias the backing numpy arrays.
        x1 = torch.from_numpy(self.data[index].astype(np.float32))
        x2 = torch.from_numpy(self.label[index].astype(np.float32))
        return x1, x2

class TestDataset(Dataset):
    """Map-style dataset over windowed test inputs and labels.

    Args:
        data: array indexed as ``data[i]`` -> one window of features. Defaults to
            the notebook-level ``stock_test_inputs_trimmed``.
        label: array indexed as ``label[i]`` -> labels for window ``i``. Defaults
            to the notebook-level ``stock_test_labels_trimmed``.

    Raises:
        ValueError: if ``data`` and ``label`` hold a different number of windows.
    """

    def __init__(self, data=None, label=None):
        # Fall back to the notebook globals so `TestDataset()` keeps working.
        self.data = stock_test_inputs_trimmed if data is None else data
        self.label = stock_test_labels_trimmed if label is None else label
        if len(self.data) != len(self.label):
            raise ValueError(
                "data and label must contain the same number of windows "
                f"(got {len(self.data)} and {len(self.label)})")
        self.test_len = self.data.shape[0]

    def __len__(self):
        return self.test_len

    def __getitem__(self, index):
        """Return ``(inputs, labels)`` for one window as float32 tensors."""
        # astype copies, so the tensors never alias the backing numpy arrays.
        x1 = torch.from_numpy(self.data[index].astype(np.float32))
        x2 = torch.from_numpy(self.label[index].astype(np.float32))
        return x1, x2


In [None]:
def unwindow_sequence(dataset, index_lookup=None):
  """Stitch stride-1 windows back into one continuous series per company.

  The first window seen for a company is kept whole; every later window of that
  company contributes only its final time step. This is only correct when the
  windows of a company are consecutive and were built with stride 1.

  Args:
    dataset: iterable of ``(cov_ip, cov_op)`` tensor pairs, where ``cov_ip`` is
      (time, features) with the company id as the last feature and ``cov_op``
      is (time,).
    index_lookup: mapping from integer company id to company name. Defaults to
      the notebook-level ``index_to_company``.

  Returns:
    dict mapping company name -> ``[inputs, outputs]`` where index 0 holds the
    concatenated covariates and index 1 the concatenated labels.
  """
  if index_lookup is None:
    index_lookup = index_to_company

  seq_id = 0
  company_index_cov_id = -1

  # Collect the pieces per company and concatenate once at the end; calling
  # torch.cat inside the loop re-copies the whole series on every window.
  pieces = {}

  for cov_ip, cov_op in dataset:
    # The id is stored as a float feature; cast it so the lookup key is an int.
    comp_index = int(cov_ip[seq_id][company_index_cov_id].item())
    company_name = index_lookup[comp_index]

    if company_name not in pieces:
      # First window of this company: keep every time step.
      pieces[company_name] = ([cov_ip], [cov_op])
    else:
      # Later windows overlap the previous one in all but the last step.
      ip_parts, op_parts = pieces[company_name]
      ip_parts.append(cov_ip[-1:, :])
      op_parts.append(cov_op[-1:])

  # 0 --> covariates, 1 --> outputs (stock price)
  return {
      name: [torch.cat(ip_parts, dim=0), torch.cat(op_parts, dim=0)]
      for name, (ip_parts, op_parts) in pieces.items()
  }

In [None]:
# Wrap the trimmed arrays in Dataset objects.
train_data = TrainDataset()
test_data  = TestDataset()

# Training batches are shuffled; test batches keep their original order so that
# row i of the stacked predictions corresponds to test window i.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=params['batch_size'], shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=params['batch_size'], shuffle=False)

# Rebuild one continuous series per company from the overlapping windows
# (iterates the datasets directly, not the loaders, so order is preserved).
train_data_unwindowed = unwindow_sequence(train_data)
test_data_unwindowed = unwindow_sequence(test_data)

In [None]:
# Companies present in the unwindowed training data.
print(train_data_unwindowed.keys())
print(len(train_data_unwindowed))

dict_keys(['MCD', 'BEEM', 'XXII', 'RWB', 'PSTV'])
5


In [None]:
print(test_data_unwindowed.keys())
print(len(test_data_unwindowed))
# Some companies are present in the train set but not in the test set.
# However, every company in the test set is also present in the train set.

dict_keys(['MCD', 'BEEM', 'XXII', 'PSTV'])
4


In [None]:
import plotly.express as px

def plot_output(company_data, company_name):
  """Line-plot the output series of one company, then print its values.

  ``company_data[company_name]`` is indexed as [inputs, outputs]; element 1 is
  the output series that gets plotted against a 1-based step axis.
  """
  series = company_data[company_name][1]
  n_steps = len(series)
  steps = np.linspace(1, n_steps, num=n_steps)
  px.line(x=steps, y=series).show()
  print("op", series)

def plot_input_covariate(company_data, company_name, covariate_index):
  """Line-plot one input covariate of one company, then print its values.

  ``company_data[company_name]`` is indexed as [inputs, outputs]; element 0 is
  the (time, features) input matrix from which column ``covariate_index`` is taken.
  """
  covariates = company_data[company_name][0]
  column = covariates[:, covariate_index]
  steps = np.linspace(1, covariates.shape[0], num=len(column))
  px.line(x=steps, y=column).show()
  print("ip", column)

In [None]:
# SANITY check one: the output label (open price) should be the input opening
# price (covariate 0) shifted by one time step. Compare the two plots visually.
plot_output(train_data_unwindowed, 'MCD')
plot_input_covariate(train_data_unwindowed, 'MCD', 0)

In [None]:

# SANITY check two (inputs): the test series should continue where the train
# series ends. Compare the end of the first plot with the start of the second.
plot_input_covariate(train_data_unwindowed, 'MCD', 0)
plot_input_covariate(test_data_unwindowed, 'MCD', 0)


In [None]:
# SANITY check two (output labels): the test labels should continue where the
# train labels end.

plot_output(train_data_unwindowed, 'MCD')
plot_output(test_data_unwindowed, 'MCD')

In [None]:
# Plot every input feature of one company; the trailing comments name each column.
plot_input_covariate(train_data_unwindowed, 'MCD', 0)  # Opening Price
plot_input_covariate(train_data_unwindowed, 'MCD', 1)  # Volume
plot_input_covariate(train_data_unwindowed, 'MCD', 2)  # Day
plot_input_covariate(train_data_unwindowed, 'MCD', 3)  # Month
plot_input_covariate(train_data_unwindowed, 'MCD', 4)  # Company ID


In [None]:
# Sanity Check - Plot Windows

import plotly.express as px

def plot_output_windowed(company_data, window_id):
  """Line-plot the output labels of a single window, then print them.

  ``company_data[window_id]`` is indexed as (inputs, outputs); element 1 is the
  label series plotted against a 1-based step axis.
  """
  labels = company_data[window_id][1]
  n_steps = len(labels)
  steps = np.linspace(1, n_steps, num=n_steps)
  px.line(x=steps, y=labels).show()
  print("op", labels)

def plot_input_covariate_windowed(company_data, window_id, covariate_index):
  """Line-plot one input covariate of a single window, then print its values.

  ``company_data[window_id]`` is indexed as (inputs, outputs); element 0 is the
  (time, features) input matrix from which column ``covariate_index`` is taken.
  """
  window_inputs = company_data[window_id][0]
  column = window_inputs[:, covariate_index]
  steps = np.linspace(1, window_inputs.shape[0], num=len(column))
  px.line(x=steps, y=column).show()
  print("ip", column)


In [None]:
# Inspect the first test window: its labels, then its covariate 0.
plot_output_windowed(test_data, 0)
plot_input_covariate_windowed(test_data, 0, 0)

In [None]:
# Shape of the input tensor of the first training window.
print('train data shape', train_data[0][0].shape)

train data shape torch.Size([192, 5])


# MODEL


In [None]:
class Network(nn.Module):
    def __init__(self, params):
        '''
        Recurrent network that predicts the parameters (mu, sigma) of a Gaussian
        over the next value of a time-dependent variable, given past inputs,
        covariates and a learned per-company embedding.

        params is a dict that must provide: 'num_classes', 'embedding_dim',
        'cov_dim', 'lstm_hidden_dim', 'lstm_layers' and 'batch_size'.
        '''
        super(Network, self).__init__()
        self.params = params
        self.embedding = nn.Embedding(params['num_classes'], params['embedding_dim'])

        self.lstm = nn.LSTM(input_size=params['cov_dim']+params['embedding_dim'],
                            hidden_size=params['lstm_hidden_dim'],
                            num_layers=params['lstm_layers'],
                            bias=True,
                            batch_first=True,
                          )

        self.distribution_mu = nn.Linear(params['lstm_hidden_dim'], 1)
        self.distribution_presigma = nn.Linear(params['lstm_hidden_dim'], 1)
        # Softplus keeps the predicted standard deviation strictly positive.
        self.distribution_sigma = nn.Softplus()


    def forward(self, x, h0_c0=None):
        '''
        Predict mu and sigma of the distribution for z_t at every time step.

        x:      (batch, seq_len, cov_dim + 1); the last feature is the company index.
        h0_c0:  optional (h0, c0) LSTM state; None starts from zeros.

        Returns (mu, sigma, (hn, cn)) with mu and sigma of shape (batch, seq_len, 1).
        '''
        # Read the hyper-parameters from the instance rather than from a
        # module-level `params`, so the model works with whatever it was built with.
        cfg = self.params

        cov = x[:, :, :-1]   # drop the company index to get the covariates

        # The company is constant within a window, so read it from the first step.
        # Embedding indices are cast to int64, the index dtype nn.Embedding documents.
        company_index = x[:, 0, -1].long()
        company_embed = self.embedding(company_index)

        batch_size, seq_len, cov_dim = cov.shape

        assert cov_dim == cfg['cov_dim']
        assert batch_size <= cfg['batch_size']

        # Broadcast the embedding over time; expand creates a view instead of
        # materialising seq_len copies (torch.cat below copies once anyway).
        embed_all_timestamps = company_embed.unsqueeze(1).expand(-1, seq_len, -1)

        lstm_input = torch.cat((cov, embed_all_timestamps), dim=2)

        assert lstm_input.shape[0] <= cfg['batch_size']
        assert lstm_input.shape[1] == seq_len
        assert lstm_input.shape[2] == cfg['cov_dim'] + cfg['embedding_dim']

        out1, hn_cn = self.lstm(lstm_input, h0_c0)

        out_mu = self.distribution_mu(out1)

        out_presigma = self.distribution_presigma(out1)
        out_sigma = self.distribution_sigma(out_presigma)

        return out_mu, out_sigma, hn_cn



In [None]:
# Build a model instance for the shape smoke test below (the training cell creates its own).
model = Network(params)

In [None]:
# Smoke test: push a single training batch through the model and print the
# output shapes. Neither the model nor the batch is moved to the GPU here.
for i, (ip_covariate, op_label) in enumerate(train_loader):
  out_mu, out_sigma, ht_ct = model.forward(x=ip_covariate)

  print('out mu shape', out_mu.shape)
  print('out_sigma shape', out_sigma.shape)
  print('ht shape', ht_ct[0].shape, ht_ct[1].shape)


  # One batch is enough to check the shapes.
  break

out mu shape torch.Size([96, 192, 1])
out_sigma shape torch.Size([96, 192, 1])
ht shape torch.Size([4, 96, 64]) torch.Size([4, 96, 64])


In [None]:
def loss_fn(mu,sigma,labels):
  """Mean Gaussian negative log-likelihood of `labels` under N(mu, sigma).

  Args:
    mu:     predicted means, shape (batch, time, 1) or (batch, time).
    sigma:  predicted standard deviations (> 0), same shape as `mu`.
    labels: observed values, shape (batch, time).

  Returns:
    0-dim tensor: the negative log-likelihood averaged over every
    (batch, time) element.
  """
  # Match the label shape explicitly. A bare squeeze() would also drop the batch
  # dimension when the batch holds a single window.
  mu = mu.reshape(labels.shape)
  sigma = sigma.reshape(labels.shape)

  # One vectorised log_prob call replaces a Python loop over every element;
  # mean() equals the previous sum / (batch * time).
  log_likelihood = torch.distributions.normal.Normal(mu, sigma).log_prob(labels)
  return -log_likelihood.mean()

In [None]:
def accuracy_RMSE_train(mu: torch.Tensor, labels: torch.Tensor):
  """Average per-row normalised error between `mu` and `labels`.

  For each row: sqrt(sum of squared errors) / sum(|labels|). The returned value
  is the mean of that ratio over all rows. Rows whose labels sum to zero
  produce inf/nan, since no guard is applied.
  """
  predicted = mu.cpu().detach().numpy()
  target = labels.cpu().detach().numpy()

  squared_error = np.square(predicted - target).sum(axis=1)
  label_mass = np.abs(target).sum(axis=1)
  per_row = np.sqrt(squared_error) / label_mass
  return np.sum(per_row) / len(per_row)

In [None]:
def accuracy_RMSE_test(predictions: torch.Tensor, labels: torch.Tensor):
  """Average per-row normalised error between `predictions` and `labels`.

  For each row: sqrt(sum of squared errors) / sum(|labels|). The returned value
  is the mean of that ratio over all rows. Rows whose labels sum to zero
  produce inf/nan, since no guard is applied.
  """
  predicted = predictions.cpu().detach().numpy()
  target = labels.cpu().detach().numpy()

  squared_error = np.square(predicted - target).sum(axis=1)
  label_mass = np.abs(target).sum(axis=1)
  per_row = np.sqrt(squared_error) / label_mass
  return np.sum(per_row) / len(per_row)

In [None]:
# Split of each window used at evaluation time: the first part is fed with
# ground-truth inputs, the remainder is predicted autoregressively.
# (The spelling `conditionining_period` is kept because later cells use this name.)
conditionining_period = params['conditioning_period']
prediction_period     = params['prediction_period']

# The two periods must exactly tile one window.
assert conditionining_period + prediction_period == params['window_size']

In [None]:
def validate(model):
  """Evaluate `model` on `test_loader` with autoregressive prediction.

  For every test window the model first consumes the conditioning period with
  ground-truth inputs. It then predicts the prediction period one step at a
  time, feeding its previous mean back in as feature 0 of the next input.

  Args:
    model: network returning (mu, sigma, (h, c)) for a (batch, time, features) input.

  Returns:
    The value of accuracy_RMSE_test over the predicted means of all windows
    (conditioning steps followed by prediction steps) against the labels.

  Raises:
    ValueError: if `test_loader` yields no batches.
  """
  # Use the GPU when there is one, otherwise fall back to the CPU.
  run_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  model.eval()
  model.to(run_device)

  mu_batches = []
  label_batches = []

  with torch.no_grad():
    for ip_covariate, op_label in test_loader:
      ip_covariate = ip_covariate.to(run_device)
      op_label     = op_label.to(run_device)

      cond_ip = ip_covariate[:, :conditionining_period, :]
      pred_ip = ip_covariate[:, conditionining_period:, :]

      # Step one - forward pass over the conditioning period.
      mu, sigma, ht_ct = model(x=cond_ip, h0_c0=None)

      # squeeze(2) drops only the trailing singleton; a bare squeeze() would
      # also drop the batch dimension when the last batch holds one window.
      step_mus = [mu.squeeze(2)]

      # The mean predicted at the last conditioning step seeds the first
      # prediction step.
      pred_mu = mu[:, -1:, :]

      for t in range(prediction_period):
        # Clone so the batch tensor itself is not overwritten through a view.
        step_ip = pred_ip[:, t:t + 1, :].clone()
        step_ip[:, 0, 0] = pred_mu[:, 0, 0]

        pred_mu, _, ht_ct = model(x=step_ip, h0_c0=ht_ct)
        step_mus.append(pred_mu.squeeze(2))

      mu_batches.append(torch.cat(step_mus, dim=1))
      label_batches.append(op_label)

  if not mu_batches:
    raise ValueError("test_loader yielded no batches")

  # Concatenate once at the end instead of growing tensors batch by batch.
  overall_mu = torch.cat(mu_batches, dim=0)
  overall_label = torch.cat(label_batches, dim=0)

  return accuracy_RMSE_test(overall_mu, overall_label)







In [None]:
# rmse = accuracy_RMSE_train(overall_mu, overall_label)
# print("rmse ", rmse)

In [None]:
def plot_output_data(data, window_id):
  """Line-plot row `window_id` of `data` against a 1-based step axis."""
  row = data[window_id]
  n_steps = len(row)
  steps = np.linspace(1, n_steps, num=n_steps)
  px.line(x=steps, y=row).show()


# TRAIN

In [None]:
model_version='deepar_model_refined_v1.pt'
epochs = params['epochs']

# Rolling checkpoint (overwritten every epoch) and best-model snapshots live here.
checkpoint_dir = '/content/drive/MyDrive/DeepARExperiments'
checkpoint_path = f'{checkpoint_dir}/{model_version}'
os.makedirs(checkpoint_dir, exist_ok=True)  # torch.save fails if the folder is missing

best_dev_rmse = float('inf')

model = Network(params)
model.to(device)  # move before building the optimizer so its state lives on the same device
optimizer = torch.optim.Adam(model.parameters(), lr=params['learning_rate'])
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

print(model)

# Resume ONCE, before the loop. Reloading inside the loop re-read the checkpoint
# every epoch, and reassigning `epoch` there had no effect on the `for` counter.
# NOTE: with an existing checkpoint only the remaining epochs are run. Delete the
# checkpoint file (or change model_version) to train from scratch.
start_epoch = 0
if os.path.exists(checkpoint_path):
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # Older checkpoints have no scheduler state. The decayed learning rate is
    # still restored through the optimizer's param groups.
    if 'scheduler_state_dict' in checkpoint:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    start_epoch = checkpoint['epoch'] + 1

for epoch in range(start_epoch, epochs):
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')
    model.train()  # validate() switches the model to eval mode, so reset every epoch

    total_loss = 0

    for i, (ip_covariate, op_label) in enumerate(train_loader):
        optimizer.zero_grad()

        ip_covariate = ip_covariate.to(device)
        op_label     = op_label.to(device)

        # Teacher forcing: the whole window is fed with ground-truth inputs.
        mu, sigma, ht_ct = model(x=ip_covariate, h0_c0=None)

        loss = loss_fn(mu, sigma, op_label)

        total_loss += float(loss)
        loss.backward()
        optimizer.step()

        # tqdm lets you add some details so you can monitor training as you train.
        batch_bar.set_postfix(
            loss="{:.04f}".format(float(total_loss / (i + 1))),
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))

        batch_bar.update()

    batch_bar.close() # You need this to close the tqdm bar
    val_rmse= validate(model=model)
    scheduler.step()

    mean_train_loss = total_loss / len(train_loader)

    # Rolling checkpoint used for resuming.
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'loss': mean_train_loss,
            },  checkpoint_path)

    # Separate snapshot whenever the validation metric improves.
    if val_rmse < best_dev_rmse:
      best_dev_rmse = val_rmse
      torch.save({
              'epoch': epoch,
              'model_state_dict': model.state_dict(),
              'optimizer_state_dict': optimizer.state_dict(),
              'scheduler_state_dict': scheduler.state_dict(),
              'val_rmse': val_rmse,
              'loss': mean_train_loss,
              },  f'{checkpoint_dir}/best_dev_acc_{best_dev_rmse}_{model_version}')

    print("Epoch {}/{}: Train Loss {:.04f}, Learning Rate {:.04f}".format(
        epoch + 1,
        epochs,
        float(mean_train_loss),
        float(optimizer.param_groups[0]['lr'])))
    print("rmse is ", val_rmse)

Network(
  (embedding): Embedding(5, 5)
  (lstm): LSTM(9, 64, num_layers=4, batch_first=True)
  (distribution_mu): Linear(in_features=64, out_features=1, bias=True)
  (distribution_presigma): Linear(in_features=64, out_features=1, bias=True)
  (distribution_sigma): Softplus(beta=1, threshold=20)
)




Epoch 1/20: Train Loss -0.0018, Learning Rate 0.0009
rmse is  0.03337807906301398




Epoch 2/20: Train Loss -0.8738, Learning Rate 0.0008
rmse is  0.035066721062911185




Epoch 3/20: Train Loss -0.8931, Learning Rate 0.0007
rmse is  0.03652589095266242




Epoch 4/20: Train Loss -1.1262, Learning Rate 0.0007
rmse is  0.03405877364309211




Epoch 5/20: Train Loss -1.3426, Learning Rate 0.0006
rmse is  0.038799787822522615




Epoch 6/20: Train Loss -1.4072, Learning Rate 0.0005
rmse is  0.03297350632516961




Epoch 7/20: Train Loss -1.8283, Learning Rate 0.0005
rmse is  0.03312451212029708




Epoch 8/20: Train Loss -1.8957, Learning Rate 0.0004
rmse is  0.033601050125925166


Train:  45%|████▍     | 47/105 [09:52<12:08, 12.57s/it, loss=-1.8698, lr=0.0004]

KeyboardInterrupt: ignored

In [None]:
# Restore model weights from a checkpoint file.
# NOTE(review): this reads /content/deepar_model_refined_v1.pt, whereas the training
# cell saves under /content/drive/MyDrive/DeepARExperiments/ — confirm the file was
# copied here, otherwise this cell fails on a fresh runtime.
checkpoint = torch.load(f'/content/deepar_model_refined_v1.pt')
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [None]:


# Run the restored model over the whole test set and keep the per-window
# predictions (mu, sigma) and labels for plotting.

# Use the GPU when there is one, otherwise fall back to the CPU.
inference_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model.eval()
model.to(inference_device)


total_RMSE = 0  # not used in this cell; kept in case later cells read it

# These stay None when test_loader yields no batches.
overall_mu = None
overall_sigma = None
overall_label = None

mu_batches = []
sigma_batches = []
label_batches = []

with torch.no_grad():
  for ip_covariate, op_label in test_loader:
    ip_covariate = ip_covariate.to(inference_device)
    op_label     = op_label.to(inference_device)

    cond_ip = ip_covariate[:, :conditionining_period, :]
    pred_ip = ip_covariate[:, conditionining_period:, :]

    # Step one - forward pass over the conditioning period.
    mu, sigma, ht_ct = model(x=cond_ip, h0_c0=None)

    # squeeze(2) drops only the trailing singleton; a bare squeeze() would also
    # drop the batch dimension when the last batch holds a single window.
    step_mus = [mu.squeeze(2)]
    step_sigmas = [sigma.squeeze(2)]

    # The mean predicted at the last conditioning step seeds the first
    # prediction step.
    pred_mu = mu[:, -1:, :]

    for t in range(prediction_period):
      # Clone so the batch tensor itself is not overwritten through a view.
      pred_cov_ip = pred_ip[:, t:t + 1, :].clone()
      pred_cov_ip[:, 0, 0] = pred_mu[:, 0, 0]

      pred_mu, pred_sigma, ht_ct = model(x=pred_cov_ip, h0_c0=ht_ct)

      step_mus.append(pred_mu.squeeze(2))
      step_sigmas.append(pred_sigma.squeeze(2))

    mu_batches.append(torch.cat(step_mus, dim=1))
    sigma_batches.append(torch.cat(step_sigmas, dim=1))
    label_batches.append(op_label)

# Concatenate once at the end instead of growing tensors batch by batch.
if mu_batches:
  overall_mu = torch.cat(mu_batches, dim=0)
  overall_sigma = torch.cat(sigma_batches, dim=0)
  overall_label = torch.cat(label_batches, dim=0)

In [None]:
import pandas as pd
import plotly.graph_objects as go
def plot_test_output_data(actual,predicted, window_id, conditioning_end=168):
  """Overlay actual and predicted series for one test window.

  Args:
    actual: 2-D array-like of labels, indexed as actual[window_id].
    predicted: 2-D array-like of predictions, same layout as `actual`.
    window_id: row to plot.
    conditioning_end: x position of the vertical marker that separates the
      conditioning period from the prediction period. Defaults to 168, the
      value that was previously hard-coded.
  """
  x=np.linspace(1, len(actual[window_id]), num=len(actual[window_id]))
  df = pd.DataFrame()
  df['actual'] = actual[window_id]
  df['predicted'] = predicted[window_id]

  fig = go.Figure()
  # Everything right of this line was predicted autoregressively.
  fig.add_vline(x=conditioning_end)
  fig.add_traces(go.Scatter(x=x, y = df['actual'], mode = 'lines', name = 'Actual'))
  fig.add_traces(go.Scatter(x=x, y = df['predicted'], mode = 'lines', name = 'Predicted'))
  fig.show()

In [None]:
# Window ids used for the paper figures: 3500, 420
# Plot actual vs. predicted mean for one test window (tensors moved to CPU numpy first).
plot_test_output_data(overall_label.cpu().numpy(),overall_mu.cpu().numpy(), 2500  )