In [3]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
main_path = Path('..').resolve()
sys.path.append(str(main_path))

import seaborn as sns
from src.dataset import MetaStockDataset
from src.utils import ARGProcessor
import torch
import matplotlib.pyplot as plt
torch.__version__

'1.11.0+cu113'

In [34]:
# Read the experiment settings and extract the keyword arguments for
# MetaStockDataset (presumably filtered to what its constructor accepts).
setting_file = Path('.', 'kdd.yml')
meta_args = ARGProcessor(setting_file=setting_file)
data_kwargs = meta_args.get_args(cls=MetaStockDataset)

# One dataset per meta split, constructed in the order listed.
(
    meta_train,
    meta_valid_time,
    meta_valid_stock,
    meta_valid_mix,
    meta_test_time,
    meta_test_stock,
    meta_test_mix,
) = (
    MetaStockDataset(meta_type=split_name, **data_kwargs)
    for split_name in (
        'train',
        'valid-time',
        'valid-stock',
        'valid-mix',
        'test-time',
        'test-stock',
        'test-mix',
    )
)

Processing data and candidates for train: 100%|██████████| 35/35 [00:00<00:00, 45.50it/s]
Processing data and candidates for valid-time: 100%|██████████| 35/35 [00:00<00:00, 81.01it/s]
Processing data and candidates for valid-stock: 100%|██████████| 10/10 [00:00<00:00, 59.98it/s]
Processing data and candidates for valid-mix: 100%|██████████| 10/10 [00:00<00:00, 55.44it/s]
Processing data and candidates for test-time: 100%|██████████| 35/35 [00:00<00:00, 82.57it/s]
Processing data and candidates for test-stock: 100%|██████████| 5/5 [00:00<00:00, 54.08it/s]
Processing data and candidates for test-mix: 100%|██████████| 5/5 [00:00<00:00, 73.43it/s]


In [None]:
all_data = meta_train.generate_tasks()
all_data

In [None]:
all_data.to('cpu')
all_data

In [None]:
all_data.numpy()
all_data

In [None]:
all_data['query'][..., 0].reshape(-1).shape

In [None]:
meta_train.meta_type

In [None]:
def draw_density(ds):
    """Overlay query and support histograms for one freshly generated task batch.

    Calls ``ds.generate_tasks()`` once and draws an 11 x 2 grid of axes:
    row ``i`` is index ``i`` on the last axis of the ``'query'`` / ``'support'``
    entries, column ``t`` is index ``t`` on axis 1 of ``'support'``.
    The query histogram is the same in both columns of a row; only the
    support histogram changes with ``t``. The figure title is ``ds.meta_type``.
    """
    tasks = ds.generate_tasks()
    n_rows, n_cols = 11, 2
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 16))
    # np.ndindex walks (row, col) in the same row-major order as two nested loops.
    for i, t in np.ndindex(n_rows, n_cols):
        panel = axes[i, t]
        query_values = tasks['query'][..., i].reshape(-1)
        support_values = tasks['support'][:, t, :, i].reshape(-1)
        sns.histplot(data=query_values, ax=panel, color="blue", label='query', alpha=0.2)
        sns.histplot(data=support_values, ax=panel, color="red", label='support', alpha=0.2)
        panel.legend()
        if i == 0:
            # Only the top row carries the column header.
            panel.set_title(f'Class: {t}')
    fig.suptitle(f'{ds.meta_type}')
    plt.tight_layout()
    plt.show()

In [None]:
draw_density(ds=meta_train)

In [None]:
draw_density(ds=meta_valid_time)

In [None]:
draw_density(ds=meta_valid_stock)

data distribution

In [None]:
from collections import Counter

def count_labels(meta_ds):
    """Count the ``'label'`` values at the candidate rows of every symbol.

    For each symbol in ``meta_ds.symbols`` the rows ``meta_ds.candidates[symbol]``
    of ``meta_ds.data[symbol]`` are selected and their label frequencies are
    accumulated.

    Returns:
        Counter mapping label value -> total occurrences over all symbols.
    """
    totals = Counter()
    for symbol in meta_ds.symbols:
        frame = meta_ds.data[symbol]
        candidate_labels = frame.loc[meta_ds.candidates[symbol], 'label']
        per_symbol = candidate_labels.value_counts().to_dict()
        totals.update(per_symbol)
    return totals

# Per split: number of symbols and how many candidate labels are 0 ('fall') / 1 ('rise').
cnt_data = {'ds': [], 'n_stock': [], 'fall': [], 'rise': []}
all_splits = (
    meta_train,
    meta_valid_time, meta_valid_stock, meta_valid_mix,
    meta_test_time, meta_test_stock, meta_test_mix,
)
for ds in all_splits:
    cnts = count_labels(ds)
    row = {
        'ds': ds.meta_type,
        'n_stock': len(ds.symbols),
        'fall': cnts[0],
        'rise': cnts[1],
    }
    for column, value in row.items():
        cnt_data[column].append(value)
df_cnt = pd.DataFrame(cnt_data)
df_cnt

In [None]:
# q dist
from tqdm import tqdm

def plot_q_dist(meta_dataset):
    """Bar-plot how often each query index was recorded in ``meta_dataset.q_dist``.

    Args:
        meta_dataset: object exposing ``q_dist`` (a mapping with integer keys
            and count values, read through ``.keys()`` / ``.get()``) and
            ``meta_type`` (used in the title).

    Returns:
        The matplotlib figure, with one bar for every index from 0 up to and
        including the largest key in ``q_dist``; indices without an entry are
        drawn with height 0. An empty ``q_dist`` yields an empty plot.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    q_dist = meta_dataset.q_dist
    # `+ 1` so the largest recorded index gets its own bar: np.arange(stop)
    # excludes `stop`. An empty mapping would make max() raise, hence the guard.
    n_bars = max(q_dist.keys()) + 1 if q_dist else 0
    idx = np.arange(n_bars)
    values = [q_dist.get(i) or 0 for i in idx]

    ax.bar(idx, values)
    ax.set_xlabel('Query index in labels')
    ax.set_ylabel('Count')
    ax.set_title(f'Meta Type: {meta_dataset.meta_type}')
    plt.tight_layout()
    return fig

In [None]:
meta_train.reset_q_idx_dist()
n = 1000
for i in tqdm(range(n), total=n):
    meta_train.generate_tasks()

In [None]:
fig = plot_q_dist(meta_train)

In [None]:
meta_train.data.keys()

In [None]:
meta_train.data['AAPL']

In [None]:
from torch.utils.tensorboard import SummaryWriter
# Log the query-index figure to TensorBoard, then close the writer so the
# pending event is flushed to ./test_writer and the file handle is released.
writer = SummaryWriter('./test_writer')
writer.add_figure('b', fig)
writer.close()

---

##  Check Time is Enough

In [19]:
meta_train.n_support

10

In [30]:
def test_time(ds, n_support=10):
    window_size = 15
    ds.n_support = n_support
    cnt = 0
    for symbol in ds.symbols:
        df_stock = ds.data[symbol]
        labels_indices = ds.candidates[symbol] 
        labels_indices = labels_indices[labels_indices >= window_size]

        for i in range(len(labels_indices)):
            array = df_stock.loc[labels_indices, 'label'].loc[:(labels_indices[i])].to_numpy()
            if ds.check_condition(array):
                break
        if i == len(labels_indices)-1:
            cnt += 1
    return cnt

In [35]:
ds_list = [meta_train, meta_valid_time, meta_valid_stock, meta_valid_mix, meta_test_time, meta_test_stock, meta_test_mix]

In [40]:
for ds in ds_list:
    print(f'{ds.meta_type}({len(ds.symbols)})', test_time(ds, n_support=30))

train(35) 0
valid-time(35) 0
valid-stock(10) 0
valid-mix(10) 0
test-time(35) 0
test-stock(5) 0
test-mix(5) 0


## Data generator

In [18]:
window_size = 15
symbol = 'AAPL'
df_stock = meta_train.data[symbol]
# Drop candidates that do not have a full window of history before them.
labels_indices = meta_train.candidates[symbol]
labels_indices = labels_indices[labels_indices >= window_size]

# Find the first position `i` at which the labels seen so far satisfy
# meta_train.check_condition.
for i in range(len(labels_indices)):
    array = df_stock.loc[labels_indices, 'label'].loc[:(labels_indices[i])].to_numpy()
    if meta_train.check_condition(array):
        break

# satisfied condition label index | smallest support index | smallest query index
# Query candidates are the label indices after position `i`. The cells below
# read `candidates`, so it has to be defined here for a fresh-kernel run.
candidates = labels_indices[(i+1):]

In [11]:
meta_train.n_support, meta_train.n_classes

(10, 2)

In [12]:
array = df_stock.loc[labels_indices, 'label'].loc[:(labels_indices[29])].to_numpy()

In [13]:
array

array([0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 1])

In [15]:
cond1 = array.sum() >= meta_train.n_classes
cond2 = np.isin(array, meta_train.labels_dict['fall']).sum() >= meta_train.n_support
cond3 = np.isin(array, meta_train.labels_dict['rise']).sum() >= meta_train.n_support

In [None]:
labels_indices, candidates

In [None]:
len(candidates), len(df_stock)

In [None]:
df_stock.loc[labels_indices].iloc[:10, -1:]

In [None]:
# Build one task (one query plus its support set) by hand for `symbol`.
# Relies on `candidates`, `labels_indices`, `df_stock` and `window_size` from
# the cells above.
data = dict(
    query = None,
    query_labels = None,
    support = None,
    support_labels = None,
)

# Draw one query target at random; not seeded, so the pick changes between runs.
q_target = np.random.choice(candidates)   # index in the dataframe
# (leftover from a looped version: `for q_target in y_q:`)
# Queries
q_idx = np.arange(len(labels_indices))[labels_indices == q_target][0]  # position of q_target inside labels_indices
q_end = np.array([q_target]) 
q_start = q_end - window_size
q_data, q_labels = meta_train.generate_data(df_stock, y_start=q_start, y_end=q_end)

data['query'] = q_data
data['query_labels'] = q_labels[0]  # (1,)

# Supports: selected relative to the query position `q_idx`
# (presumably drawn from labels before the query - TODO confirm in get_rise_fall).
s_fall, s_rise = meta_train.get_rise_fall(df_stock, labels_indices, idx=q_idx, n_select=meta_train.n_support)
s_end = np.concatenate([s_fall, s_rise])  # fall indices first, then rise
s_start = s_end - window_size
s_data, s_labels = meta_train.generate_data(df_stock, y_start=s_start, y_end=s_end)

data['support'] = s_data
data['support_labels'] = s_labels  # (N*K,)

# Print the chosen indices and labels for a manual sanity check.
print()   
print(f'query index: {q_idx}({q_target}) = {df_stock.loc[q_target, "label"]}')
print(f'- start={q_start} end={q_end}')
print(f'support indices:')
print(f'- start={s_start} end={s_end}')
print(f'{df_stock.loc[s_end, "label"]}')


---

## Check queries distribution

In [None]:
from collections import Counter
window_size = 10
def get_q_label_dist(ds):
    """Count the labels of all query candidates across the symbols of ``ds``.

    For each symbol, candidate label indices at or after the module-level
    ``window_size`` are scanned in order until the labels seen so far satisfy
    ``ds.check_condition``. Every candidate after that position is a query
    candidate. If the condition is never met, or the symbol has no usable
    candidate, it contributes nothing.

    Returns:
        Counter mapping label value -> number of query candidates.
    """
    q_label_dist = Counter()
    for symbol in ds.symbols:
        df_stock = ds.data[symbol]
        # Drop candidates that do not have a full window of history before them.
        labels_indices = ds.candidates[symbol]
        labels_indices = labels_indices[labels_indices >= window_size]
        labels = df_stock.loc[labels_indices, 'label']

        # Default to the last position so that "never satisfied" leaves no
        # query candidates. With no candidates at all this is -1, and the slice
        # below is empty instead of reading an unbound loop variable.
        first_ok = len(labels_indices) - 1
        for pos, end in enumerate(labels_indices):
            if ds.check_condition(labels.loc[:end].to_numpy()):
                first_ok = pos
                break
        candidates = labels_indices[(first_ok + 1):]  # query candidates

        counts = df_stock.loc[candidates, 'label'].value_counts().to_dict()
        q_label_dist.update(counts)

    return q_label_dist

In [None]:
q_label_dists = {'type': [], 'fall': [], 'rise': []}
for ds in [meta_train, meta_valid_time, meta_valid_stock, meta_valid_mix, 
    meta_test_time, meta_test_stock, meta_test_mix]:
    q_label_dist = get_q_label_dist(ds)
    q_label_dists['type'].append(ds.meta_type)
    q_label_dists['fall'].append(q_label_dist[0])
    q_label_dists['rise'].append(q_label_dist[1])

q_label_dists = pd.DataFrame(q_label_dists)

In [None]:
q_label_dists

---

# Modeling

In [None]:
import torch
import torch.nn as nn
from src.model import MetaModel

model_kwargs = meta_args.get_args(cls=MetaModel)
model = MetaModel(**model_kwargs)

rt_attn = True

## Forward

### forward_encoder

In [None]:
# encode_lstm
l, attn = model.encode_lstm(s_inputs, rt_attn=rt_attn)  # lstm_encoded: (B, N*K, E)
print(f'`l` Outputs: {l.size()}, {attn.size()}')
print(l[0])

In [None]:
import matplotlib.pyplot as plt

# Bring the attention weights to a NumPy array. `.cpu()` is a no-op for CPU
# tensors and is required before `.numpy()` when the tensor is on another device.
if isinstance(attn, torch.Tensor):
    attn_numpy = attn.detach().cpu().numpy()
else:
    attn_numpy = attn
# Row labels for the heat-maps (presumably the class of each support row - TODO confirm).
masks = [0, 0, 1, 1]

B = attn_numpy.shape[0]
# squeeze=False always returns a 2-D array of axes, so indexing also works
# for B == 1 (where plt.subplots would otherwise return a bare Axes).
fig, axes = plt.subplots(1, B, figsize=(12, 10), squeeze=False)
axes = axes[0]
for i, ax in enumerate(axes):
    ax.matshow(attn_numpy[i])
    ax.set_title(f'Data {i}')
    ax.set_yticks(np.arange(len(masks)))
    ax.set_yticklabels(masks)
    ax.set_ylabel('Label')
    ax.set_xlabel('Time Stamp')
plt.tight_layout()
plt.show()

In [None]:
# encode_linear
# Reshape the size
B = l.size(0)
N = model.output_size
K = l.size(1) // N
if rt_attn:
    attn = attn.view(B, N, K, -1)  # attn: (B, N, K, T)
l_reshape = l.view(B, N, K, -1)  # l_reshape: (B, N, K, E)
e = model.encoder(l_reshape)  # e: (B, N, K, H)
print(f'`encoded` Outputs: {e.size()}')
print(e[0])

Relation Net: class-conditional multivariate Gaussian distribution with a diagonal covariance

The paper concatenates pairs of encoded tensors to form the relation-net inputs.

Let $R(x_{i}^{p}, x_{j}^{q})$ denote the relation-net input obtained by concatenating the hidden states of two support examples, where $i, j$ are shot indices and $p, q$ are class indices.

The tensor shape is $(B, N^2, K^2, 2H)$. For each data point (row) in $B$, the data relationship is $\sum_{p, q}^{N} \sum_{i, j}^{K} R(x_{i}^{p}, x_{j}^{q})$, i.e. all class pairs $(p, q)$ combined with all shot pairs $(i, j)$.

e.g.,  N way K shot = 2 way 2 shot

| Relation | Left | Right |
|---|---|---|
| $R(x_0^0, x_0^0)$ | $h_{K_0}^{N_0}$ | $h_{K_0}^{N_0}$ |
| $R(x_0^0, x_1^0)$ | $h_{K_0}^{N_0}$ | $h_{K_1}^{N_0}$ | 
| $R(x_1^0, x_0^0)$ | $h_{K_1}^{N_0}$ | $h_{K_0}^{N_0}$ |
| $R(x_1^0, x_1^0)$ | $h_{K_1}^{N_0}$ | $h_{K_1}^{N_0}$ | 
| | | |
| $R(x_0^0, x_0^1)$ | $h_{K_0}^{N_0}$ | $h_{K_0}^{N_1}$ |
| $R(x_0^0, x_1^1)$ | $h_{K_0}^{N_0}$ | $h_{K_1}^{N_1}$ | 
| $R(x_1^0, x_0^1)$ | $h_{K_1}^{N_0}$ | $h_{K_0}^{N_1}$ |
| $R(x_1^0, x_1^1)$ | $h_{K_1}^{N_0}$ | $h_{K_1}^{N_1}$ | 
| | | |
| $R(x_0^1, x_0^0)$ | $h_{K_0}^{N_1}$ | $h_{K_0}^{N_0}$ |
| $R(x_0^1, x_1^0)$ | $h_{K_0}^{N_1}$ | $h_{K_1}^{N_0}$ | 
| $R(x_1^1, x_0^0)$ | $h_{K_1}^{N_1}$ | $h_{K_0}^{N_0}$ |
| $R(x_1^1, x_1^0)$ | $h_{K_1}^{N_1}$ | $h_{K_1}^{N_0}$ | 
| | | |
| $R(x_0^1, x_0^1)$ | $h_{K_0}^{N_1}$ | $h_{K_0}^{N_1}$ |
| $R(x_0^1, x_1^1)$ | $h_{K_0}^{N_1}$ | $h_{K_1}^{N_1}$ | 
| $R(x_1^1, x_0^1)$ | $h_{K_1}^{N_1}$ | $h_{K_0}^{N_1}$ |
| $R(x_1^1, x_1^1)$ | $h_{K_1}^{N_1}$ | $h_{K_1}^{N_1}$ | 

In [None]:
# e.g.
a = torch.randn(1, 2, 2, 3)
left = torch.repeat_interleave(a, 2, dim=2)
left = torch.repeat_interleave(left, 2, dim=1)
right = a.repeat((1, 2, 2, 1))
temp = torch.cat([left, right], dim=-1)
temp

After the relation network, average the values for each class over all shots ($K$).

e.g.,  N way K shot = 2 way 2 shot

| Class | Relation |
|---|---|
| 0 | $f\big( R(x_0^0, x_0^0) \big)$ |
| 0 | $f\big( R(x_0^0, x_1^0) \big)$ |
| 0 | $f\big( R(x_1^0, x_0^0) \big)$ |
| 0 | $f\big( R(x_1^0, x_1^0) \big)$ | 
| 0 | $f\big( R(x_0^0, x_0^1) \big)$ |
| 0 | $f\big( R(x_0^0, x_1^1) \big)$ |
| 0 | $f\big( R(x_1^0, x_0^1) \big)$ |
| 0 | $f\big( R(x_1^0, x_1^1) \big)$ |
|   | |
| 1 | $f\big( R(x_0^1, x_0^0) \big)$ | 
| 1 | $f\big( R(x_0^1, x_1^0) \big)$ | 
| 1 | $f\big( R(x_1^1, x_0^0) \big)$ | 
| 1 | $f\big( R(x_1^1, x_1^0) \big)$ |
| 1 | $f\big( R(x_0^1, x_0^1) \big)$ |
| 1 | $f\big( R(x_0^1, x_1^1) \big)$ |
| 1 | $f\big( R(x_1^1, x_0^1) \big)$ |
| 1 | $f\big( R(x_1^1, x_1^1) \big)$ |

In [None]:
# e.g., if relation net is identity function, the output is
temp.view(1, 2, 2*2*2, 6)

In [None]:
# relation_net
hs = model.relation_net(e)  # hs: (B, N, 2H)
print(f'`hs` Outputs: {hs.size()}')
print(hs[0])

In [None]:
# sample: parameters of a probability distribution in a low-dimensional space z for each class
z, kld_loss = model.sample(hs, size=model.hidden_size)  # z: (B, N, H)
x = l.mean(1)  # x: (B, E)
print(f'`z` Outputs: {z.size()}')
print(z[0])
print()
print(f'`x` Outputs: {x.size()}')
print(x[0])

### forward_decoder

In [None]:
# decode
parameters = model.decode(z)
print(f'`parameters` Outputs: {parameters.size()}')
print(parameters[0])

In [None]:
# predict
loss, score = model.predict(x, parameters, s_labels)
print(f'Loss = {loss:.4f}\nScores =\n{score}')

In [None]:
total_loss, q_scores, s_attn, q_attn = model(
    data=data,
    rt_attn=True
)

In [None]:
total_loss

In [None]:
# Manual walk-through of the inner adaptation of the latent code z.
# NOTE(review): `s_inputs` and `s_labels` are not defined in any visible cell -
# confirm where they come from before relying on this cell.
s_l, s_z, kld_loss, s_attn = model.forward_encoder(s_inputs, rt_attn=rt_attn)

# initialize z', Forward Decoder
z_prime = s_z
s_loss, s_scores, parameters = model.forward_decoder(z=z_prime, l=s_l, labels=s_labels)
# inner adaptation to z: 5 gradient-descent steps on z' with step size model.inner_lr
for i in range(5):
    # keep .grad on z' after backward (presumably z' is a non-leaf tensor - TODO confirm)
    z_prime.retain_grad()
    # graph is retained because backward is called again in the next iteration
    s_loss.backward(retain_graph=True)
    z_prime = z_prime - model.inner_lr * z_prime.grad.data
    s_loss, s_scores, parameters = model.forward_decoder(z=z_prime, l=s_l, labels=s_labels)

# Stop Gradient: 
# z_prime.requires_grad == False
# s_z.requires_grad == True
z_prime = z_prime.detach()  
# Mean squared distance between the adapted (detached) z' and the encoder output s_z.
z_loss = torch.mean((z_prime - s_z)**2)


In [None]:
model.loss_fn(s_scores, s_labels.view(-1))

In [None]:
model.recorder.update('Support_Accuracy', s_scores, s_labels.view(-1))

# Metrics

In [None]:
from collections import defaultdict
import torchmetrics as tm
from typing import Dict, Tuple, List
import torch
import torch.nn as nn

In [None]:
torch.manual_seed(4)

t = all_data['query_labels']
o = torch.randint(0, 2, size=t.size())
# o = torch.rand(size=t.size())
y_true = pd.Series(t.numpy(), name='Actual') 
y_pred = pd.Series(o.numpy(), name='Pred')
df_confusion = pd.crosstab(y_true, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)
df_confusion

In [None]:
# recall 
df_confusion = pd.crosstab(y_true, y_pred, rownames=['Actual'], colnames=['Predicted'])
precision = df_confusion.values.diagonal() / df_confusion.sum(0)
recall = df_confusion.values.diagonal() / df_confusion.sum(1)
print(precision.rename('Precision'))
print(recall.rename('Recall'))

In [None]:
precision_tm = tm.Precision(num_classes=2, average=None)
p = precision_tm(o, t)

recall_tm = tm.Recall(num_classes=2, average=None)
r = recall_tm(o, t)

print(p)
print(r)

In [None]:
class MetricRecorder(nn.Module):
    def __init__(self):
        super().__init__()
        cs = tm.MetricCollection({
            'Accuracy': tm.Accuracy(), 
            'Precision': tm.Precision(num_classes=2, average=None), 
            'Recall': tm.Recall(num_classes=2, average=None), 
            'Loss': tm.SumMetric()
        })
        self.metrics = tm.MetricCollection([
            cs.clone('Support_'), cs.clone('Query_'), cs.clone('Finetune_'),
            tm.MetricCollection({
                'Inner': tm.MeanMetric(), 'Finetuning': tm.MeanMetric()
            }, postfix='_LR'),
            tm.MetricCollection({
                'Total': tm.SumMetric(), 
                'KLD': tm.SumMetric(), 
                'Z': tm.SumMetric(),
                'Orthogonality': tm.SumMetric()
            }, postfix='_Loss')
        ])

    @property
    def keys(self):
        return list(self.metrics.keys())

    def update(self, key, scores=None | torch.FloatTensor, targets=None | torch.LongTensor):
        if key.split('_')[-1] in ['Accuracy', 'Precision', 'Recall']:
            if targets is None:
                raise KeyError('Must insert `targets` to calculate accuracy.')
            self.metrics[key].update(scores, targets)
        else:
            self.metrics[key].update(scores)

    def compute(self, prefix: str):
        results = {}
        for k in self.keys:
            m = self.metrics[k].compute()
            if isinstance(m, torch.Tensor):
                m = m.cpu().detach().numpy()
            results[f'{prefix}-{k}'] = m
        return results

    def reset(self):
        for k in self.keys:
            self.metrics[k].reset()

    def extract_query_loss_acc(self, logs: Dict[str, float] | List[Dict[str, float]]) -> Dict[str, Tuple[float, float]]:
        to_filter = ['Query_Accuracy', 'Query_Loss']
        check_func = lambda x: sum([1 if f in x[0] else 0 for f in to_filter if f in x[0]])
        if isinstance(logs, dict):
            # cumulated logs
            filtered = dict(filter(check_func, logs.items()))
        else:
            filtered = {}
            for l in logs:
                win_filtered = dict(filter(check_func, l.items()))
                filtered.update(win_filtered)
        return filtered

In [None]:
recorder = MetricRecorder()
recorder.reset()  
print(recorder.keys)

In [None]:
import torch
import torch.nn as nn
from src.model import MetaModel

model_kwargs = meta_args.get_args(cls=MetaModel)
model = MetaModel(**model_kwargs)

In [None]:
loss, preds, *_ = model(all_data)
logs = model.recorder.compute(prefix='Valid-Time')

In [None]:
valid_logs = defaultdict(list)

In [None]:
for log_string, value in logs.items():
    # Precision, Recall: (2)
    valid_logs[log_string].append(value)

In [None]:
np.mean(valid_logs['Valid-Time-Support_Accuracy'])

In [None]:
for k, v in valid_logs.items():
    if k.split('_')[-1] in ['Precision', 'Recall']:
        valid_logs[k] = (np.mean(v, axis=0), np.std(v, axis=0))
    else:
        valid_logs[k] = (np.mean(v), np.std(v))

In [None]:
valid_logs['Valid-Time-Support_Precision']

In [None]:
scores = torch.FloatTensor([[0.0, 0.0], [0.0, 0.0]])
targets = torch.LongTensor([0, 0])
loss = torch.FloatTensor([0.0, 0.0])
lr = 0.276

recorder.update('Support_Accuracy', scores, targets) #
recorder.update('Support_Loss', loss)#
recorder.update('Query_Accuracy', scores, targets) #
recorder.update('Query_Loss', loss) #
recorder.update('Finetune_Accuracy', scores, targets) #
recorder.update('Finetune_Loss', loss) #
recorder.update('Finetuning_LR', lr) #
recorder.update('Inner_LR', lr) #
recorder.update('KLD_Loss', loss) #
recorder.update('Orthogonality_Loss', loss)  # 
recorder.update('Total_Loss', loss) #
recorder.update('Z_Loss', loss) #

In [None]:
recorder.compute()

In [None]:
recorder.update_window_metrics(5)

In [None]:
scores = torch.FloatTensor([[0.4, 1.2], [3.1, 1.2]])
targets = torch.LongTensor([1, 0])
loss = torch.FloatTensor([1.7, 1.6])
lr = 0.14

recorder.update('Support_Accuracy', scores, targets) #
recorder.update('Support_Loss', loss)#
recorder.update('Query_Accuracy', scores, targets) #
recorder.update('Query_Loss', loss) #
recorder.update('Finetune_Accuracy', scores, targets) #
recorder.update('Finetune_Loss', loss) #
recorder.update('Finetuning_LR', lr) #
recorder.update('Inner_LR', lr) #
recorder.update('KLD_Loss', loss) #
recorder.update('Orthogonality_Loss', loss)  # 
recorder.update('Total_Loss', loss) #
recorder.update('Z_Loss', loss) #

In [None]:
recorder.compute()

In [None]:
recorder.update_window_metrics(10)

In [None]:
logs = recorder.get_log_data('Train')
logs

In [None]:
recorder.extract_query_loss_acc(logs)

---

## Universe

In [None]:
import json 
import numpy as np


In [None]:

def create_universe(seed, stock_names):
    """Randomly split stock names into train / valid / test universes.

    70% of the names (rounded down) go to ``'train'``. Of the remainder,
    a 0.2/0.3 share (rounded down) goes to ``'valid'`` and the rest to ``'test'``.

    Args:
        seed: value passed to ``np.random.seed``; note this reseeds NumPy's
            global random state.
        stock_names: sequence or array of stock names.

    Returns:
        Dict with the keys ``'train'``, ``'valid'``, ``'test'`` (lists of
        names) and ``'seed'``.
    """
    stock_names = np.asarray(stock_names)
    # Size everything from the argument itself rather than from a notebook global.
    n_stocks = len(stock_names)

    np.random.seed(seed)
    all_idx = np.arange(n_stocks)
    train_idx = np.random.choice(all_idx, size=int(n_stocks * 0.7), replace=False)
    valid_test_idx = all_idx[~np.isin(all_idx, train_idx)]
    n_valid = int(len(valid_test_idx) * (0.2 / 0.3))
    valid_idx = np.random.choice(valid_test_idx, size=n_valid, replace=False)
    test_idx = valid_test_idx[~np.isin(valid_test_idx, valid_idx)]

    # .tolist() yields plain Python strings, which serialize cleanly to JSON.
    return {
        'train': stock_names[train_idx].tolist(),
        'valid': stock_names[valid_idx].tolist(),
        'test': stock_names[test_idx].tolist(),
        'seed': seed,
    }

In [None]:
# Sorted so that the seeded split does not depend on the order in which the
# filesystem happens to list the files.
# NOTE(review): a universe file written from the unsorted listing may differ.
ps = sorted((meta_train.data_dir / 'kdd17/price_long_50').glob('*.csv'))
# Path.stem removes exactly the final suffix; str.rstrip('.csv') instead strips
# any trailing run of the characters '.', 'c', 's', 'v'.
stock_names = np.array([p.stem for p in ps])
stocks = create_universe(seed=7, stock_names=stock_names)

In [None]:
import json

with (meta_train.data_dir / 'kdd17'/ 'stock_universe.json').open('w') as file:
    json.dump(stocks, file)

In [None]:
# Same universe split for the stocknet dataset.
# Sorted so that the seeded split does not depend on the order in which the
# filesystem happens to list the files.
ps = sorted((meta_train.data_dir / 'stocknet-dataset/price/raw').glob('*.csv'))
# Path.stem removes exactly the final suffix; str.rstrip('.csv') instead strips
# any trailing run of the characters '.', 'c', 's', 'v'.
stock_names = np.array([p.stem for p in ps])
stocks = create_universe(seed=7, stock_names=stock_names)

with (meta_train.data_dir / 'stocknet-dataset'/ 'stock_universe.json').open('w') as file:
    json.dump(stocks, file)

-------

In [None]:
# Everything below is no longer needed

In [None]:
import json 
ps = list((meta_train.data_dir / 'kdd17/price_long_50').glob('*.csv'))
with (Path('../data').resolve() / 'kdd17/stock_universe.json').open('r') as file:
    universe_dict = json.load(file)

universe_key = 'known'
universe = universe_dict['0'][universe_key]
# Path.stem is the file name without its final suffix. str.strip('.csv') would
# strip the characters '.', 'c', 's', 'v' from both ends of the name.
iterator = [p for p in ps if p.stem in universe]

In [None]:
p = iterator[29]
stock_symbol = p.name.rstrip('.csv')
df_single = meta_train.load_single_stock(p)
df_single = df_single.loc[df_single["date"].between("2014-01-01", '2015-01-01')].reset_index(drop=True)

In [None]:
symbol = p.name.strip('.csv') # 'AMZN'
window_size = 5
n_support = 4
df_stock = meta_train.data[symbol]
labels_indices = meta_train.candidates[symbol]
labels_candidates = labels_indices[labels_indices >= window_size]
idx = meta_train.get_possible_idx(df_stock, labels_candidates)
labels_candidates = labels_candidates[idx:]

In [None]:
labels_candidates

In [None]:
df_stock.loc[:15, ['date', 'label']]

In [None]:
y_q = np.array([labels_candidates[0]])
y_qs = y_q - window_size
query, query_labels = meta_train.generate_data(df_stock, y_start=y_qs, y_end=y_q)

In [None]:
y_qs

In [None]:
query.round(4)

In [None]:
df_stock.loc[10:14]

In [None]:
df_stock.loc[:6, ['date', 'label']]

In [None]:
labels_candidates

In [None]:
def get_possible_idx(df_stock, labels_candidates):
    """Return the first position whose preceding candidates yield a full support set.

    Positions ``0, 1, ...`` of ``labels_candidates`` are tried in order; the
    first ``i`` for which ``get_rise_fall(..., idx=i)`` returns a complete set
    is returned. If no position qualifies, ``len(labels_candidates)`` is returned.
    """
    # get_rise_fall returns at most n_support // 2 indices per class, so a full
    # support set has this many entries (4 for the notebook's n_support = 4).
    n_required = 2 * (n_support // 2)
    for i in range(len(labels_candidates)):
        rise, fall = get_rise_fall(df_stock, labels_candidates, idx=i)
        if len(rise) + len(fall) == n_required:
            return i
    return len(labels_candidates)

def get_rise_fall(df_stock, labels_candidates, idx):
    """Pick the latest 'rise' and 'fall' label indices before position ``idx``.

    Only ``labels_candidates[:idx]`` is considered. Within it, the largest
    indices come first and at most ``n_support // 2`` are kept per class
    (``n_support`` and ``meta_train.labels_dict`` are read from the notebook
    namespace).

    Returns:
        Tuple ``(rise, fall)`` of NumPy index arrays.
    """
    per_class = n_support // 2
    history = df_stock.loc[labels_candidates[:idx], 'label'].sort_index(ascending=False)

    def _latest(label_name):
        # Index values whose label equals the requested class, newest first.
        matches = history.index[history == meta_train.labels_dict[label_name]]
        return matches[:per_class].to_numpy()

    rise = _latest('rise')
    fall = _latest('fall')
    return rise, fall

In [None]:
# remove impossible candidates
idx = get_possible_idx(df_stock, labels_candidates)
labels_candidates = labels_candidates[idx:]


In [None]:
y_q

In [None]:
y_q = np.array(np.random.choice(labels_candidates, size=(5,), replace=False))
y_qs = y_q - window_size
query, query_labels = meta_train.generate_data(df_stock, y_start=y_qs, y_end=y_q)
support = []
support_labels = []
for q in y_q:
    q_idx = np.arange(len(labels_candidates))[labels_candidates == q][0]
    rise, fall = get_rise_fall(df_stock, labels_candidates, idx=q_idx)
    y_s = np.concatenate([fall, rise])
    y_ss = y_s - window_size
    data_s, label_s = meta_train.generate_data(df_stock, y_start=y_ss, y_end=y_s)
    data_s = np.array(data_s)
    support.append(data_s)
    support_labels.append(label_s)

In [None]:
for x in np.expand_dims(query_labels, 1):
    print(x)

In [None]:
x

In [None]:
np.array(support).shape

In [None]:
query_labels

In [None]:
q = y_q[0]
q_idx = np.arange(len(labels_candidates))[labels_candidates == q][0]
rise, fall = get_rise_fall(df_stock, labels_candidates, idx=q_idx)

In [None]:
y_s = np.concatenate([fall, rise])
y_ss = y_s - window_size

In [None]:
support, support_labels = meta_train.generate_data(df_stock, y_start=y_ss, y_end=y_s)


In [None]:
# Walk the candidate label indices once and cut them into alternating
# (support set, query) groups: collect n_shot 'rise' (label 1) and n_shot
# 'fall' (label 0) indices as a support set, then take the next index as its query.
symbol = 'AMZN'
window_size = 5
n_shot = 2
df_stock = meta_train.data[symbol]
labels_indices = meta_train.candidates[symbol]
y_cand = labels_indices[labels_indices >= window_size]
n_rise = 0
n_fall = 0
support = []
support_sample = []
query = []
for idx in y_cand:
    # Support phase: keep collecting until both classes have n_shot samples.
    # (The per-class caps use n_shot rather than a literal 2, so changing
    # n_shot cannot leave this phase unable to finish.)
    if n_rise < n_shot or n_fall < n_shot:
        label = df_stock['label'][idx]
        if n_rise < n_shot and label == 1:
            n_rise += 1
            support_sample.append(idx)
        elif n_fall < n_shot and label == 0:
            n_fall += 1
            support_sample.append(idx)
        # Indices of an already-full class are skipped.
        continue

    # Support set complete: store it and use the current index as its query.
    support.append(support_sample)
    query.append([idx])
    support_sample = []
    n_rise = 0
    n_fall = 0

# A trailing, incomplete support set is discarded.
support_idx_set = np.array(support)
query_idx_set = np.array(query)
print(len(support_idx_set), len(query_idx_set))

In [None]:
query_idx_set

In [None]:
# NOTE(review): this cell looks like a method body pasted from a class - it
# reads `self`, which is not defined at notebook level, so it cannot run as-is.
# It samples `self.n_sample` support targets and derives one query per support
# target, `self.n_lag` positions later.
labels_indices = self.candidates[symbol]
labels_candidates = labels_indices[labels_indices >= window_size]
# Sorted random draw of support target indices, without replacement.
y_s = np.array(sorted(np.random.choice(labels_candidates, size=(self.n_sample,), replace=False)))
y_ss = y_s-window_size
support, support_labels = self.generate_data(df_stock, y_start=y_ss, y_end=y_s)

# code for jumped tags like [1(support), 0, 0, 1(query)]
# y_q = labels_indices[np.arange(len(labels_indices))[np.isin(labels_indices, y_s)] + self.n_lag]
y_q = y_s + self.n_lag
# With keep_support_history the query window starts where the support window starts;
# otherwise it is a plain window_size-long window ending at the query target.
y_qs = y_s - window_size if self.keep_support_history else y_q - window_size
query, query_labels = self.generate_data(df_stock, y_start=y_qs, y_end=y_q)