## サンプルデータを作成

In [21]:
import pandas as pd
import numpy as np
import datetime

In [22]:
# Column names of the sample dataset, in the order they appear in the frame.
# (Fixed the misspelled "validataion_acc" so the list matches the real column name.)
coln_l = [
    "date",
    "drtnp",
    "drtnf",
    "prob",
    "training_loss",
    "training_acc",
    "validation_loss",
    "validation_acc",
]

In [23]:
# Build one row per Monday between 1989-01-01 and 2021-12-31 (inclusive bounds).
from_date = datetime.date(1989, 1, 1)
to_date = datetime.date(2021, 12, 31)
weekly_mondays = pd.date_range(start=from_date, end=to_date, freq="W-MON")
data_df = pd.DataFrame({"date": weekly_mondays})
data_df.head()

Unnamed: 0,date
0,1989-01-02
1,1989-01-09
2,1989-01-16
3,1989-01-23
4,1989-01-30


In [24]:
# Fill the frame with random placeholder values.
# A seeded, cell-local generator makes the sample data reproducible on every
# Restart & Run All (the original drew from the unseeded global NumPy state).
SEED = 0
rng = np.random.default_rng(SEED)
n_rows = data_df.shape[0]
data_df = data_df.assign(
    # Normal draws with mean 0.05 / 52 and std 0.2 / sqrt(52), i.e. the
    # 0.05 / 0.2 figures scaled down to the weekly frequency of `date`
    # (presumably annual figures -- TODO confirm).
    drtnp = rng.normal(loc=0.05 / 52, scale=0.2 / np.sqrt(52), size=n_rows),
    # Same distribution; this column is overwritten by the later cell that
    # sets drtnf to the shifted drtnp, so the draw only reserves the column slot.
    drtnf = rng.normal(loc=0.05 / 52, scale=0.2 / np.sqrt(52), size=n_rows),
    # The remaining columns are uniform on [0, 1).
    prob = rng.random(n_rows),
    training_loss = rng.random(n_rows),
    training_acc = rng.random(n_rows),
    validation_loss = rng.random(n_rows),
    validation_acc = rng.random(n_rows),
)
data_df.head()

Unnamed: 0,date,drtnp,drtnf,prob,training_loss,training_acc,validation_loss,validation_acc
0,1989-01-02,-0.027314,-0.005678,0.254817,0.646305,0.662528,0.930006,0.850613
1,1989-01-09,-0.014701,-0.003541,0.418558,0.407104,0.89729,0.459196,0.760858
2,1989-01-16,-0.018571,0.010755,0.397871,0.897005,0.568546,0.210512,0.977553
3,1989-01-23,0.026887,-0.010368,0.391012,0.358994,0.331049,0.942965,0.039518
4,1989-01-30,-0.013721,-0.017944,0.25275,0.051428,0.850405,0.135836,0.096507


In [25]:
# Make drtnf the following row's drtnp: shifting by -1 pulls each next-week
# value up one row, leaving the final row without a value.
next_row_drtnp = data_df["drtnp"].shift(periods=-1)
data_df.loc[:, "drtnf"] = next_row_drtnp
data_df.head()

Unnamed: 0,date,drtnp,drtnf,prob,training_loss,training_acc,validation_loss,validation_acc
0,1989-01-02,-0.027314,-0.014701,0.254817,0.646305,0.662528,0.930006,0.850613
1,1989-01-09,-0.014701,-0.018571,0.418558,0.407104,0.89729,0.459196,0.760858
2,1989-01-16,-0.018571,0.026887,0.397871,0.897005,0.568546,0.210512,0.977553
3,1989-01-23,0.026887,-0.013721,0.391012,0.358994,0.331049,0.942965,0.039518
4,1989-01-30,-0.013721,0.056941,0.25275,0.051428,0.850405,0.135836,0.096507


In [26]:
# Summary statistics of data_df. drtnf reports one fewer observation than
# drtnp because the shift(-1) above leaves its last row empty.
data_df.describe()

Unnamed: 0,drtnp,drtnf,prob,training_loss,training_acc,validation_loss,validation_acc
count,1722.0,1721.0,1722.0,1722.0,1722.0,1722.0,1722.0
mean,0.001124,0.00114,0.498864,0.505019,0.503736,0.507564,0.496748
std,0.027766,0.027766,0.284615,0.28912,0.288955,0.28848,0.291207
min,-0.100386,-0.100386,0.000437,0.000152,0.000372,0.00099,0.000409
25%,-0.017996,-0.01798,0.26047,0.253575,0.255303,0.262541,0.244763
50%,0.002019,0.002056,0.502029,0.508905,0.512007,0.511902,0.49265
75%,0.019108,0.019125,0.735886,0.754991,0.751024,0.752877,0.750773
max,0.096252,0.096252,0.99915,0.99767,0.999959,0.999175,0.999404


In [27]:
# Blank out the metric columns for rows outside the period that has data.
mask1 = data_df["date"] >= "2010/1/14"
mask2 = data_df["date"] <= "2021/3/29"
mask = mask1 & mask2  # True only for rows within 2010-01-14 .. 2021-03-29 (inclusive)
# Every column except the date and the two return columns gets blanked.
coln_l = [x for x in data_df.columns.tolist() if x not in ["date", "drtnp", "drtnf"]]
# Use np.nan instead of pd.NA: these are plain float64 columns, and np.nan is
# float64's native missing-value marker, so the columns keep their dtype.
data_df.loc[~mask, coln_l] = np.nan
data_df.head()

Unnamed: 0,date,drtnp,drtnf,prob,training_loss,training_acc,validation_loss,validation_acc
0,1989-01-02,-0.027314,-0.014701,,,,,
1,1989-01-09,-0.014701,-0.018571,,,,,
2,1989-01-16,-0.018571,0.026887,,,,,
3,1989-01-23,0.026887,-0.013721,,,,,
4,1989-01-30,-0.013721,0.056941,,,,,


In [28]:
# Inspect the last rows: they fall after the 2021/3/29 cutoff, so the metric
# columns are empty, and the final row has no drtnf (nothing to shift in).
data_df.tail()

Unnamed: 0,date,drtnp,drtnf,prob,training_loss,training_acc,validation_loss,validation_acc
1717,2021-11-29,-0.030089,0.009657,,,,,
1718,2021-12-06,0.009657,0.003164,,,,,
1719,2021-12-13,0.003164,-0.018637,,,,,
1720,2021-12-20,-0.018637,-0.038415,,,,,
1721,2021-12-27,-0.038415,,,,,,


In [29]:
# Write the sample data to sample.csv in the current working directory,
# omitting the integer row index.
data_df.to_csv("./sample.csv", index=False)