In [330]:
"""
Sample from a trained model
"""
import os
import pickle
from contextlib import nullcontext
import torch
import tiktoken
from model import GPTConfig, GPT
import json

# -----------------------------------------------------------------------------
init_from = 'resume' # either 'resume' (from an out_dir) or a gpt2 variant (e.g. 'gpt2-xl')
out_dir = 'out-stock' # ignored if init_from is not 'resume'
start = "\n" # or "<|endoftext|>" or etc. Can also specify a file, use as: "FILE:prompt.txt"
num_samples = 10 # number of samples to draw
max_new_tokens = 500 # number of tokens generated in each sample
temperature = 0.8 # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
top_k = 10 # retain only the top_k most likely tokens, clamp others to have 0 probability
seed = 1337
device = 'cpu' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
compile = False # use PyTorch 2.0 to compile the model to be faster
# -----------------------------------------------------------------------------


In [331]:

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
device_type = 'cpu' # for later use in torch.autocast
ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)

# model
if init_from == 'resume':
    # init from a model saved in a specific directory
    ckpt_path = os.path.join(out_dir, 'ckpt.pt')
    checkpoint = torch.load(ckpt_path, map_location=device)
    gptconf = GPTConfig(**checkpoint['model_args'])
    model = GPT(gptconf)
    state_dict = checkpoint['model']
    unwanted_prefix = '_orig_mod.'
    for k,v in list(state_dict.items()):
        if k.startswith(unwanted_prefix):
            state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
    model.load_state_dict(state_dict)


model.eval()
model.to(device)


# look for the meta pickle in case it is available in the dataset folder
load_meta = False

if init_from == 'resume' and 'config' in checkpoint and 'dataset' in checkpoint['config']: # older checkpoints might not have these...
    meta_path = os.path.join('data', checkpoint['config']['dataset'], 'meta.pkl')
    load_meta = os.path.exists(meta_path)


print(f"Loading meta from {meta_path}...")
# meta数据
meta = {}
with open(meta_path, 'r') as f:
    meta = json.load(f)
def decode(id):
    return meta['itos'][str(id)]



for name, params in model.named_parameters():
    print(f'name is {name}, params is {params}')


number of parameters: 1.61M
Loading meta from data/stock/meta.pkl...
name is transformer.wpe.weight, params is Parameter containing:
tensor([[-0.0295, -0.0224,  0.0681,  ...,  0.0531,  0.0359,  0.0362],
        [-0.0003,  0.0019,  0.0434,  ..., -0.0273, -0.0077,  0.0400],
        [ 0.0228, -0.0376,  0.0236,  ...,  0.0058,  0.0123,  0.0410],
        ...,
        [ 0.0242, -0.0071,  0.0525,  ...,  0.0158, -0.0151,  0.0173],
        [ 0.0165, -0.0487,  0.0080,  ..., -0.0029, -0.0228,  0.0300],
        [ 0.0057, -0.0481,  0.0328,  ..., -0.0303,  0.0222,  0.0212]],
       requires_grad=True)
name is transformer.h.0.ln_1.weight, params is Parameter containing:
tensor([0.8220, 0.7370, 0.8032, 0.6294, 0.8202, 0.6684, 0.8476, 0.6883, 0.8903,
        0.6361, 0.8297, 0.8298, 0.8982, 0.7807, 0.8863, 0.6275, 0.8073, 0.7070,
        0.8641, 0.7677, 0.8325, 0.6485, 0.8234, 0.6023, 0.7877, 0.6848, 0.8401,
        0.5607, 0.8445, 0.7290, 0.8139, 0.6776, 0.8685, 0.6314, 0.8475, 0.7155,
        0.8767, 0

In [332]:
import numpy as np
import pandas as pd

pd_train_data = pd.read_csv(os.path.join('data/stock', 'train.csv')).iloc[-1000:,:]
# pd_train_data.iloc[:, range(1, pd_train_data.shape[1], 2)] = 1
pd_val_data = pd.read_csv(os.path.join('data/stock', 'val.csv'))
# pd_val_data.iloc[:, range(1, pd_val_data.shape[1], 2)] = 1

n_embd = 256
# 将NumPy数组转换为pandas DataFrame
train_data = torch.tensor(pd_train_data.iloc[:, 1:n_embd+1].values, dtype=torch.float16)
val_data = torch.tensor(pd_val_data.iloc[:100, 1:n_embd+1].values, dtype=torch.float16)

pd_train_data.iloc[-50:,]



Unnamed: 0,trade_date,000001.SZ_amount_chg,000001.SZ_close_chg,000002.SZ_amount_chg,000002.SZ_close_chg,000004.SZ_amount_chg,000004.SZ_close_chg,000005.SZ_amount_chg,000005.SZ_close_chg,000006.SZ_amount_chg,...,600308.SH_amount_chg,600308.SH_close_chg,600309.SH_amount_chg,600309.SH_close_chg,600310.SH_amount_chg,600310.SH_close_chg,600312.SH_amount_chg,600312.SH_close_chg,600313.SH_amount_chg,600313.SH_close_chg
3045,20220718,0.7179,1.0113,0.6093,1.0159,0.5754,1.0206,0.8425,1.0178,0.6532,...,0.8801,1.0351,0.644,1.0263,0.875,1.0447,1.1879,0.9954,0.6582,1.0016
3046,20220719,0.6565,1.0022,0.7406,0.9944,1.4858,1.0498,0.8352,1.0058,0.821,...,0.9473,1.0196,1.1213,0.9863,0.6814,0.9798,0.6822,0.9816,0.8872,1.0008
3047,20220720,1.0648,0.9978,0.8965,0.9977,0.8636,1.0079,1.8017,1.0289,0.9445,...,0.737,1.0018,1.4763,0.9844,3.3033,0.9434,0.99,1.0176,1.2722,0.9754
3048,20220721,2.4105,0.9716,1.8964,0.9735,0.718,1.0,0.6519,1.0056,1.112,...,1.2638,1.0,1.0867,0.9766,0.6001,0.9591,0.7819,0.9782,1.5606,0.9336
3049,20220722,0.6782,1.0023,0.6094,1.0052,1.0971,1.0112,1.1409,1.0,0.7168,...,0.9327,1.007,0.7669,0.993,0.9494,1.0199,0.8844,1.0,0.5856,0.9739
3050,20220725,0.556,1.0008,1.1398,1.0069,1.4092,1.0299,1.5147,1.0056,1.6166,...,0.9516,1.0017,0.4852,0.9938,0.6917,0.9944,0.9895,0.9683,0.7427,0.9704
3051,20220726,1.3465,1.007,1.7431,1.0314,1.0538,1.0194,1.4805,1.0278,1.4481,...,0.8004,1.0069,0.9128,1.0059,0.6218,0.9972,0.8333,1.0206,0.8864,1.0153
3052,20220727,0.9,0.9907,0.5136,0.9839,1.176,0.9736,0.64,0.9946,0.6145,...,1.0288,1.0051,1.7681,0.9826,1.2788,0.9831,1.0537,0.9964,0.9581,1.0056
3053,20220728,0.8157,1.007,1.0444,0.9938,0.8572,1.0163,0.6803,0.9946,1.117,...,0.952,0.9966,1.017,0.9988,0.8297,0.9971,0.9929,0.9845,1.0378,1.0019
3054,20220729,1.0981,0.9845,1.2584,0.9751,0.6268,0.9819,1.1437,1.0,0.8207,...,1.1366,0.9932,1.4044,0.983,1.5111,1.0,2.9515,1.04,1.2109,0.9944


In [354]:
block = 32

data = train_data[-50:]

for ix in range(32, 50):
    x = data[ix-block:ix, :]
    value, indices = model.generate_last(x.unsqueeze(0), top_k=10)
    if value[0][0] > 0.0:
        print(f'ix={ix}, predict, probs={value}, id={indices[0,0]}, real={data[ix, indices[0,0]*2+1]}, topk={(data[ix, range(1,data.shape[1],2)]).topk(30)[0]}')
    


data.shape



ix=32, predict, probs=tensor([[0.1759, 0.0960, 0.0893, 0.0596, 0.0411, 0.0406, 0.0397, 0.0373, 0.0311,
         0.0278]]), id=57, real=0.9638671875, topk=tensor([1.1006, 1.0469, 1.0410, 1.0322, 1.0273, 1.0215, 1.0205, 1.0205, 1.0137,
        1.0137, 1.0117, 1.0107, 1.0088, 1.0078, 1.0068, 1.0059, 1.0059, 1.0049,
        1.0039, 1.0020, 1.0020, 1.0000, 1.0000, 1.0000, 0.9985, 0.9971, 0.9971,
        0.9951, 0.9946, 0.9946], dtype=torch.float16)
ix=33, predict, probs=tensor([[0.1802, 0.1277, 0.0977, 0.0676, 0.0472, 0.0448, 0.0431, 0.0420, 0.0340,
         0.0297]]), id=117, real=0.9853515625, topk=tensor([1.0996, 1.0996, 1.0996, 1.0928, 1.0742, 1.0674, 1.0596, 1.0537, 1.0449,
        1.0352, 1.0283, 1.0283, 1.0264, 1.0234, 1.0225, 1.0205, 1.0195, 1.0176,
        1.0176, 1.0166, 1.0166, 1.0156, 1.0156, 1.0146, 1.0137, 1.0127, 1.0127,
        1.0127, 1.0117, 1.0117], dtype=torch.float16)
ix=34, predict, probs=tensor([[0.3030, 0.1009, 0.0782, 0.0617, 0.0460, 0.0370, 0.0300, 0.0270, 0.0230,


torch.Size([50, 256])

In [334]:
import math
math.log10(2)


0.3010299956639812

In [335]:

# run generation
with torch.no_grad():
    with ctx:  
        probs, index = model.generate_last(eval_X, top_k=top_k)
        print(f'predict index is {index}, index.shape is {index.shape}')
        print(f'predict probs is {probs}, probs.shape is {probs.shape}')
        print('---------------')


NameError: name 'eval_X' is not defined