In [22]:
import h5py
import numpy as np
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import joblib
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, root_mean_squared_error
import pandas as pd
import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from preprocessor import get_dataset, get_and_process_data, decoding, load_and_process_example
from qwen import load_qwen
import joblib

In [2]:
# Global matplotlib font sizes for every figure in this notebook.
# Each tier is the original base size (15 / 20 / 25) bumped by 5 points.
SMALL_SIZE = 20
MEDIUM_SIZE = 25
BIGGER_SIZE = 30

# Default text size, plus the legend and both tick-label groups, use the small tier.
plt.rc('font', size=SMALL_SIZE)
plt.rc(('xtick', 'ytick'), labelsize=SMALL_SIZE)
plt.rc('legend', fontsize=SMALL_SIZE)

# Axes: small titles, medium axis labels.
plt.rc('axes', titlesize=SMALL_SIZE, labelsize=MEDIUM_SIZE)

# Figure-level titles use the largest tier.
plt.rc('figure', titlesize=BIGGER_SIZE)

In [3]:
# Unpack the two objects returned by the project-local `qwen.load_qwen` helper.
# NOTE(review): presumably a Qwen causal LM and its matching tokenizer — confirm in qwen.py.
# `tokenizer` is passed to the preprocessing helpers in the example cells below.
model, tokenizer = load_qwen()

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [4]:
# Relative path handed to the preprocessor helpers in the cells below.
# NOTE(review): presumably the HDF5 file holding the Lotka-Volterra trajectories — confirm.
file_path = 'lotka_volterra_data.h5'

In [5]:
def running_mse(prediction, actual):
    """Return the running (cumulative) mean squared error of a forecast.

    Element ``i`` of the result is the MSE between ``prediction[:i+1]`` and
    ``actual[:i+1]``. For 2-D inputs (samples x outputs) the squared errors
    are averaged uniformly over all outputs, which matches the default
    behaviour of ``sklearn.metrics.mean_squared_error``.

    The result is computed in a single O(n) pass with a cumulative sum rather
    than re-evaluating the MSE on every prefix (O(n^2)). Values can differ
    from a per-prefix evaluation only by floating-point rounding.

    Parameters
    ----------
    prediction : array-like
        Predicted values; the first axis indexes samples.
    actual : array-like
        Ground-truth values. Must contain at least ``len(prediction)``
        samples; any extra trailing samples are ignored.

    Returns
    -------
    list of float
        ``len(prediction)`` running-MSE values; empty if ``prediction`` is empty.

    Raises
    ------
    ValueError
        If ``actual`` has fewer samples than ``prediction`` or the per-sample
        shapes of the two inputs do not match.
    """
    pred = np.asarray(prediction, dtype=float)
    act = np.asarray(actual, dtype=float)

    n = len(pred)
    if n == 0:
        return []
    if len(act) < n:
        raise ValueError(
            f"actual has {len(act)} samples but prediction has {n}"
        )

    # Flatten everything after the sample axis so 1-D and (n, 1) inputs are
    # treated alike and every sample contributes the same number of terms.
    pred = pred.reshape(n, -1)
    act = act[:n].reshape(n, -1)
    if pred.shape != act.shape:
        raise ValueError(
            f"per-sample shapes differ: {pred.shape[1:]} vs {act.shape[1:]}"
        )

    # Mean squared error of each individual sample (averaged over outputs).
    per_sample = np.mean((pred - act) ** 2, axis=1)

    # Prefix mean: cumulative sum divided by the number of samples seen so far.
    return (np.cumsum(per_sample) / np.arange(1, n + 1)).tolist()

In [8]:
# Run the preprocessing pipeline on system 700, keeping 2 time points,
# with alpha=5 and 3 decimals.
(
    example_tokenized_data,
    example_preprocessed_data,
    example_data,
    example_times,
) = get_and_process_data(
    file_path,
    tokenizer,
    system_id=700,
    points=2,
    alpha=5,
    decimals=3,
)

# Columns 0/1 are shown next to columns 2/3 for comparison.
# NOTE(review): judging by the printed output, columns 2/3 look like the
# scaled and rounded counterparts of columns 0/1 — confirm in preprocessor.py.
print("Example prey values:", np.array(example_data[:, 0]), example_data[:, 2])
print("Example predator values:", np.array(example_data[:, 1]), example_data[:, 3])

# The serialised string and the token ids of the first (only) sequence.
print("Preprocessed data:", example_preprocessed_data)
print("Tokenized results:", example_tokenized_data["input_ids"].tolist()[0])

Example prey values: [1.1335121  0.55542254] [2.267 1.111]
Example predator values: [1.1031258 1.2579137] [2.206 2.516]
Preprocessed data: 2.267,2.206;1.111,2.516
Tokenized results: [17, 13, 17, 21, 22, 11, 17, 13, 17, 15, 21, 26, 16, 13, 16, 16, 16, 11, 17, 13, 20, 16, 21]


In [9]:
# Same demonstration through `load_and_process_example`, which selects the
# example by seed (442) and id (0) instead of by system id.
(
    example_tokenized_data,
    example_preprocessed_data,
    example_data,
    example_times,
) = load_and_process_example(
    file_path,
    tokenizer,
    points=2,
    alpha=5,
    decimals=3,
    seed=442,
    id=0,
)

# Only the first three rows are shown; columns 0/1 next to columns 2/3.
# NOTE(review): columns 2/3 appear to be the scaled and rounded values — confirm.
print("Example prey values:", np.array(example_data[:3, 0]), example_data[:3, 2])
print("Example predator values:", np.array(example_data[:3, 1]), example_data[:3, 3])

# The serialised string and the token ids of the first sequence.
print("Preprocessed data:", example_preprocessed_data)
print("Tokenized results:", example_tokenized_data.input_ids[0].tolist())

Example prey values: [1.029208  0.7163228 0.4881839] [2.058 1.433 0.976]
Example predator values: [0.98331004 1.0607902  1.0347391 ] [1.967 2.122 2.069]
Preprocessed data: 2.058,1.967;1.433,2.122
Tokenized results: [17, 13, 15, 20, 23, 11, 16, 13, 24, 21, 22, 26, 16, 13, 19, 18, 18, 11, 17, 13, 16, 17, 17]
