# Demo Inference
Use this notebook to load trained models directly and evaluate them on the test set.

## 0. Setup

In [1]:
# Automatic reloading: load IPython's autoreload extension and enable mode 2
# so that edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
####################
# Required Modules #
####################

# Generic/Built-in
import random
import sys 
import os

# Libs
import numpy as np
import torch
from torch.utils.data import DataLoader

The cell below sets up the environment by adding the project’s root directory to the system path and changing the current working directory to it, enabling imports from the `/src` folder. **This cell should only be run once per session.**

In [3]:
# Locate the project root so that `src` is importable and relative paths resolve.
# This cell is idempotent: re-running it neither climbs another directory level
# nor adds a duplicate entry to sys.path.
current_dir = os.path.abspath('')  # Directory the kernel is currently in (initially `notebooks/`)

# The project root is recognised by its `src/` folder.
# NOTE(review): assumes `notebooks/` itself has no `src/` subfolder - confirm.
if os.path.isdir(os.path.join(current_dir, 'src')):
    # Already at the project root (e.g. this cell has been run before)
    project_dir = current_dir
else:
    # Still inside `notebooks/`: the project root is one level up
    project_dir = os.path.abspath(os.path.join(current_dir, '..'))

# Add the project directory to sys.path (only once)
if project_dir not in sys.path:
    sys.path.append(project_dir)

# Move to the project directory and display it
os.chdir(project_dir)
os.getcwd()

'c:\\Users\\Ryan Lee\\Desktop\\50.038 Computational Data Science\\Digital-Asset-Prediction'

In [4]:
# Import custom modules
from src.dataset import *
from src.models import *
from src.train_eval import *
from src.utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Seeding
SEED = 42

# Seed every RNG source in use (PyTorch CPU, PyTorch CUDA, NumPy, Python's `random`)
# with the same value so that repeated runs draw the same random numbers
for seed_rng in (torch.manual_seed, torch.cuda.manual_seed_all, np.random.seed, random.seed):
    seed_rng(SEED)

In [6]:
# Device: use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


## 2. Prepare Test Set

In [7]:
# Locations of the processed CSV splits
TEST_PATH = "data/processed/test_set.csv"
TRAIN_PATH = "data/processed/train_set.csv"  # Only needed to recover the normalization statistics

# Dataset / loader settings
BATCH_SIZE = 256            # Not important for testing
DATASET_STRIDE = 1
INPUT_SEQUENCE_LENGTH = 14  # Number of timesteps (days) in each input sequence

In [8]:
# Build the test Dataset from the processed test CSV
test_dataset = CryptoDataset(
    csv_file=TEST_PATH,
    seq_length=INPUT_SEQUENCE_LENGTH,
    stride=DATASET_STRIDE
)

print("Total number of samples (sequences)")
print("Test:", len(test_dataset))

# Evaluation gains nothing from shuffling; a fixed sample order keeps the batches
# (and therefore the reported results) independent of the RNG state.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Total number of samples (sequences)
Test: 10000


In [13]:
# Inputs must be normalized with statistics computed on the training set,
# so the training split is loaded here solely to fit the normalizer.
train_dataset = CryptoDataset(csv_file=TRAIN_PATH, seq_length=INPUT_SEQUENCE_LENGTH, stride=DATASET_STRIDE)

normalizer = Normalizer()
normalizer.fit(training_dataset=train_dataset)

{'mean': tensor([ 4.7167e+02,  4.8263e+02,  4.6039e+02,  4.7177e+02,  4.3276e+08,
          6.2134e+07,  2.2808e+00,  4.3239e+03, -4.2273e-01,  4.6805e+01,
          1.9130e+03]),
 'std': tensor([3.6570e+03, 3.7372e+03, 3.5740e+03, 3.6608e+03, 4.5931e+09, 4.1551e+08,
         6.3604e+02, 4.3362e+02, 3.1840e-01, 2.0331e+01, 1.3954e+02])}

## 3. Load Model
You can load in our trained model parameters from `/saved_models`. For your convenience, the model hyperparameters for those trained parameters have been set as the default for the model class constructors. If you wish to review information about the model's training, you can refer to `/results`.

In [18]:
# Example: instantiate the model with its default constructor arguments
model = CryptoTransformer()

# Load model parameters
model_params_path = "saved_models/CryptoTransformer/Best_R2.pth" # Specify path to `pth` file here
# map_location remaps tensors that were saved on a GPU onto `device`, so the
# checkpoint also loads on CPU-only machines.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust
# (consider weights_only=True if the installed PyTorch version supports it).
state_dict = torch.load(model_params_path, map_location=device)
model.load_state_dict(state_dict)

# Put the model in inference mode (disables dropout) in case the evaluation
# helper does not switch modes itself.
model.eval()

model.to(device) # Manually assign here - usually training code does this automatically for us

CryptoTransformer(
  (embedding): Linear(in_features=11, out_features=64, bias=True)
  (positional_encoder): SinusoidalPositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (fc): Sequential(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): ReLU()
    (

## 4. Evaluate on Test Set

In [19]:
# Run the model over the test loader and unpack the four returned metrics
(
    final_evaluation_loss,
    final_mae,
    final_r2,
    final_explained_var,
) = evaluate_crypto_model(model, test_loader, normalizer)

# Report every metric rounded to four decimal places on a single line
print(
    f"Loss: {final_evaluation_loss:.4f}, "
    f"MAE: {final_mae:.4f}, "
    f"R2: {final_r2:.4f}, "
    f"Explained Variance: {final_explained_var:.4f}"
)

Loss: 9131190.9354, MAE: 377.2178, R2: 0.8958, Explained Variance: 0.8970


In [20]:
from pprint import pformat
import textwrap

# Save results: write the test metrics to a plain-text file under `demo_results/`
base_dir = "demo_results"
os.makedirs(base_dir, exist_ok=True)

model_name = type(model).__name__
file_name = f"{model_name}_demo_results.txt" # Feel free to change
results_file = os.path.join(base_dir, file_name)

# One report line per entry: model name, section header, then each metric at full precision
report_lines = [
    f"{model_name}",
    "[Test Results]",
    f"Loss: {final_evaluation_loss}",
    f"MAE: {final_mae}",
    f"R2: {final_r2}",
    f"Explained Variance: {final_explained_var}",
]
text = "\n".join(report_lines) + "\n"  # Terminate the report with a trailing newline

with open(results_file, "w") as f:
    f.write(text)