In [50]:
import os
import random
import sys
from typing import Optional, Tuple

import numpy as np
import pandas
import pandas as pd
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Dataset
from torchsummary import summary

In [71]:
# Get the project directory 
current_dir = os.path.abspath('') # Current '\notebooks' directory
project_dir = os.path.abspath(os.path.join(current_dir, '..')) # Move up one level to project root directory

# Add the project directory to sys.path
sys.path.append(project_dir)

# Move up to project directory
os.chdir(project_dir)
os.getcwd()

'/Users/aishwaryaiyer'

In [72]:
from src.models import *



In [73]:
transformer_model_path = "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/saved_models/CryptoTransformer_2025-04-09_21-31-23/CryptoTransformer_BEST_R2.pth"
informer_model_path = "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/saved_models/CryptoInformer_2025-04-10_15-11-19/CryptoInformer_BEST_R2.pth"

In [74]:
transformer_model = CryptoTransformer()  
informer_model = CryptoInformer()


In [75]:
transformer_model.load_state_dict(torch.load(transformer_model_path, map_location=torch.device('cpu')))
informer_model.load_state_dict(torch.load(informer_model_path, map_location=torch.device('cpu')))

transformer_model.eval()
informer_model.eval()

  transformer_model.load_state_dict(torch.load(transformer_model_path, map_location=torch.device('cpu')))
  informer_model.load_state_dict(torch.load(informer_model_path, map_location=torch.device('cpu')))


CryptoInformer(
  (embedding): Linear(in_features=11, out_features=64, bias=True)
  (positional_encoder): SinusoidalPositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (attn): ProbAttention(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder_layers): ModuleList(
    (0-1): 2 x EncoderLayer(
      (attention): AttentionLayer(
        (inner_attention): ProbAttention(
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (query_projection): Linear(in_features=64, out_features=64, bias=True)
        (key_projection): Linear(in_features=64, out_features=64, bias=True)
        (value_projection): Linear(in_features=64, out_features=64, bias=True)
        (out_projection): Linear(in_features=64, out_features=64, bias=True)
      )
      (conv1): Conv1d(64, 256, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(256, 64, kernel_size=(1,), stride=(1,))
      (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((64,)

In [77]:
CSV_PATH = "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/data/processed/combined_dataset_v1.csv"
df = pd.read_csv(CSV_PATH, parse_dates=['date'])




df.sort_values(['symbol', 'date'], inplace=True)

# 1. Select a random crypto ticker
all_tickers = df['symbol'].unique()
random_ticker = np.random.choice(all_tickers)
print(f"Selected ticker: {random_ticker}")

# 2. Get the most recent 14 days of data for this ticker
ticker_data = df[df['symbol'] == random_ticker].tail(14)

Selected ticker: ALGO/USDT


In [78]:
class CryptoDataset(Dataset):
    def __init__(self, data: pd.DataFrame, features: list, target: str = None, sequence_length: int = 14):
        self.data = data[features].values
        self.target = data[target].values if target else None
        self.sequence_length = sequence_length
        
    def __len__(self):
        # Return 1 since we just want the full sequence for prediction
        return 1 if len(self.data) == self.sequence_length else len(self.data) - self.sequence_length
        
    def __getitem__(self, idx) -> Tuple[torch.Tensor, torch.Tensor]:
        seq = self.data[idx:idx+self.sequence_length]
        if self.target is not None and (idx + self.sequence_length) < len(self.data):
            target = self.target[idx+self.sequence_length]
            return torch.FloatTensor(seq), torch.FloatTensor([target])
        return torch.FloatTensor(seq), torch.FloatTensor([0])  # Dummy target for prediction

In [79]:
features = ['open', 'high', 'low', 'close', 'volume', 'market_cap', 
            'daily_return', 'sp500', 'treasury_spread', 'fear_greed', 'gold_price_usd']
target = 'close'

# Create a single sequence dataset
dataset = CryptoDataset(ticker_data, features, target)
seq, _ = dataset[0]  # Get our 14-day sequence (shape: [14, num_features])


In [87]:
def predict_ticker(ticker_data, model, model_name):
    dataset = CryptoDataset(ticker_data, features, target=None)
    seq, _ = dataset[0]
    
    with torch.no_grad():
        input_seq = seq.unsqueeze(0)
        normalizer.fit(input_seq)
        pred = model(input_seq)
        
    return pred.item()

In [91]:
all_tickers = df['symbol'].unique()
selected_tickers = np.random.choice(all_tickers, size=20, replace=False)

# Prepare results DataFrame
results = pd.DataFrame(columns=[
    'Ticker', 
    'Last_Close', 
    'Transformer_Pred', 
    'Informer_Pred',
    'Transformer_Diff',
    'Informer_Diff'
])

# Get predictions for each ticker
for ticker in selected_tickers:
    ticker_data = df[df['symbol'] == ticker].tail(14)
    
    if len(ticker_data) < 14:
        continue  # Skip if not enough data
        
    last_close = ticker_data['close'].iloc[-1]
    
    # Get predictions
    transformer_pred = predict_ticker(ticker_data, transformer_model, "Transformer")
    informer_pred = predict_ticker(ticker_data, informer_model, "Informer")
    
    # Calculate absolute differences
    transformer_diff = abs(transformer_pred - last_close)
    informer_diff = abs(informer_pred - last_close)
    
    # Add to results
    results.loc[len(results)] = {
        'Ticker': ticker,
        'Last_Close': last_close,
        'Transformer_Pred': transformer_pred,
        'Informer_Pred': informer_pred,
        'Transformer_Diff': transformer_diff,
        'Informer_Diff': informer_diff
    }

# Calculate some statistics
results['Transformer_Error_Pct'] = (results['Transformer_Diff'] / results['Last_Close']) * 100
results['Informer_Error_Pct'] = (results['Informer_Diff'] / results['Last_Close']) * 100

# Save to CSV
output_path = "predictions.csv"
results.to_csv(output_path, index=False)

print(f"\nPredictions saved to: {output_path}")
print("\nSummary Statistics:")
print(results[['Transformer_Error_Pct', 'Informer_Error_Pct']].describe())

# Display first few results
print("\nSample Predictions:")
print(results.head())


Predictions saved to: predictions.csv

Summary Statistics:
       Transformer_Error_Pct  Informer_Error_Pct
count           2.000000e+01        2.000000e+01
mean            1.077036e+06        3.336484e+06
std             4.451610e+06        1.372156e+07
min             8.383273e+01        1.696007e+02
25%             1.185869e+03        7.304357e+03
50%             3.993379e+03        1.587803e+04
75%             1.408060e+04        5.451039e+04
max             1.996359e+07        6.154303e+07

Sample Predictions:
      Ticker  Last_Close  Transformer_Pred  Informer_Pred  Transformer_Diff  \
0   WIN/USDT    0.000061        -12.169744      37.516693         12.169805   
1  MANA/USDT    0.279600        -11.053042      40.352673         11.332642   
2   XLM/USDT    0.290700        -11.144263      40.420639         11.434963   
3   ONG/USDT    0.224900        -11.470650      39.937958         11.695550   
4   JST/USDT    0.030890        -12.129482      39.695839         12.160372   

   

In [92]:
# Check normalization stats
print("\nNormalizer statistics:")
print(f"Means: {normalizer.mean}")
print(f"Stddevs: {normalizer.std}")

# Check if predictions are normalized
sample_input = torch.randn(1, 14, len(features))
normalized = normalizer(sample_input)
print(f"\nInput scaling check - should be ~N(0,1):")
print(f"Mean: {normalized.mean().item():.2f}, Std: {normalized.std().item():.2f}")


Normalizer statistics:
Means: 3010698.5
Stddevs: 9415004.0

Input scaling check - should be ~N(0,1):
Mean: -0.32, Std: 0.00


In [93]:
x, _ = dataset[0]
print(x.shape)  # should be (sequence_length, num_features)


torch.Size([14, 11])
