In [1]:
import pandas as pd
from torchsummary import summary
import random
import sys 
import os
import numpy as np
import torch
from torch.utils.data import DataLoader, random_split
import torch.optim as optim


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/traitlets/config/application.py", line 1053, in launch_instance
    app.start()
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/ipyk

AttributeError: _ARRAY_API not found

In [2]:
# Get the project directory 
current_dir = os.path.abspath('') # Current '\notebooks' directory
project_dir = os.path.abspath(os.path.join(current_dir, '..')) # Move up one level to project root directory

# Add the project directory to sys.path
sys.path.append(project_dir)

# Move up to project directory
os.chdir(project_dir)
os.getcwd()

'/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction'

In [3]:
from src.models import *
from src.dataset import *


In [8]:
import pandas as pd
import numpy as np

# Load your dataset
df = pd.read_csv('/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/data/processed/combined_dataset_v1.csv')
df['date'] = pd.to_datetime(df['date'])  # Ensure date is datetime type
df = df.sort_values(['symbol', 'date'])  # Sort by symbol and date

# Function to get valid 14-day sequences for a symbol
def get_14day_sequences(symbol_data, min_sequence_length=14):
    sequences = []
    current_sequence = []
    
    for _, row in symbol_data.iterrows():
        if not current_sequence:
            current_sequence.append(row)
        else:
            # Check if this row is consecutive to the last one in current sequence
            last_date = current_sequence[-1]['date']
            if row['date'] == last_date + pd.Timedelta(days=1):
                current_sequence.append(row)
            else:
                # Reset if not consecutive
                current_sequence = [row]
        
        # If we have enough consecutive days, save it and move window by 1 day
        if len(current_sequence) >= min_sequence_length:
            sequences.append(current_sequence[:min_sequence_length])
            current_sequence = current_sequence[1:]  # Slide window by 1 day
    
    return sequences

# Get 20 random tickers with enough data
all_symbols = df['symbol'].unique()
np.random.seed(42)  # For reproducibility

selected_symbols = []
selected_sequences = []

# Keep trying until we get 20 symbols with valid sequences
while len(selected_symbols) < 20 and len(all_symbols) > 0:
    # Randomly select a symbol
    symbol = np.random.choice(all_symbols)
    all_symbols = all_symbols[all_symbols != symbol]  # Remove from pool
    
    # Get all data for this symbol
    symbol_data = df[df['symbol'] == symbol].sort_values('date')
    
    # Get all valid 14-day sequences
    sequences = get_14day_sequences(symbol_data)
    
    # If we found at least one valid sequence, add to our selection
    if sequences:
        selected_symbols.append(symbol)
        # Take the first sequence for this symbol (you could randomize this too)
        selected_sequences.extend(sequences[0])

# Create the final DataFrame with 14-day sequences for 20 random cryptos
final_df = pd.DataFrame(selected_sequences)

# Verify we have exactly 14 days per symbol
print("Days per symbol in final dataset:")
print(final_df.groupby('symbol').size())

# Save to CSV if needed
final_df.to_csv('14day_crypto_sequences.csv', index=False)
print(f"Selected {len(selected_symbols)} symbols with valid 14-day sequences")

Days per symbol in final dataset:
symbol
ALGO/USDT    14
ANKR/USDT    14
ARDR/USDT    14
CHR/USDT     14
CTXC/USDT    14
DATA/USDT    14
DCR/USDT     14
EUR/USDT     14
IOTA/USDT    14
LRC/USDT     14
MDT/USDT     14
NKN/USDT     14
RVN/USDT     14
SNX/USDT     14
STPT/USDT    14
STX/USDT     14
USDC/USDT    14
XLM/USDT     14
XRP/USDT     14
ZEN/USDT     14
dtype: int64
Selected 20 symbols with valid 14-day sequences


In [5]:
class PredictionDataset(Dataset):
    """Dataset for making predictions on pre-processed 14-day windows"""
    def __init__(self, df, feature_cols, target_col='close'):
        """
        Args:
            df: DataFrame containing the 14-day sequences
            feature_cols: List of feature column names to use
            target_col: Name of target column (default 'close')
        """
        self.df = df
        self.feature_cols = feature_cols
        self.target_col = target_col
        
    def __len__(self):
        return len(self.df) // 14  # Each sequence is 14 days
    
    def __getitem__(self, idx):
        start_idx = idx * 14
        end_idx = start_idx + 14
        
        # Get input sequence features
        sequence = self.df.iloc[start_idx:end_idx][self.feature_cols].values
        # Get target (next day's close price)
        target = self.df.iloc[end_idx][self.target_col] if end_idx < len(self.df) else 0
        
        X = torch.tensor(sequence, dtype=torch.float32)
        y = torch.tensor(target, dtype=torch.float32)
        return X, y


In [6]:

def predict_crypto_prices(df, transformer_model, informer_model, normalizer, batch_size=32):
    """
    Make predictions using both Transformer and Informer models on 14-day crypto sequences.
    
    Args:
        df: DataFrame containing 14-day sequences for multiple cryptocurrencies
        transformer_model: Loaded CryptoTransformer model
        informer_model: Loaded CryptoInformer model
        normalizer: Normalizer object fitted to training data
        batch_size: Batch size for prediction
        
    Returns:
        DataFrame with predictions from both models
    """
    # Ensure models are in eval mode
    transformer_model.eval()
    informer_model.eval()
    
    # Get feature columns (exclude date and symbol)
    feature_cols = [col for col in df.columns if col not in ['date', 'symbol']]
    
    # Create dataset and dataloader
    dataset = PredictionDataset(df, feature_cols)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    # Store predictions
    transformer_preds = []
    informer_preds = []
    dates = []
    symbols = []
    actual_closes = []
    
    with torch.no_grad():
        for batch in dataloader:
            seq_batch, target_batch = batch
            
            # Normalize inputs
            seq_batch = normalizer(seq_batch)
            
            # Get predictions from both models
            transformer_output = transformer_model(seq_batch)
            informer_output = informer_model(seq_batch)
            
            transformer_preds.extend(transformer_output.numpy())
            informer_preds.extend(informer_output.numpy())
    
    # Create prediction DataFrame
    # We'll align predictions with the last day of each 14-day window
    prediction_points = []
    for i in range(len(transformer_preds)):
        idx = (i + 1) * 14 - 1  # Last index of each window
        if idx < len(df):
            prediction_points.append(idx)
    
    result_df = df.iloc[prediction_points].copy()
    result_df['transformer_pred'] = transformer_preds[:len(prediction_points)]
    result_df['informer_pred'] = informer_preds[:len(prediction_points)]
    
    # Calculate next day's actual close if available
    result_df['next_close'] = result_df['close'].shift(-1)
    
    return result_df[['date', 'symbol', 'close', 'next_close', 'transformer_pred', 'informer_pred']]



In [11]:
# Example usage:
if __name__ == "__main__":
    # Load your models (as you've shown)
    transformer_model_path = "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/saved_models/CryptoTransformer_2025-04-09_21-31-23/CryptoTransformer_BEST_R2.pth"
    informer_model_path = "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/saved_models/CryptoInformer_2025-04-10_15-11-19/CryptoInformer_BEST_R2.pth"
    
    transformer_model = CryptoTransformer()  
    informer_model = CryptoInformer()
    
    transformer_model.load_state_dict(torch.load(transformer_model_path, map_location=torch.device('cpu')))
    informer_model.load_state_dict(torch.load(informer_model_path, map_location=torch.device('cpu')))
    
    train_data_path = "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/data/processed/combined_dataset_v1.csv"
    train_dataset = CryptoDataset(train_data_path)
    normalizer = Normalizer()
    normalizer.fit(train_dataset)

    # Load your input DataFrame (14-day windows for top 20 cryptos)
    df = pd.read_csv('14day_crypto_sequences.csv')
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values(['symbol', 'date'])
    
    # Make predictions
    predictions_df = predict_crypto_prices(df, transformer_model, informer_model, normalizer)
    
    # Save results
    predictions_df.to_csv("crypto_predictions.csv", index=False)
    print("Predictions saved to crypto_predictions.csv")

  transformer_model.load_state_dict(torch.load(transformer_model_path, map_location=torch.device('cpu')))
  informer_model.load_state_dict(torch.load(informer_model_path, map_location=torch.device('cpu')))


Predictions saved to crypto_predictions.csv


In [12]:
print(predictions_df)

          date     symbol     close  next_close  transformer_pred  \
223 2022-04-06  ALGO/USDT   0.76610     0.07738         -3.402395   
181 2022-04-06  ANKR/USDT   0.07738     0.21540         -3.641128   
265 2022-04-06  ARDR/USDT   0.21540     0.49450         -3.384028   
41  2022-04-06   CHR/USDT   0.49450     0.27320         -3.575320   
83  2022-04-06  CTXC/USDT   0.27320     0.07859         -3.526515   
195 2022-04-06  DATA/USDT   0.07859    58.70000         -3.677765   
167 2022-04-06   DCR/USDT  58.70000     1.08800         -2.926293   
237 2022-04-06   EUR/USDT   1.08800     0.73600         -3.337085   
251 2022-04-06  IOTA/USDT   0.73600     0.96680         -3.359430   
13  2022-04-06   LRC/USDT   0.96680     0.05448         -3.539128   
209 2022-04-06   MDT/USDT   0.05448     0.23410         -3.630266   
69  2022-04-06   NKN/USDT   0.23410     0.05914         -3.453286   
55  2022-04-06   RVN/USDT   0.05914     5.38400         -3.891540   
279 2022-04-06   SNX/USDT   5.3840