In [51]:
import pandas as pd
from torchsummary import summary
from pathlib import Path
import random
import sys 
import os
import numpy as np
import torch
from torch.utils.data import DataLoader, random_split
import torch.optim as optim

In [52]:
# Get the project directory 
current_dir = os.path.abspath('') # Current '\notebooks' directory
project_dir = os.path.abspath(os.path.join(current_dir, '..')) # Move up one level to project root directory

# Add the project directory to sys.path
sys.path.append(project_dir)

# Move up to project directory
os.chdir(project_dir)
os.getcwd()

'/Users/aishwaryaiyer'

In [53]:
from src.models import *
from src.dataset import *


In [54]:
# Load your dataset

df = pd.read_csv('/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/data/processed/combined_dataset_v1.csv')

In [55]:
unique_symbols = df['symbol'].unique()
print(unique_symbols)

['ADA/USDT' 'ALGO/USDT' 'ANKR/USDT' 'ARDR/USDT' 'ARPA/USDT' 'ATOM/USDT'
 'BAL/USDT' 'BAND/USDT' 'BAT/USDT' 'BCH/USDT' 'BNB/USDT' 'BNT/USDT'
 'BTC/USDT' 'CELR/USDT' 'CHR/USDT' 'CHZ/USDT' 'COMP/USDT' 'COS/USDT'
 'COTI/USDT' 'CRV/USDT' 'CTSI/USDT' 'CTXC/USDT' 'CVC/USDT' 'DASH/USDT'
 'DATA/USDT' 'DCR/USDT' 'DENT/USDT' 'DGB/USDT' 'DOGE/USDT' 'DOT/USDT'
 'DUSK/USDT' 'ENJ/USDT' 'EOS/USDT' 'ETC/USDT' 'ETH/USDT' 'EUR/USDT'
 'FET/USDT' 'FTT/USDT' 'FUN/USDT' 'HBAR/USDT' 'HIVE/USDT' 'HOT/USDT'
 'ICX/USDT' 'IOST/USDT' 'IOTA/USDT' 'IOTX/USDT' 'JST/USDT' 'KAVA/USDT'
 'KMD/USDT' 'KNC/USDT' 'LINK/USDT' 'LRC/USDT' 'LSK/USDT' 'LTC/USDT'
 'LTO/USDT' 'LUNA/USDT' 'MANA/USDT' 'MBL/USDT' 'MDT/USDT' 'MKR/USDT'
 'MTL/USDT' 'NEO/USDT' 'NKN/USDT' 'NMR/USDT' 'NULS/USDT' 'OGN/USDT'
 'ONE/USDT' 'ONG/USDT' 'ONT/USDT' 'PAXG/USDT' 'QTUM/USDT' 'RLC/USDT'
 'RSR/USDT' 'RVN/USDT' 'SAND/USDT' 'SC/USDT' 'SNX/USDT' 'SOL/USDT'
 'STORJ/USDT' 'STPT/USDT' 'STX/USDT' 'SXP/USDT' 'TFUEL/USDT' 'THETA/USDT'
 'TROY/USDT' 'TRX/USDT' 'TU

In [56]:
# Assuming df is your original dataframe
df['date'] = pd.to_datetime(df['date'])  # Ensure date is datetime type
df = df.sort_values(['symbol', 'date'])  # Sort by symbol and date

# Function to get valid 14-day sequences ending on a specific date for a symbol
def get_14day_sequence_ending_on(symbol_data, end_date):
    end_date = pd.to_datetime(end_date)
    start_date = end_date - pd.Timedelta(days=13)  # 14 days inclusive
    
    # Filter data for the date range
    sequence = symbol_data[(symbol_data['date'] >= start_date) & 
                          (symbol_data['date'] <= end_date)]
    
    # Check if we have all 14 consecutive days
    if len(sequence) == 14:
        # Verify the dates are consecutive
        date_diff = sequence['date'].diff().dropna()
        if all(date_diff == pd.Timedelta(days=1)):
            return sequence
    return None

# Manually specify your 20 tickers and prediction date
selected_tickers = ['ADA/USDT', 'ALGO/USDT', 'ANKR/USDT', 'ARDR/USDT', 'ARPA/USDT', 'ATOM/USDT',
    'BAL/USDT', 'BAND/USDT', 'BAT/USDT', 'BCH/USDT', 'BNB/USDT', 'BNT/USDT',
    'BTC/USDT', 'CELR/USDT', 'CHR/USDT', 'CHZ/USDT', 'COMP/USDT', 'COS/USDT',
    'COTI/USDT', 'CRV/USDT', 'CTSI/USDT', 'CTXC/USDT', 'CVC/USDT', 'DASH/USDT',
    'DATA/USDT', 'DCR/USDT', 'DENT/USDT', 'DGB/USDT', 'DOGE/USDT', 'DOT/USDT',
    'DUSK/USDT', 'ENJ/USDT', 'EOS/USDT', 'ETC/USDT', 'ETH/USDT', 'EUR/USDT',
    'FET/USDT', 'FTT/USDT', 'FUN/USDT', 'HBAR/USDT', 'HIVE/USDT', 'HOT/USDT',
    'ICX/USDT', 'IOST/USDT', 'IOTA/USDT', 'IOTX/USDT', 'JST/USDT', 'KAVA/USDT',
    'KMD/USDT', 'KNC/USDT', 'LINK/USDT', 'LRC/USDT', 'LSK/USDT', 'LTC/USDT',
    'LTO/USDT', 'LUNA/USDT', 'MANA/USDT', 'MBL/USDT', 'MDT/USDT', 'MKR/USDT',
    'MTL/USDT', 'NEO/USDT', 'NKN/USDT', 'NMR/USDT', 'NULS/USDT', 'OGN/USDT',
    'ONE/USDT', 'ONG/USDT', 'ONT/USDT', 'PAXG/USDT', 'QTUM/USDT', 'RLC/USDT',
    'RSR/USDT', 'RVN/USDT', 'SAND/USDT', 'SC/USDT', 'SNX/USDT', 'SOL/USDT',
    'STORJ/USDT', 'STPT/USDT', 'STX/USDT', 'SXP/USDT', 'TFUEL/USDT', 'THETA/USDT',
    'TROY/USDT', 'TRX/USDT', 'TUSD/USDT', 'USDC/USDT', 'VET/USDT', 'VTHO/USDT',
    'WAN/USDT', 'WIN/USDT', 'XLM/USDT', 'XRP/USDT', 'XTZ/USDT', 'YFI/USDT',
    'ZEC/USDT', 'ZEN/USDT', 'ZIL/USDT', 'ZRX/USDT']

prediction_date = '2025-03-24'  # I hard-coded this for now bc all of the values are valid for 14-days
selected_sequences = []

for ticker in selected_tickers:
    # Get all data for this symbol
    symbol_data = df[df['symbol'] == ticker].sort_values('date')
    
    # Get the 14-day sequence ending on prediction_date
    sequence = get_14day_sequence_ending_on(symbol_data, prediction_date)
    
    if sequence is not None:
        selected_sequences.append(sequence)
    else:
        print(f"Warning: No valid 14-day sequence ending on {prediction_date} for {ticker}")

# Combine all valid sequences into one dataframe
if selected_sequences:
    final_df = pd.concat(selected_sequences)
    
    # Verify we have exactly 14 days per symbol
    print("\nDays per symbol in final dataset:")
    print(final_df.groupby('symbol').size())
    
    # Save to CSV if needed
    final_df.to_csv('14day_new_crypto_sequences_custom.csv', index=False)
    print(f"\nSuccessfully processed {len(selected_sequences)} out of {len(selected_tickers)} tickers")
else:
    print("No valid sequences found for the selected tickers and date")


Days per symbol in final dataset:
symbol
ADA/USDT     14
ALGO/USDT    14
ANKR/USDT    14
ARDR/USDT    14
ARPA/USDT    14
             ..
YFI/USDT     14
ZEC/USDT     14
ZEN/USDT     14
ZIL/USDT     14
ZRX/USDT     14
Length: 100, dtype: int64

Successfully processed 100 out of 100 tickers


In [57]:
class PredictionDataset(Dataset):
    """Dataset for making predictions on pre-processed 14-day windows"""
    def __init__(self, df, feature_cols, target_col='close'):
        """
        Args:
            df: DataFrame containing the 14-day sequences
            feature_cols: List of feature column names to use
            target_col: Name of target column (default 'close')
        """
        self.df = df
        self.feature_cols = feature_cols
        self.target_col = target_col
        
    def __len__(self):
        return len(self.df) // 14  # Each sequence is 14 days
    
    def __getitem__(self, idx):
        start_idx = idx * 14
        end_idx = start_idx + 14
        
        # Get input sequence features
        sequence = self.df.iloc[start_idx:end_idx][self.feature_cols].values
        # Get target (next day's close price)
        target = self.df.iloc[end_idx][self.target_col] if end_idx < len(self.df) else 0
        
        X = torch.tensor(sequence, dtype=torch.float32)
        y = torch.tensor(target, dtype=torch.float32)
        return X, y


In [58]:

def predict_crypto_prices(df, transformer_model, informer_model, normalizer, batch_size=32):
    """
    Make predictions using both Transformer and Informer models on 14-day crypto sequences.
    
    Args:
        df: DataFrame containing 14-day sequences for multiple cryptocurrencies
        transformer_model: Loaded CryptoTransformer model
        informer_model: Loaded CryptoInformer model
        normalizer: Normalizer object fitted to training data
        batch_size: Batch size for prediction
        
    Returns:
        DataFrame with predictions from both models
    """
    # Ensure models are in eval mode
    transformer_model.eval()
    informer_model.eval()
    
    # Get feature columns (exclude date and symbol)
    feature_cols = [col for col in df.columns if col not in ['date', 'symbol']]
    
    # Create dataset and dataloader
    dataset = PredictionDataset(df, feature_cols)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    # Store predictions
    transformer_preds = []
    informer_preds = []
    dates = []
    symbols = []
    actual_closes = []
    
    with torch.no_grad():
        for batch in dataloader:
            seq_batch, target_batch = batch
            
            # Normalize inputs
            seq_batch = normalizer(seq_batch)
            
            # Get predictions from both models
            transformer_output = transformer_model(seq_batch)
            informer_output = informer_model(seq_batch)
            
            transformer_preds.extend(transformer_output.numpy())
            informer_preds.extend(informer_output.numpy())
    
    # Create prediction DataFrame
    # We'll align predictions with the last day of each 14-day window
    prediction_points = []
    for i in range(len(transformer_preds)):
        idx = (i + 1) * 14 - 1  # Last index of each window
        if idx < len(df):
            prediction_points.append(idx)
    
    result_df = df.iloc[prediction_points].copy()
    result_df['transformer_pred'] = transformer_preds[:len(prediction_points)]
    result_df['informer_pred'] = informer_preds[:len(prediction_points)]
    
    # Calculate next day's actual close if available
    result_df['next_close'] = result_df['close'].shift(-1)
    
    return result_df[['date', 'symbol', 'close', 'next_close', 'transformer_pred', 'informer_pred']]



In [59]:
# Example usage:
if __name__ == "__main__":

    # Load your models (as you've shown)
    transformer_model_path = "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/saved_models/CryptoTransformer/Best_R2.pth"
    informer_model_path = "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/saved_models/CryptoInformer/Best_R2.pth"
    
    transformer_model = CryptoTransformer()  
    informer_model = CryptoInformer()
    
    transformer_model.load_state_dict(torch.load(transformer_model_path, map_location=torch.device('cpu')))
    informer_model.load_state_dict(torch.load(informer_model_path, map_location=torch.device('cpu')))
    
    train_data_path = "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/data/processed/train_set.csv"
    train_dataset = CryptoDataset(train_data_path)
    normalizer = Normalizer()
    normalizer.fit(train_dataset)

    

    # Load your input DataFrame (14-day windows for top 20 cryptos)
    df = pd.read_csv('14day_new_crypto_sequences_custom.csv')
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values(['symbol', 'date'])
    
    # Make predictions
    predictions_df = predict_crypto_prices(df, transformer_model, informer_model, normalizer)
    
    # Save results
    predictions_df.to_csv("crypto_predictions.csv", index=False)
    print("Predictions saved to crypto_predictions.csv")

  transformer_model.load_state_dict(torch.load(transformer_model_path, map_location=torch.device('cpu')))
  informer_model.load_state_dict(torch.load(informer_model_path, map_location=torch.device('cpu')))


Predictions saved to crypto_predictions.csv


In [60]:
print(predictions_df)

           date     symbol       close  next_close  transformer_pred  \
13   2025-03-24   ADA/USDT     0.73200     0.20360         32.198696   
27   2025-03-24  ALGO/USDT     0.20360     0.02058         33.195305   
41   2025-03-24  ANKR/USDT     0.02058     0.06380         33.230850   
55   2025-03-24  ARDR/USDT     0.06380     0.02994         33.293629   
69   2025-03-24  ARPA/USDT     0.02994     4.95600         33.245804   
...         ...        ...         ...         ...               ...   
1343 2025-03-24   YFI/USDT  5416.00000    32.44000       3050.841064   
1357 2025-03-24   ZEC/USDT    32.44000     9.67000         33.759171   
1371 2025-03-24   ZEN/USDT     9.67000     0.01296         33.395817   
1385 2025-03-24   ZIL/USDT     0.01296     0.29140         33.187714   
1399 2025-03-24   ZRX/USDT     0.29140         NaN         33.283978   

      informer_pred  
13        10.673519  
27         9.216707  
41         9.129238  
55         9.180437  
69         9.134610  
...

In [61]:
predictions_df['percentage_increase'] = (predictions_df['informer_pred'] - predictions_df['close']) / predictions_df['close'] * 100

In [62]:
from sklearn.metrics import r2_score
import pandas as pd

# Assuming your DataFrame is named df
# First, drop rows where next_close or transformer_pred is NaN
predictions_df_clean = predictions_df.dropna(subset=['next_close', 'transformer_pred'])

# Calculate R² score
r2t = r2_score(predictions_df_clean['next_close'], predictions_df_clean['transformer_pred'])
r2i = r2_score(predictions_df_clean['next_close'], predictions_df_clean['informer_pred'])

print(f"R² score between next_close and transformer_pred: {r2t:.4f}")
print(f"R² score between next_close and informer_pred: {r2i:.4f}")



R² score between next_close and transformer_pred: -0.5724
R² score between next_close and informer_pred: -0.5957


In [63]:
# Calculate the total percentage increase (predicted)
total_percentage_increase = predictions_df['percentage_increase'].sum()

# Avoid division by zero
if total_percentage_increase == 0:
    raise ValueError("Total predicted percentage increase is zero, cannot calculate proportion.")

# Proportion and coin allocation based on predicted increase
predictions_df['proportion'] = predictions_df['percentage_increase'] / total_percentage_increase
predictions_df['percent_of_coins'] = (predictions_df['proportion'] * 100).round()

# Adjust to ensure total = 100
total_coins = predictions_df['percent_of_coins'].sum()
if total_coins != 100:
    max_error_symbol = predictions_df.loc[predictions_df['percent_of_coins'].idxmax(), 'symbol']
    predictions_df.loc[predictions_df['symbol'] == max_error_symbol, 'percent_of_coins'] -= total_coins - 100

# ---------------------------------------
# Now calculate actual allocation
# ---------------------------------------

# Calculate actual percentage increase
predictions_df['actual_percentage_increase'] = (
    (predictions_df['next_close'] - predictions_df['close']) / predictions_df['close']
) * 100

# Sum of actual percentage increases
total_actual_increase = predictions_df['actual_percentage_increase'].sum()

if total_actual_increase == 0:
    raise ValueError("Total actual percentage increase is zero, cannot calculate proportion.")

# Proportion and coin allocation based on actual increase
predictions_df['actual_proportion'] = predictions_df['actual_percentage_increase'] / total_actual_increase
predictions_df['percent_of_coins_actual'] = (predictions_df['actual_proportion'] * 100).round()

# Adjust to ensure total = 100
total_actual_coins = predictions_df['percent_of_coins_actual'].sum()
if total_actual_coins != 100:
    max_error_symbol_actual = predictions_df.loc[predictions_df['percent_of_coins_actual'].idxmax(), 'symbol']
    predictions_df.loc[
        predictions_df['symbol'] == max_error_symbol_actual, 'percent_of_coins_actual'
    ] -= total_actual_coins - 100

# Final check
assert predictions_df['percent_of_coins'].sum() == 100, "Predicted total is not 100"
assert predictions_df['percent_of_coins_actual'].sum() == 100, "Actual total is not 100"

# Save results
predictions_df.to_csv('final_predictions_latest.csv', index=False)
print(predictions_df)

print("Final predictions saved to 'final_predictions.csv'")


           date     symbol       close  next_close  transformer_pred  \
13   2025-03-24   ADA/USDT     0.73200     0.20360         32.198696   
27   2025-03-24  ALGO/USDT     0.20360     0.02058         33.195305   
41   2025-03-24  ANKR/USDT     0.02058     0.06380         33.230850   
55   2025-03-24  ARDR/USDT     0.06380     0.02994         33.293629   
69   2025-03-24  ARPA/USDT     0.02994     4.95600         33.245804   
...         ...        ...         ...         ...               ...   
1343 2025-03-24   YFI/USDT  5416.00000    32.44000       3050.841064   
1357 2025-03-24   ZEC/USDT    32.44000     9.67000         33.759171   
1371 2025-03-24   ZEN/USDT     9.67000     0.01296         33.395817   
1385 2025-03-24   ZIL/USDT     0.01296     0.29140         33.187714   
1399 2025-03-24   ZRX/USDT     0.29140         NaN         33.283978   

      informer_pred  percentage_increase    proportion  percent_of_coins  \
13        10.673519          1358.131029  4.966509e-05     

In [64]:

average_price = df['close'].mean()

print(f"Average price: {average_price:.2f}")

Average price: 968.32
