In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Read the data
data = pd.concat([pd.read_csv(f'prices_round_1_day_{day}.csv', sep=';') for day in ['-2', '-1', '0']])

data['spread'] = data['ask_price_1'] - data['bid_price_1']
data['mid_price_change'] = data['mid_price'].diff()
data['volume_imbalance'] = (data['bid_volume_1'] - data['ask_volume_1']) / (data['bid_volume_1'] + data['ask_volume_1'])
data['sma_5'] = data['mid_price'].rolling(window=5).mean()

def calculate_rsi(data, window=14):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).fillna(0)
    loss = (-delta.where(delta < 0, 0)).fillna(0)

    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()

    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_macd(data, slow=26, fast=12, signal=9):
    exp1 = data.ewm(span=fast, adjust=False).mean()
    exp2 = data.ewm(span=slow, adjust=False).mean()
    macd = exp1 - exp2
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    return macd, signal_line

data['rsi'] = calculate_rsi(data['mid_price'])

data['macd'], data['signal_line'] = calculate_macd(data['mid_price'])

df_starfruit = data.loc[data['product'] == 'STARFRUIT']

df_starfruit.head()

Unnamed: 0,day,timestamp,product,bid_price_1,bid_volume_1,bid_price_2,bid_volume_2,bid_price_3,bid_volume_3,ask_price_1,...,ask_volume_3,mid_price,profit_and_loss,spread,mid_price_change,volume_imbalance,sma_5,rsi,macd,signal_line
1,-2,0,STARFRUIT,4997,26,,,,,5003,...,,5000.0,0.0,6,-5000.0,0.0,,0.0,-398.860399,-79.77208
3,-2,100,STARFRUIT,5000,5,4998.0,6.0,4997.0,20.0,5004,...,,5002.0,0.0,4,-5000.5,-0.6,,33.343331,-631.765006,-226.647551
4,-2,200,STARFRUIT,4997,1,4996.0,30.0,,,5003,...,,5000.0,0.0,6,-2.0,-0.9375,7000.9,33.338887,-878.580811,-357.034203
7,-2,300,STARFRUIT,4996,27,,,,,5003,...,,4999.5,0.0,7,-5000.5,0.0,7000.3,40.0012,-741.84122,-493.914361
9,-2,400,STARFRUIT,4995,20,,,,,5002,...,,4998.5,0.0,7,-4998.0,0.0,7998.9,42.855102,-764.96858,-554.64651


In [6]:
# Define features and target
features = df_starfruit[['spread', 'mid_price_change', 'volume_imbalance', 'rsi', 'macd']]
target = df_starfruit['mid_price'].values.reshape(-1, 1)

# Scale the features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, random_state=42)

# Convert to tensors
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32)
X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.float32)

# Neural network with regularization
class OrchidNet(nn.Module):
    def __init__(self, input_size):
        super(OrchidNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.bn1 = nn.BatchNorm1d(64)  # Batch normalization
        self.fc2 = nn.Linear(64, 32)
        self.dropout = nn.Dropout(0.1)  # Dropout
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

model = OrchidNet(X_train_torch.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)  # L2 regularization
criterion = nn.MSELoss()

# Training with early stopping
best_loss = float('inf')
patience, trials = 5, 0
for epoch in range(100):
    model.train()
    for features, labels in DataLoader(X_train_torch, batch_size=32, shuffle=True):
        optimizer.zero_grad()
        output = model(features)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

    # Validation loss for early stopping
    model.eval()
    with torch.no_grad():
        val_loss = criterion(model(X_test_torch), y_test_torch)
    if val_loss < best_loss:
        best_loss = val_loss
        trials = 0
    else:
        trials += 1
        if trials >= patience:
            print("Early stopping triggered.")
            break

# Coefficients and performance evaluation
model.eval()
with torch.no_grad():
    predictions = model(X_test_torch)
    final_mse = mean_squared_error(y_test_torch.numpy(), predictions.numpy())
    final_r2 = r2_score(y_test_torch.numpy(), predictions.numpy())

coefficients = model.fc1.weight.detach().numpy()[0]
intercept = model.fc3.bias.item()

print(f"MSE: {final_mse}, R^2: {final_r2}, Coefficients: {coefficients}, Intercept: {intercept}")


ValueError: too many values to unpack (expected 2)