In [3]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# 加载数据
data = pd.read_csv('new.csv')
data = data*100
X = data.iloc[:, 5:].values
y = data.iloc[:, 0].values

# 划分数据集
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25, shuffle=False)  # 0.25 * 0.8 = 0.2

# 转换为PyTorch张量
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)

# 创建数据加载器
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# 定义模型
class SimpleNN(nn.Module):
    def __init__(self, input_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 1)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

model = SimpleNN(input_size=X_train.shape[1])

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
num_epochs = 100
best_val_loss = float('inf')
early_stopping_patience = 10
early_stopping_counter = 0

for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    
    # 验证模型
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = criterion(val_outputs, y_val_tensor)
    
    if val_loss.item() < best_val_loss:
        best_val_loss = val_loss.item()
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
    
    if early_stopping_counter >= early_stopping_patience:
        print(f'Early stopping at epoch {epoch+1}')
        break

    r2 = r2_score(y_val_tensor.numpy(), val_outputs.numpy())
    mse = mean_squared_error(y_val_tensor.numpy(), val_outputs.numpy())
    # Assuming your Sharpe ratio calculation is based on returns, you might need to adjust this
    # sharpe = sharpe_ratio(valid_y.numpy(), predictions.numpy())
    # accuracy = ((y_val_tensor > 0) == (valid_y > 0)).float().mean()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, Val Loss: {val_loss.item()}')
    print(f'R2: {r2}, MSE: {mse}')


Epoch 1/100, Loss: 0.4471377730369568, Val Loss: 5.656149387359619
R2: -0.022148153120807645, MSE: 5.656149864196777
Epoch 2/100, Loss: 0.17983154952526093, Val Loss: 5.673184871673584
R2: -0.025226609593864024, MSE: 5.673184871673584
Epoch 3/100, Loss: 0.7086482048034668, Val Loss: 5.660938739776611
R2: -0.02301346402659088, MSE: 5.660938262939453
Epoch 4/100, Loss: 0.863491415977478, Val Loss: 5.584767818450928
R2: -0.009248308525935167, MSE: 5.584766864776611
Epoch 5/100, Loss: 0.2666064202785492, Val Loss: 5.605545520782471
R2: -0.013003215771876242, MSE: 5.605545520782471
Epoch 6/100, Loss: 0.7151561975479126, Val Loss: 5.593504428863525
R2: -0.010827267063228119, MSE: 5.593504428863525
Epoch 7/100, Loss: 0.5534029006958008, Val Loss: 5.578756332397461
R2: -0.008161925188693075, MSE: 5.5787553787231445
Epoch 8/100, Loss: 0.8696029782295227, Val Loss: 5.604162216186523
R2: -0.01275323593443245, MSE: 5.604162216186523
Epoch 9/100, Loss: 0.4410631060600281, Val Loss: 5.56871891021728