In [None]:
'''
导入所需库
'''

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from scipy.interpolate import interp1d
from PIL import Image
import torch

import torch.nn as nn
import torch.nn.functional as F
from torch.nn.init import xavier_uniform_
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from tqdm import tqdm
import pandas as pd
import pandas_market_calendars as mcal

# Labeling and Splitting

In [None]:
import os
import pandas as pd

# Step 1: load the main daily table and the trading-day anchor table.
df = pd.read_feather('train_data.feather')
anchors = pd.read_feather('trading_days_anchor_1993_2000.feather')

# Step 2: attach the anchor flags by date, then order rows by stock id and date.
anchor_flags = anchors[['date', 'anchor_5', 'anchor_20', 'anchor_60']]
df = df.merge(anchor_flags, on=['date'], how='left')
df = df.sort_values(['id', 'date']).reset_index(drop=True)

# Step 3: one entry per image window:
# (window length, anchor column, chart directory, output label file).
configs = [
    (5,  'anchor_5',  './charts_train/5d_charts',  './labels_train/image_labels_i5.feather'),
    (20, 'anchor_20', './charts_train/20d_charts', './labels_train/image_labels_i20.feather'),
    (60, 'anchor_60', './charts_train/60d_charts', './labels_train/image_labels_i60.feather'),
]

for window, anchor_col, img_dir, out_feather in configs:
    for needed_dir in (img_dir, os.path.dirname(out_feather)):
        os.makedirs(needed_dir, exist_ok=True)

    image_labels = []
    lookback = window - 1  # rows required before the anchor row

    for id_val, grp in df.groupby('id'):
        grp = grp.reset_index(drop=True)
        # Positions (within this stock) of the rows flagged as anchors.
        is_anchor = grp[anchor_col] == 1.0

        for idx in grp.index[is_anchor].tolist():
            # Skip anchors that do not have a full window of history behind them.
            if idx < lookback:
                continue

            win = grp.iloc[idx - lookback: idx + 1]
            last_date = win['date'].iloc[-1]

            # Label per horizon: 1 when the return on the window's last row is > 0.
            # NOTE(review): a NaN return compares False and is therefore labelled 0;
            # confirm that this is intended.
            horizon_labels = {
                f'label_{horizon}': int(win[f'ret_{horizon}d'].iloc[-1] > 0)
                for horizon in (5, 20, 60)
            }

            # Chart files are named after the stock id and the anchor (last) date.
            anchor_date = pd.to_datetime(last_date).strftime('%Y%m%d')
            image_path = os.path.join(img_dir, f'id_{id_val}_{anchor_date}.png')

            # Only keep samples whose chart image is present on disk.
            if not os.path.exists(image_path):
                continue

            record = {'image_path': image_path, 'id': id_val, 'date': last_date}
            record.update(horizon_labels)
            image_labels.append(record)

    # Persist the label table for this window length.
    labels_df = pd.DataFrame(image_labels)
    labels_df.to_feather(out_feather)
    print(f"{window}d 图像标签已生成并保存到 {out_feather}")


In [None]:
def balance_and_split(labels_file, label_column, train_file, test_file):
    """Balance a binary label table and write a stratified 70/30 split.

    The majority class is randomly down-sampled to the size of the minority
    class, the result is shuffled, and a stratified train/test split is saved
    as two Feather files. All random steps use ``random_state=42``.

    Args:
        labels_file: Feather file holding the full label table.
        label_column: name of the 0/1 column to balance and stratify on.
        train_file: output path for the training split.
        test_file: output path for the test split.

    Raises:
        ValueError: if either class has no rows in ``label_column``.
    """
    labels_df = pd.read_feather(labels_file)

    # Rows of each class; anything that is neither 0 nor 1 is dropped here.
    label_0 = labels_df[labels_df[label_column] == 0]
    label_1 = labels_df[labels_df[label_column] == 1]

    # Down-sample both classes to the size of the smaller one.
    num_samples = min(len(label_0), len(label_1))
    if num_samples == 0:
        raise ValueError(
            f"Cannot balance '{label_column}' in {labels_file}: "
            f"{len(label_0)} rows with label 0 and {len(label_1)} rows with label 1."
        )
    label_0 = label_0.sample(n=num_samples, random_state=42)
    label_1 = label_1.sample(n=num_samples, random_state=42)

    # Merge the two classes and shuffle.
    balanced_df = pd.concat([label_0, label_1]).sample(frac=1, random_state=42).reset_index(drop=True)

    # Stratified 70% / 30% split.
    train_df, test_df = train_test_split(
        balanced_df, train_size=0.7, stratify=balanced_df[label_column], random_state=42
    )

    # train_test_split keeps the shuffled row labels. Older pandas versions
    # refuse to write a non-default index to Feather, and newer ones would
    # store it as an extra index, so restore a plain 0..n-1 index first.
    train_df = train_df.reset_index(drop=True)
    test_df = test_df.reset_index(drop=True)

    # Show the class balance of both splits.
    print(f"训练集标签分布 ({label_column}):")
    print(train_df[label_column].value_counts())
    print(f"\n测试集标签分布 ({label_column}):")
    print(test_df[label_column].value_counts())

    # Save both splits.
    train_df.to_feather(train_file)
    test_df.to_feather(test_file)

    print(f"数据集 {label_column} 已划分并保存！")

In [None]:
horizons = [5, 20, 60]
labels = [5, 20, 60]

# Create one balanced train/test split for every
# (image window, return horizon) combination.
combos = [(h, l) for h in horizons for l in labels]
for h, l in combos:
    tag = f'i{h}r{l}'
    balance_and_split(
        labels_file=f'./labels_train/image_labels_i{h}.feather',
        label_column=f'label_{l}',
        train_file=f'./labels_train/train_labels_{tag}.feather',
        test_file=f'./labels_train/test_labels_{tag}.feather',
    )

In [None]:
# Prefer GPU index 3 when the host actually has it; otherwise fall back to the
# first GPU, or to the CPU when CUDA is unavailable. Requesting 'cuda:3' on a
# host with fewer than four GPUs would fail as soon as a tensor is moved there.
if torch.cuda.is_available():
    device = torch.device('cuda:3' if torch.cuda.device_count() > 3 else 'cuda:0')
else:
    device = torch.device('cpu')
print(f"使用设备：{device}")

# Training

In [None]:
'''
定义 CNN
'''

class CNN5DModel(nn.Module):
    """Two-block CNN that maps a single-channel chart image to 2 logits.

    The 15360 input features of the linear layer equal 128 channels x 8 x 15,
    which matches a 1x32x15 input after the two height-halving pools
    (presumably the 5-day chart size; confirm against the image generator).
    """

    def __init__(self):
        super().__init__()

        # Block 1: conv -> batch norm -> leaky ReLU -> pool that halves the height only.
        stage_one = [
            nn.Conv2d(1, 64, (5, 3), stride=(1, 1), padding=(2, 1), dilation=(1, 1)),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        ]
        self.block1 = nn.Sequential(*stage_one)

        # Block 2: same pattern with 128 channels, then flatten and apply
        # dropout in front of the classifier.
        stage_two = [
            nn.Conv2d(64, 128, (5, 3), stride=(1, 1), padding=(2, 1)),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
            nn.Flatten(),
            nn.Dropout(p=0.5),
        ]
        self.block2 = nn.Sequential(*stage_two)

        # Classifier head.
        self.fc = nn.Linear(15360, 2)

        # Xavier-initialise the layers registered so far.
        self._initialize_weights()

        # Full pipeline; it reuses the very same sub-modules registered above.
        self.model = nn.Sequential(self.block1, self.block2, self.fc)

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every conv/linear layer."""
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the logits produced by the full pipeline for batch ``x``."""
        logits = self.model(x)
        return logits


class CNN20DModel(nn.Module):
    """Three-block CNN that maps a single-channel chart image to 2 logits.

    The 46080 input features of the linear layer equal 256 channels x 3 x 60,
    which is what a 1x64x60 input produces with the strides, dilation and
    padding below (presumably the 20-day chart size; confirm against the
    image generator).
    """

    def __init__(self):
        super().__init__()

        # Block 1: strided, dilated conv along the height, then height-only pooling.
        stage_one = [
            nn.Conv2d(1, 64, (5, 3), stride=(3, 1), padding=(3, 1), dilation=(2, 1)),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.01, inplace=True),
            nn.MaxPool2d(kernel_size=(2, 1)),
        ]
        self.block1 = nn.Sequential(*stage_one)

        # Block 2: size-preserving conv, then height-only pooling.
        stage_two = [
            nn.Conv2d(64, 128, (5, 3), padding=(2, 1)),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.01, inplace=True),
            nn.MaxPool2d(kernel_size=(2, 1)),
        ]
        self.block2 = nn.Sequential(*stage_two)

        # Block 3: conv and pooling, followed by flatten and dropout in front
        # of the classifier.
        stage_three = [
            nn.Conv2d(128, 256, kernel_size=(5, 3), padding=(3, 1)),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.01, inplace=True),
            nn.MaxPool2d(kernel_size=(2, 1)),
            nn.Flatten(),
            nn.Dropout(0.5),
        ]
        self.block3 = nn.Sequential(*stage_three)

        # Classifier head.
        self.fc = nn.Linear(46080, 2)

        # Xavier-initialise the layers registered so far.
        self._initialize_weights()

        # Full pipeline; it reuses the very same sub-modules registered above.
        pipeline = [self.block1, self.block2, self.block3, self.fc]
        self.model = nn.Sequential(*pipeline)

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every conv/linear layer."""
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the logits produced by the full pipeline for batch ``x``."""
        logits = self.model(x)
        return logits


class CNN60DModel(nn.Module):
    """Four-block CNN that maps a single-channel chart image to 2 logits.

    The 184320 input features of the linear layer equal 512 channels x 6 x 60,
    which matches a 1x96x180 input: three height-halving pools followed by a
    (2, 3) pool (presumably the 60-day chart size; confirm against the image
    generator).
    """

    def __init__(self):
        super().__init__()

        # Block 1: size-preserving conv, pool halves the height only.
        stage_one = [
            nn.Conv2d(1, 64, (5, 3), stride=(1, 1), padding=(2, 1), dilation=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        ]
        self.block1 = nn.Sequential(*stage_one)

        # Block 2: 64 -> 128 channels, height halved again.
        stage_two = [
            nn.Conv2d(64, 128, (5, 3), stride=(1, 1), padding=(2, 1)),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        ]
        self.block2 = nn.Sequential(*stage_two)

        # Block 3: 128 -> 256 channels, height halved again.
        stage_three = [
            nn.Conv2d(128, 256, (5, 3), stride=(1, 1), padding=(2, 1)),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        ]
        self.block3 = nn.Sequential(*stage_three)

        # Block 4: 256 -> 512 channels; this pool also shrinks the width by 3.
        # Flatten and dropout sit in front of the classifier.
        stage_four = [
            nn.Conv2d(256, 512, (5, 3), stride=(1, 1), padding=(2, 1)),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.01, inplace=True),
            nn.MaxPool2d((2, 3), stride=(2, 3)),
            nn.Flatten(),
            nn.Dropout(0.5),
        ]
        self.block4 = nn.Sequential(*stage_four)

        # Classifier head.
        self.fc = nn.Linear(184320, 2)

        # Xavier-initialise the layers registered so far.
        self._initialize_weights()

        # Full pipeline; it reuses the very same sub-modules registered above.
        pipeline = [self.block1, self.block2, self.block3, self.block4, self.fc]
        self.model = nn.Sequential(*pipeline)

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every conv/linear layer."""
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the logits produced by the full pipeline for batch ``x``."""
        logits = self.model(x)
        return logits

## i5r5

In [None]:
def get_model_optimizer(learning_rate=1e-5):
    """Create a fresh CNN5DModel with a cross-entropy loss and an Adam optimizer.

    Args:
        learning_rate: Adam learning rate (default 1e-5).

    Returns:
        Tuple ``(model, criterion, optimizer)``.
    """
    net = CNN5DModel()
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)
    return net, loss_fn, adam

# 训练函数，包含早停机制

def train_and_predict(model, train_loader, val_loader, criterion, optimizer, 
                     max_epochs=200, patience=2,
                     checkpoint_path='./labels_train/best_model_i5r5.pth',
                     run_name='i5r5'):
    """Train ``model`` with early stopping; return it with its validation predictions.

    Each epoch runs one pass over ``train_loader`` and one over ``val_loader``.
    Whenever the mean validation loss improves, the weights are saved to
    ``checkpoint_path`` and that epoch's predictions are kept. Training stops
    after ``patience`` consecutive epochs without improvement, or after
    ``max_epochs``. The best checkpoint is then restored, so the returned
    model and predictions always belong to the same (best) epoch.

    Args:
        model: network to train; it is moved to the module-level ``device``.
        train_loader: yields ``(data, target, path)`` batches for training.
        val_loader: yields ``(data, target, paths)`` batches for validation.
        criterion: loss, called as ``criterion(output, target)``.
        optimizer: optimizer over the model's parameters.
        max_epochs: upper bound on the number of epochs.
        patience: non-improving epochs tolerated before stopping.
        checkpoint_path: file the best ``state_dict`` is written to.
        run_name: tag used in the loss-curve title and file name.

    Returns:
        ``(model, predictions)``; ``predictions`` maps each validation image
        path to its predicted class index (int).
    """
    model = model.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    best_predictions = {}
    epoch_predictions = {}
    checkpoint_saved = False

    # Per-epoch mean losses, used for the loss curve below.
    train_losses = []
    val_losses = []

    for epoch in range(max_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pbar = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{max_epochs}]')

        for batch_idx, (data, target, _) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            pbar.set_postfix({'train_loss': f'{train_loss/(batch_idx+1):.4f}'})

        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        train_loss /= max(len(train_loader), 1)
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        epoch_predictions = {}
        with torch.no_grad():
            for data, target, paths in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                # Predicted class per image path for this epoch.
                pred = output.argmax(dim=1).cpu().numpy()
                for path, p in zip(paths, pred):
                    epoch_predictions[path] = int(p)

        val_loss /= max(len(val_loader), 1)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{max_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}')

        # ---- early stopping ----
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Keep weights and predictions of the best epoch together.
            best_predictions = epoch_predictions
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # Report the 1-based epoch number, like the progress line above.
                print(f'Early stopping at epoch {epoch + 1}')
                break

    if checkpoint_saved:
        # Restore the best weights on every exit path (early stop or max_epochs).
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        predictions = best_predictions
    else:
        # No epoch improved on +inf (e.g. NaN losses): the model still holds
        # its last weights, so return the matching last-epoch predictions.
        predictions = epoch_predictions

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{run_name} Train and Validation Loss Over Epochs')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'./labels_train/{run_name}_loss_curve.png')  # save the curve to disk
    plt.show()

    return model, predictions

class StockImageDataset(Dataset):
    """Chart images and their class labels, driven by a Feather label table.

    Args:
        csv_file: path of the Feather file holding the label table (despite
            the name, it is read with ``pd.read_feather``); it must contain
            an ``image_path`` column and ``label_column``.
        img_dir: directory the image files are loaded from; only the file
            name of each ``image_path`` entry is used.
        transform: optional callable applied to the loaded PIL image.
        label_column: name of the label column (default ``'label_5'``).
    """

    def __init__(self, csv_file, img_dir, transform=None, label_column='label_5'):
        self.labels_frame = pd.read_feather(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_column = label_column

    def __len__(self):
        """Number of labelled images."""
        return len(self.labels_frame)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for row ``idx``."""
        row = self.labels_frame.iloc[idx]
        # basename() copes with both '/' and the platform's own separator.
        img_path = os.path.join(self.img_dir, os.path.basename(row['image_path']))
        # Load as single-channel grayscale and release the file handle at once.
        # NOTE(review): assumes the charts are meant to be grayscale, as the
        # 1-channel models and the 1-channel Normalize suggest.
        with Image.open(img_path) as img:
            image = img.convert('L')
        label = int(row[self.label_column])

        if self.transform:
            image = self.transform(image)

        return image, label, img_path
    
# Preprocessing: convert the PIL image to a tensor, then normalise with
# mean 0.5 / std 0.5. No resizing is done, so images keep their stored size.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=[0.5], std=[0.5])
transform = transforms.Compose([to_tensor, normalize])

# Inputs and outputs of this run (5-day images, 5-day return label).
chart_dir = './charts_train/5d_charts'
train_labels_path = './labels_train/train_labels_i5r5.feather'
test_labels_path = './labels_train/test_labels_i5r5.feather'
predictions_path = './labels_train/test_labels_with_predictions_i5r5.feather'

# Datasets and loaders; only the training loader is shuffled.
train_dataset = StockImageDataset(csv_file=train_labels_path, img_dir=chart_dir, transform=transform)
test_dataset = StockImageDataset(csv_file=test_labels_path, img_dir=chart_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=128)

# Build the model and train it with early stopping.
model, criterion, optimizer = get_model_optimizer()
trained_model, predictions = train_and_predict(
    model, train_loader, val_loader, criterion, optimizer
)

# Attach the predicted class to each test row (matched on image path) and save.
test_df = pd.read_feather(test_labels_path)
test_df['pre_label_5'] = test_df['image_path'].map(predictions)
test_df.to_feather(predictions_path)

print("训练完成")

In [None]:
# Load the test labels together with the stored predictions.
test_df = pd.read_feather('./labels_train/test_labels_with_predictions_i5r5.feather')

# Accuracy in percent: share of rows whose prediction equals the true label.
is_hit = test_df['label_5'] == test_df['pre_label_5']
correct_predictions = is_hit.sum()
total_predictions = len(test_df)
accuracy = correct_predictions / total_predictions * 100

print(f"accuracy: {accuracy:.2f}%")

## i5r20

In [None]:
def get_model_optimizer(learning_rate=1e-5):
    """Create a fresh CNN5DModel with a cross-entropy loss and an Adam optimizer.

    Args:
        learning_rate: Adam learning rate (default 1e-5).

    Returns:
        Tuple ``(model, criterion, optimizer)``.
    """
    net = CNN5DModel()
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)
    return net, loss_fn, adam

# 训练函数，包含早停机制

def train_and_predict(model, train_loader, val_loader, criterion, optimizer, 
                     max_epochs=200, patience=2,
                     checkpoint_path='./labels_train/best_model_i5r20.pth',
                     run_name='i5r20'):
    """Train ``model`` with early stopping; return it with its validation predictions.

    Each epoch runs one pass over ``train_loader`` and one over ``val_loader``.
    Whenever the mean validation loss improves, the weights are saved to
    ``checkpoint_path`` and that epoch's predictions are kept. Training stops
    after ``patience`` consecutive epochs without improvement, or after
    ``max_epochs``. The best checkpoint is then restored, so the returned
    model and predictions always belong to the same (best) epoch.

    Args:
        model: network to train; it is moved to the module-level ``device``.
        train_loader: yields ``(data, target, path)`` batches for training.
        val_loader: yields ``(data, target, paths)`` batches for validation.
        criterion: loss, called as ``criterion(output, target)``.
        optimizer: optimizer over the model's parameters.
        max_epochs: upper bound on the number of epochs.
        patience: non-improving epochs tolerated before stopping.
        checkpoint_path: file the best ``state_dict`` is written to.
        run_name: tag used in the loss-curve title and file name.

    Returns:
        ``(model, predictions)``; ``predictions`` maps each validation image
        path to its predicted class index (int).
    """
    model = model.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    best_predictions = {}
    epoch_predictions = {}
    checkpoint_saved = False

    # Per-epoch mean losses, used for the loss curve below.
    train_losses = []
    val_losses = []

    for epoch in range(max_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pbar = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{max_epochs}]')

        for batch_idx, (data, target, _) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            pbar.set_postfix({'train_loss': f'{train_loss/(batch_idx+1):.4f}'})

        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        train_loss /= max(len(train_loader), 1)
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        epoch_predictions = {}
        with torch.no_grad():
            for data, target, paths in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                # Predicted class per image path for this epoch.
                pred = output.argmax(dim=1).cpu().numpy()
                for path, p in zip(paths, pred):
                    epoch_predictions[path] = int(p)

        val_loss /= max(len(val_loader), 1)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{max_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}')

        # ---- early stopping ----
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Keep weights and predictions of the best epoch together.
            best_predictions = epoch_predictions
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # Report the 1-based epoch number, like the progress line above.
                print(f'Early stopping at epoch {epoch + 1}')
                break

    if checkpoint_saved:
        # Restore the best weights on every exit path (early stop or max_epochs).
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        predictions = best_predictions
    else:
        # No epoch improved on +inf (e.g. NaN losses): the model still holds
        # its last weights, so return the matching last-epoch predictions.
        predictions = epoch_predictions

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{run_name} Train and Validation Loss Over Epochs')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'./labels_train/{run_name}_loss_curve.png')  # save the curve to disk
    plt.show()

    return model, predictions


class StockImageDataset(Dataset):
    """Chart images and their class labels, driven by a Feather label table.

    Args:
        csv_file: path of the Feather file holding the label table (despite
            the name, it is read with ``pd.read_feather``); it must contain
            an ``image_path`` column and ``label_column``.
        img_dir: directory the image files are loaded from; only the file
            name of each ``image_path`` entry is used.
        transform: optional callable applied to the loaded PIL image.
        label_column: name of the label column (default ``'label_20'``).
    """

    def __init__(self, csv_file, img_dir, transform=None, label_column='label_20'):
        self.labels_frame = pd.read_feather(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_column = label_column

    def __len__(self):
        """Number of labelled images."""
        return len(self.labels_frame)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for row ``idx``."""
        row = self.labels_frame.iloc[idx]
        # basename() copes with both '/' and the platform's own separator.
        img_path = os.path.join(self.img_dir, os.path.basename(row['image_path']))
        # Load as single-channel grayscale and release the file handle at once.
        # NOTE(review): assumes the charts are meant to be grayscale, as the
        # 1-channel models and the 1-channel Normalize suggest.
        with Image.open(img_path) as img:
            image = img.convert('L')
        label = int(row[self.label_column])

        if self.transform:
            image = self.transform(image)

        return image, label, img_path
    
# Preprocessing: convert the PIL image to a tensor, then normalise with
# mean 0.5 / std 0.5. No resizing is done, so images keep their stored size.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=[0.5], std=[0.5])
transform = transforms.Compose([to_tensor, normalize])

# Inputs and outputs of this run (5-day images, 20-day return label).
chart_dir = './charts_train/5d_charts'
train_labels_path = './labels_train/train_labels_i5r20.feather'
test_labels_path = './labels_train/test_labels_i5r20.feather'
predictions_path = './labels_train/test_labels_with_predictions_i5r20.feather'

# Datasets and loaders; only the training loader is shuffled.
train_dataset = StockImageDataset(csv_file=train_labels_path, img_dir=chart_dir, transform=transform)
test_dataset = StockImageDataset(csv_file=test_labels_path, img_dir=chart_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=128)

# Build the model and train it with early stopping.
model, criterion, optimizer = get_model_optimizer()
trained_model, predictions = train_and_predict(
    model, train_loader, val_loader, criterion, optimizer
)

# Attach the predicted class to each test row (matched on image path) and save.
test_df = pd.read_feather(test_labels_path)
test_df['pre_label_20'] = test_df['image_path'].map(predictions)
test_df.to_feather(predictions_path)

print("训练完成")

In [None]:
# Load the test labels together with the stored predictions.
test_df = pd.read_feather('./labels_train/test_labels_with_predictions_i5r20.feather')

# Accuracy in percent: share of rows whose prediction equals the true label.
is_hit = test_df['label_20'] == test_df['pre_label_20']
correct_predictions = is_hit.sum()
total_predictions = len(test_df)
accuracy = correct_predictions / total_predictions * 100

print(f"accuracy: {accuracy:.2f}%")

## i5r60

In [None]:
def get_model_optimizer(learning_rate=1e-5):
    """Create a fresh CNN5DModel with a cross-entropy loss and an Adam optimizer.

    Args:
        learning_rate: Adam learning rate (default 1e-5).

    Returns:
        Tuple ``(model, criterion, optimizer)``.
    """
    net = CNN5DModel()
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)
    return net, loss_fn, adam

# 训练函数，包含早停机制

def train_and_predict(model, train_loader, val_loader, criterion, optimizer, 
                     max_epochs=200, patience=2,
                     checkpoint_path='./labels_train/best_model_i5r60.pth',
                     run_name='i5r60'):
    """Train ``model`` with early stopping; return it with its validation predictions.

    Each epoch runs one pass over ``train_loader`` and one over ``val_loader``.
    Whenever the mean validation loss improves, the weights are saved to
    ``checkpoint_path`` and that epoch's predictions are kept. Training stops
    after ``patience`` consecutive epochs without improvement, or after
    ``max_epochs``. The best checkpoint is then restored, so the returned
    model and predictions always belong to the same (best) epoch.

    Args:
        model: network to train; it is moved to the module-level ``device``.
        train_loader: yields ``(data, target, path)`` batches for training.
        val_loader: yields ``(data, target, paths)`` batches for validation.
        criterion: loss, called as ``criterion(output, target)``.
        optimizer: optimizer over the model's parameters.
        max_epochs: upper bound on the number of epochs.
        patience: non-improving epochs tolerated before stopping.
        checkpoint_path: file the best ``state_dict`` is written to.
        run_name: tag used in the loss-curve title and file name.

    Returns:
        ``(model, predictions)``; ``predictions`` maps each validation image
        path to its predicted class index (int).
    """
    model = model.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    best_predictions = {}
    epoch_predictions = {}
    checkpoint_saved = False

    # Per-epoch mean losses, used for the loss curve below.
    train_losses = []
    val_losses = []

    for epoch in range(max_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pbar = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{max_epochs}]')

        for batch_idx, (data, target, _) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            pbar.set_postfix({'train_loss': f'{train_loss/(batch_idx+1):.4f}'})

        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        train_loss /= max(len(train_loader), 1)
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        epoch_predictions = {}
        with torch.no_grad():
            for data, target, paths in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                # Predicted class per image path for this epoch.
                pred = output.argmax(dim=1).cpu().numpy()
                for path, p in zip(paths, pred):
                    epoch_predictions[path] = int(p)

        val_loss /= max(len(val_loader), 1)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{max_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}')

        # ---- early stopping ----
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Keep weights and predictions of the best epoch together.
            best_predictions = epoch_predictions
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # Report the 1-based epoch number, like the progress line above.
                print(f'Early stopping at epoch {epoch + 1}')
                break

    if checkpoint_saved:
        # Restore the best weights on every exit path (early stop or max_epochs).
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        predictions = best_predictions
    else:
        # No epoch improved on +inf (e.g. NaN losses): the model still holds
        # its last weights, so return the matching last-epoch predictions.
        predictions = epoch_predictions

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{run_name} Train and Validation Loss Over Epochs')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'./labels_train/{run_name}_loss_curve.png')  # save the curve to disk
    plt.show()

    return model, predictions


class StockImageDataset(Dataset):
    """Chart images and their class labels, driven by a Feather label table.

    Args:
        csv_file: path of the Feather file holding the label table (despite
            the name, it is read with ``pd.read_feather``); it must contain
            an ``image_path`` column and ``label_column``.
        img_dir: directory the image files are loaded from; only the file
            name of each ``image_path`` entry is used.
        transform: optional callable applied to the loaded PIL image.
        label_column: name of the label column (default ``'label_60'``).
    """

    def __init__(self, csv_file, img_dir, transform=None, label_column='label_60'):
        self.labels_frame = pd.read_feather(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_column = label_column

    def __len__(self):
        """Number of labelled images."""
        return len(self.labels_frame)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for row ``idx``."""
        row = self.labels_frame.iloc[idx]
        # basename() copes with both '/' and the platform's own separator.
        img_path = os.path.join(self.img_dir, os.path.basename(row['image_path']))
        # Load as single-channel grayscale and release the file handle at once.
        # NOTE(review): assumes the charts are meant to be grayscale, as the
        # 1-channel models and the 1-channel Normalize suggest.
        with Image.open(img_path) as img:
            image = img.convert('L')
        label = int(row[self.label_column])

        if self.transform:
            image = self.transform(image)

        return image, label, img_path
    
# Preprocessing: convert the PIL image to a tensor, then normalise with
# mean 0.5 / std 0.5. No resizing is done, so images keep their stored size.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=[0.5], std=[0.5])
transform = transforms.Compose([to_tensor, normalize])

# Inputs and outputs of this run (5-day images, 60-day return label).
chart_dir = './charts_train/5d_charts'
train_labels_path = './labels_train/train_labels_i5r60.feather'
test_labels_path = './labels_train/test_labels_i5r60.feather'
predictions_path = './labels_train/test_labels_with_predictions_i5r60.feather'

# Datasets and loaders; only the training loader is shuffled.
train_dataset = StockImageDataset(csv_file=train_labels_path, img_dir=chart_dir, transform=transform)
test_dataset = StockImageDataset(csv_file=test_labels_path, img_dir=chart_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=128)

# Build the model and train it with early stopping.
model, criterion, optimizer = get_model_optimizer()
trained_model, predictions = train_and_predict(
    model, train_loader, val_loader, criterion, optimizer
)

# Attach the predicted class to each test row (matched on image path) and save.
test_df = pd.read_feather(test_labels_path)
test_df['pre_label_60'] = test_df['image_path'].map(predictions)
test_df.to_feather(predictions_path)

print("训练完成")

In [None]:
# Load the test labels together with the stored predictions.
test_df = pd.read_feather('./labels_train/test_labels_with_predictions_i5r60.feather')

# Accuracy in percent: share of rows whose prediction equals the true label.
is_hit = test_df['label_60'] == test_df['pre_label_60']
correct_predictions = is_hit.sum()
total_predictions = len(test_df)
accuracy = correct_predictions / total_predictions * 100

print(f"accuracy: {accuracy:.2f}%")

## i20r5

In [None]:
def get_model_optimizer(learning_rate=1e-5):
    """Create a fresh CNN20DModel with a cross-entropy loss and an Adam optimizer.

    Args:
        learning_rate: Adam learning rate (default 1e-5).

    Returns:
        Tuple ``(model, criterion, optimizer)``.
    """
    net = CNN20DModel()
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)
    return net, loss_fn, adam

# 训练函数，包含早停机制

def train_and_predict(model, train_loader, val_loader, criterion, optimizer, 
                     max_epochs=200, patience=2,
                     checkpoint_path='./labels_train/best_model_i20r5.pth',
                     run_name='i20r5'):
    """Train ``model`` with early stopping; return it with its validation predictions.

    Each epoch runs one pass over ``train_loader`` and one over ``val_loader``.
    Whenever the mean validation loss improves, the weights are saved to
    ``checkpoint_path`` and that epoch's predictions are kept. Training stops
    after ``patience`` consecutive epochs without improvement, or after
    ``max_epochs``. The best checkpoint is then restored, so the returned
    model and predictions always belong to the same (best) epoch.

    Args:
        model: network to train; it is moved to the module-level ``device``.
        train_loader: yields ``(data, target, path)`` batches for training.
        val_loader: yields ``(data, target, paths)`` batches for validation.
        criterion: loss, called as ``criterion(output, target)``.
        optimizer: optimizer over the model's parameters.
        max_epochs: upper bound on the number of epochs.
        patience: non-improving epochs tolerated before stopping.
        checkpoint_path: file the best ``state_dict`` is written to.
        run_name: tag used in the loss-curve title and file name.

    Returns:
        ``(model, predictions)``; ``predictions`` maps each validation image
        path to its predicted class index (int).
    """
    model = model.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    best_predictions = {}
    epoch_predictions = {}
    checkpoint_saved = False

    # Per-epoch mean losses, used for the loss curve below.
    train_losses = []
    val_losses = []

    for epoch in range(max_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pbar = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{max_epochs}]')

        for batch_idx, (data, target, _) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            pbar.set_postfix({'train_loss': f'{train_loss/(batch_idx+1):.4f}'})

        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        train_loss /= max(len(train_loader), 1)
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        epoch_predictions = {}
        with torch.no_grad():
            for data, target, paths in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                # Predicted class per image path for this epoch.
                pred = output.argmax(dim=1).cpu().numpy()
                for path, p in zip(paths, pred):
                    epoch_predictions[path] = int(p)

        val_loss /= max(len(val_loader), 1)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{max_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}')

        # ---- early stopping ----
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Keep weights and predictions of the best epoch together.
            best_predictions = epoch_predictions
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # Report the 1-based epoch number, like the progress line above.
                print(f'Early stopping at epoch {epoch + 1}')
                break

    if checkpoint_saved:
        # Restore the best weights on every exit path (early stop or max_epochs).
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        predictions = best_predictions
    else:
        # No epoch improved on +inf (e.g. NaN losses): the model still holds
        # its last weights, so return the matching last-epoch predictions.
        predictions = epoch_predictions

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{run_name} Train and Validation Loss Over Epochs')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'./labels_train/{run_name}_loss_curve.png')  # save the curve to disk
    plt.show()

    return model, predictions



class StockImageDataset(Dataset):
    """Chart images and their class labels, driven by a Feather label table.

    Args:
        csv_file: path of the Feather file holding the label table (despite
            the name, it is read with ``pd.read_feather``); it must contain
            an ``image_path`` column and ``label_column``.
        img_dir: directory the image files are loaded from; only the file
            name of each ``image_path`` entry is used.
        transform: optional callable applied to the loaded PIL image.
        label_column: name of the label column (default ``'label_5'``).
    """

    def __init__(self, csv_file, img_dir, transform=None, label_column='label_5'):
        self.labels_frame = pd.read_feather(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_column = label_column

    def __len__(self):
        """Number of labelled images."""
        return len(self.labels_frame)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for row ``idx``."""
        row = self.labels_frame.iloc[idx]
        # basename() copes with both '/' and the platform's own separator.
        img_path = os.path.join(self.img_dir, os.path.basename(row['image_path']))
        # Load as single-channel grayscale and release the file handle at once.
        # NOTE(review): assumes the charts are meant to be grayscale, as the
        # 1-channel models and the 1-channel Normalize suggest.
        with Image.open(img_path) as img:
            image = img.convert('L')
        label = int(row[self.label_column])

        if self.transform:
            image = self.transform(image)

        return image, label, img_path
    
# Preprocessing: convert the PIL image to a tensor, then normalise with
# mean 0.5 / std 0.5. No resizing is done, so images keep their stored size.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=[0.5], std=[0.5])
transform = transforms.Compose([to_tensor, normalize])

# Inputs and outputs of this run (20-day images, 5-day return label).
chart_dir = './charts_train/20d_charts'
train_labels_path = './labels_train/train_labels_i20r5.feather'
test_labels_path = './labels_train/test_labels_i20r5.feather'
predictions_path = './labels_train/test_labels_with_predictions_i20r5.feather'

# Datasets and loaders; only the training loader is shuffled.
train_dataset = StockImageDataset(csv_file=train_labels_path, img_dir=chart_dir, transform=transform)
test_dataset = StockImageDataset(csv_file=test_labels_path, img_dir=chart_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=128)

# Build the model and train it with early stopping.
model, criterion, optimizer = get_model_optimizer()
trained_model, predictions = train_and_predict(
    model, train_loader, val_loader, criterion, optimizer
)

# Attach the predicted class to each test row (matched on image path) and save.
test_df = pd.read_feather(test_labels_path)
test_df['pre_label_5'] = test_df['image_path'].map(predictions)
test_df.to_feather(predictions_path)

print("训练完成")

In [None]:
# Load the test labels together with the stored predictions.
test_df = pd.read_feather('./labels_train/test_labels_with_predictions_i20r5.feather')

# Accuracy in percent: share of rows whose prediction equals the true label.
is_hit = test_df['label_5'] == test_df['pre_label_5']
correct_predictions = is_hit.sum()
total_predictions = len(test_df)
accuracy = correct_predictions / total_predictions * 100

print(f"accuracy: {accuracy:.2f}%")

## i20r20

In [None]:
def get_model_optimizer(learning_rate=1e-5):
    """Create a fresh CNN20DModel with a cross-entropy loss and an Adam optimizer.

    Args:
        learning_rate: Adam learning rate (default 1e-5).

    Returns:
        Tuple ``(model, criterion, optimizer)``.
    """
    net = CNN20DModel()
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)
    return net, loss_fn, adam

# 训练函数，包含早停机制
def train_and_predict(model, train_loader, val_loader, criterion, optimizer, 
                     max_epochs=200, patience=2,
                     checkpoint_path='./labels_train/best_model_i20r20.pth',
                     run_name='i20r20'):
    """Train ``model`` with early stopping; return it with its validation predictions.

    Each epoch runs one pass over ``train_loader`` and one over ``val_loader``.
    Whenever the mean validation loss improves, the weights are saved to
    ``checkpoint_path`` and that epoch's predictions are kept. Training stops
    after ``patience`` consecutive epochs without improvement, or after
    ``max_epochs``. The best checkpoint is then restored, so the returned
    model and predictions always belong to the same (best) epoch.

    Args:
        model: network to train; it is moved to the module-level ``device``.
        train_loader: yields ``(data, target, path)`` batches for training.
        val_loader: yields ``(data, target, paths)`` batches for validation.
        criterion: loss, called as ``criterion(output, target)``.
        optimizer: optimizer over the model's parameters.
        max_epochs: upper bound on the number of epochs.
        patience: non-improving epochs tolerated before stopping.
        checkpoint_path: file the best ``state_dict`` is written to.
        run_name: tag used in the loss-curve title and file name.

    Returns:
        ``(model, predictions)``; ``predictions`` maps each validation image
        path to its predicted class index (int).
    """
    model = model.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    best_predictions = {}
    epoch_predictions = {}
    checkpoint_saved = False

    # Per-epoch mean losses, used for the loss curve below.
    train_losses = []
    val_losses = []

    for epoch in range(max_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pbar = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{max_epochs}]')

        for batch_idx, (data, target, _) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            pbar.set_postfix({'train_loss': f'{train_loss/(batch_idx+1):.4f}'})

        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        train_loss /= max(len(train_loader), 1)
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        epoch_predictions = {}
        with torch.no_grad():
            for data, target, paths in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                # Predicted class per image path for this epoch.
                pred = output.argmax(dim=1).cpu().numpy()
                for path, p in zip(paths, pred):
                    epoch_predictions[path] = int(p)

        val_loss /= max(len(val_loader), 1)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{max_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}')

        # ---- early stopping ----
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Keep weights and predictions of the best epoch together.
            best_predictions = epoch_predictions
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # Report the 1-based epoch number, like the progress line above.
                print(f'Early stopping at epoch {epoch + 1}')
                break

    if checkpoint_saved:
        # Restore the best weights on every exit path (early stop or max_epochs).
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        predictions = best_predictions
    else:
        # No epoch improved on +inf (e.g. NaN losses): the model still holds
        # its last weights, so return the matching last-epoch predictions.
        predictions = epoch_predictions

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{run_name} Train and Validation Loss Over Epochs')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'./labels_train/{run_name}_loss_curve.png')  # save the curve to disk
    plt.show()

    return model, predictions

class StockImageDataset(Dataset):
    """Chart-image dataset backed by a feather label table.

    Each item is a tuple ``(image, label, img_path)``: the (optionally
    transformed) image, the value of the selected label column, and the path
    the image was loaded from.
    """

    def __init__(self, csv_file, img_dir, transform=None, label_col='label_20'):
        """
        Args:
            csv_file: Path to the label table. Despite the parameter name the
                file is read with ``pd.read_feather``. The table must contain
                an ``image_path`` column and the ``label_col`` column.
            img_dir: Directory the image files are loaded from.
            transform: Optional callable applied to the opened PIL image.
            label_col: Name of the label column returned by ``__getitem__``.

        Raises:
            KeyError: If a required column is missing from the label table.
        """
        self.labels_frame = pd.read_feather(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_col = label_col

        # Fail here rather than on the first batch inside a DataLoader.
        missing = {'image_path', label_col} - set(self.labels_frame.columns)
        if missing:
            raise KeyError(f'label table {csv_file!r} is missing columns: {sorted(missing)}')

    def __len__(self):
        """Number of rows in the label table."""
        return len(self.labels_frame)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for the row at position ``idx``."""
        # Materialise the row once; it is needed for both the path and the label.
        row = self.labels_frame.iloc[idx]

        # Only the file name of the stored path is used; the directory always
        # comes from ``img_dir``.
        file_name = row['image_path'].split('/')[-1]
        img_path = os.path.join(self.img_dir, file_name)

        # The image is opened as stored, with no mode conversion.
        # NOTE(review): the original comment said "read as grayscale" — that
        # only holds if the files were saved single-channel; confirm.
        image = Image.open(img_path)
        label = row[self.label_col]

        if self.transform:
            image = self.transform(image)

        return image, label, img_path
    
# Preprocessing: ToTensor turns the PIL image into a float tensor, Normalize
# then applies (x - 0.5) / 0.5. There is no resize step, so images keep the
# size they were generated with.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Datasets and loaders for the 20-day charts.
train_dataset = StockImageDataset(
    csv_file='./labels_train/train_labels_i20r20.feather',
    img_dir='./charts_train/20d_charts',
    transform=transform
)

test_dataset = StockImageDataset(
    csv_file='./labels_train/test_labels_i20r20.feather',
    img_dir='./charts_train/20d_charts',
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# NOTE(review): the test split is also the validation set that drives early
# stopping, so accuracy measured on it is optimistically biased. Consider a
# separate validation split.
val_loader = DataLoader(test_dataset, batch_size=128)

model, criterion, optimizer = get_model_optimizer()
trained_model, predictions = train_and_predict(model, train_loader, val_loader, criterion, optimizer)

# Attach the predictions to the test label table.
test_df = pd.read_feather('./labels_train/test_labels_i20r20.feather')
# `predictions` is keyed by the path the dataset rebuilt from img_dir, which
# need not be character-identical to the stored `image_path`. Join on the file
# name so a differing prefix or separator cannot silently produce NaN.
pred_by_name = {os.path.basename(path): pred for path, pred in predictions.items()}
test_df['pre_label_20'] = test_df['image_path'].map(os.path.basename).map(pred_by_name)
assert test_df['pre_label_20'].notna().all(), 'some test images have no prediction'
test_df.to_feather('./labels_train/test_labels_with_predictions_i20r20.feather')

print("训练完成")

In [None]:
# Load the test labels together with the saved predictions.
test_df = pd.read_feather('./labels_train/test_labels_with_predictions_i20r20.feather')

# Accuracy: percentage of rows whose predicted label equals the true label.
total_predictions = test_df.shape[0]
correct_predictions = test_df['label_20'].eq(test_df['pre_label_20']).sum()
accuracy = correct_predictions / total_predictions * 100

print(f"accuracy: {accuracy:.2f}%")

## i20r60

In [None]:
def get_model_optimizer(learning_rate=1e-5):
    """Build a fresh 20-day CNN with its loss and optimizer.

    Args:
        learning_rate: Learning rate passed to Adam (default 1e-5).

    Returns:
        ``(model, criterion, optimizer)``: a new ``CNN20DModel``, a
        ``CrossEntropyLoss`` and an Adam optimizer over the model parameters.
    """
    net = CNN20DModel()
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)
    return net, loss_fn, adam

# 训练函数，包含早停机制

def train_and_predict(model, train_loader, val_loader, criterion, optimizer,
                      max_epochs=200, patience=2, run_name='i20r60'):
    """Train with early stopping and return the best checkpoint with its predictions.

    Each epoch runs one pass over ``train_loader`` and one evaluation pass over
    ``val_loader``. Whenever the validation loss (mean of the per-batch losses)
    improves, the weights are saved to
    ``./labels_train/best_model_{run_name}.pth`` and that epoch's predictions
    are kept. Training stops after ``patience`` consecutive epochs without
    improvement, or after ``max_epochs``. The best weights are then reloaded so
    the returned model and predictions belong to the same epoch, and the loss
    curves are plotted and saved to ``./labels_train/{run_name}_loss_curve.png``.

    Args:
        model: Network to train; moved to the module-level ``device``.
        train_loader: Yields ``(data, target, path)`` batches for training.
        val_loader: Yields ``(data, target, paths)`` batches for validation.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer updating ``model``'s parameters.
        max_epochs: Upper bound on the number of epochs.
        patience: Consecutive non-improving epochs tolerated before stopping.
        run_name: Tag used in the checkpoint / plot file names and plot title.

    Returns:
        ``(model, predictions)`` where ``predictions`` maps each validation
        path to the predicted class index (argmax over dim 1) from the epoch
        with the lowest validation loss.
    """
    model = model.to(device)
    checkpoint_path = f'./labels_train/best_model_{run_name}.pth'

    best_val_loss = float('inf')
    patience_counter = 0
    checkpoint_saved = False  # True once this run has written a checkpoint
    predictions = {}          # predictions of the most recent validation pass
    best_predictions = {}     # predictions of the epoch with the lowest val loss

    # Per-epoch loss history for the plot below.
    train_losses = []
    val_losses = []

    for epoch in range(max_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pbar = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{max_epochs}]')

        for batch_idx, (data, target, _) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            # Show the running mean of the batch losses on the progress bar.
            pbar.set_postfix({'train_loss': f'{train_loss/(batch_idx+1):.4f}'})

        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        predictions = {}  # fresh dict so a kept best snapshot is never mutated
        with torch.no_grad():
            for data, target, paths in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                # Predicted class = index of the largest output along dim 1.
                pred = output.argmax(dim=1).cpu().numpy()
                for path, p in zip(paths, pred):
                    predictions[path] = int(p)

        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{max_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}')

        # ---- early stopping ----
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_predictions = predictions
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # 1-based, consistent with the per-epoch messages above.
                print(f'Early stopping at epoch {epoch+1}')
                break

    if checkpoint_saved:
        # Restore the best weights on every exit path, including max_epochs.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    else:
        # The loss never improved (e.g. NaN): do not load a stale file from an
        # earlier run; fall back to the last epoch's predictions.
        best_predictions = predictions

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{run_name} Train and Validation Loss Over Epochs')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'./labels_train/{run_name}_loss_curve.png')  # write the figure to disk
    plt.show()

    return model, best_predictions


class StockImageDataset(Dataset):
    """Chart-image dataset backed by a feather label table.

    Each item is a tuple ``(image, label, img_path)``: the (optionally
    transformed) image, the value of the selected label column, and the path
    the image was loaded from.
    """

    def __init__(self, csv_file, img_dir, transform=None, label_col='label_60'):
        """
        Args:
            csv_file: Path to the label table. Despite the parameter name the
                file is read with ``pd.read_feather``. The table must contain
                an ``image_path`` column and the ``label_col`` column.
            img_dir: Directory the image files are loaded from.
            transform: Optional callable applied to the opened PIL image.
            label_col: Name of the label column returned by ``__getitem__``.

        Raises:
            KeyError: If a required column is missing from the label table.
        """
        self.labels_frame = pd.read_feather(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_col = label_col

        # Fail here rather than on the first batch inside a DataLoader.
        missing = {'image_path', label_col} - set(self.labels_frame.columns)
        if missing:
            raise KeyError(f'label table {csv_file!r} is missing columns: {sorted(missing)}')

    def __len__(self):
        """Number of rows in the label table."""
        return len(self.labels_frame)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for the row at position ``idx``."""
        # Materialise the row once; it is needed for both the path and the label.
        row = self.labels_frame.iloc[idx]

        # Only the file name of the stored path is used; the directory always
        # comes from ``img_dir``.
        file_name = row['image_path'].split('/')[-1]
        img_path = os.path.join(self.img_dir, file_name)

        # The image is opened as stored, with no mode conversion.
        # NOTE(review): the original comment said "read as grayscale" — that
        # only holds if the files were saved single-channel; confirm.
        image = Image.open(img_path)
        label = row[self.label_col]

        if self.transform:
            image = self.transform(image)

        return image, label, img_path
    
# Preprocessing: ToTensor turns the PIL image into a float tensor, Normalize
# then applies (x - 0.5) / 0.5. There is no resize step, so images keep the
# size they were generated with.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Datasets and loaders for the 20-day charts.
train_dataset = StockImageDataset(
    csv_file='./labels_train/train_labels_i20r60.feather',
    img_dir='./charts_train/20d_charts',
    transform=transform
)

test_dataset = StockImageDataset(
    csv_file='./labels_train/test_labels_i20r60.feather',
    img_dir='./charts_train/20d_charts',
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# NOTE(review): the test split is also the validation set that drives early
# stopping, so accuracy measured on it is optimistically biased. Consider a
# separate validation split.
val_loader = DataLoader(test_dataset, batch_size=128)


model, criterion, optimizer = get_model_optimizer()
trained_model, predictions = train_and_predict(model, train_loader, val_loader, criterion, optimizer)

# Attach the predictions to the test label table.
test_df = pd.read_feather('./labels_train/test_labels_i20r60.feather')
# `predictions` is keyed by the path the dataset rebuilt from img_dir, which
# need not be character-identical to the stored `image_path`. Join on the file
# name so a differing prefix or separator cannot silently produce NaN.
pred_by_name = {os.path.basename(path): pred for path, pred in predictions.items()}
test_df['pre_label_60'] = test_df['image_path'].map(os.path.basename).map(pred_by_name)
assert test_df['pre_label_60'].notna().all(), 'some test images have no prediction'
test_df.to_feather('./labels_train/test_labels_with_predictions_i20r60.feather')

print("训练完成")

In [None]:
# Load the test labels together with the saved predictions.
test_df = pd.read_feather('./labels_train/test_labels_with_predictions_i20r60.feather')

# Accuracy: percentage of rows whose predicted label equals the true label.
total_predictions = test_df.shape[0]
correct_predictions = test_df['label_60'].eq(test_df['pre_label_60']).sum()
accuracy = correct_predictions / total_predictions * 100

print(f"accuracy: {accuracy:.2f}%")

## i60r5

In [None]:
def get_model_optimizer(learning_rate=1e-5):
    """Build a fresh 60-day CNN with its loss and optimizer.

    Args:
        learning_rate: Learning rate passed to Adam (default 1e-5).

    Returns:
        ``(model, criterion, optimizer)``: a new ``CNN60DModel``, a
        ``CrossEntropyLoss`` and an Adam optimizer over the model parameters.
    """
    net = CNN60DModel()
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)
    return net, loss_fn, adam

# 训练函数，包含早停机制
def train_and_predict(model, train_loader, val_loader, criterion, optimizer,
                      max_epochs=200, patience=5, run_name='i60r5'):
    """Train with early stopping and return the best checkpoint with its predictions.

    Each epoch runs one pass over ``train_loader`` and one evaluation pass over
    ``val_loader``. Whenever the validation loss (mean of the per-batch losses)
    improves, the weights are saved to
    ``./labels_train/best_model_{run_name}.pth`` and that epoch's predictions
    are kept. Training stops after ``patience`` consecutive epochs without
    improvement, or after ``max_epochs``. The best weights are then reloaded so
    the returned model and predictions belong to the same epoch, and the loss
    curves are plotted and saved to ``./labels_train/{run_name}_loss_curve.png``.

    Args:
        model: Network to train; moved to the module-level ``device``.
        train_loader: Yields ``(data, target, path)`` batches for training.
        val_loader: Yields ``(data, target, paths)`` batches for validation.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer updating ``model``'s parameters.
        max_epochs: Upper bound on the number of epochs.
        patience: Consecutive non-improving epochs tolerated before stopping.
        run_name: Tag used in the checkpoint / plot file names and plot title.

    Returns:
        ``(model, predictions)`` where ``predictions`` maps each validation
        path to the predicted class index (argmax over dim 1) from the epoch
        with the lowest validation loss.
    """
    model = model.to(device)
    checkpoint_path = f'./labels_train/best_model_{run_name}.pth'

    best_val_loss = float('inf')
    patience_counter = 0
    checkpoint_saved = False  # True once this run has written a checkpoint
    predictions = {}          # predictions of the most recent validation pass
    best_predictions = {}     # predictions of the epoch with the lowest val loss

    # Per-epoch loss history for the plot below.
    train_losses = []
    val_losses = []

    for epoch in range(max_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pbar = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{max_epochs}]')

        for batch_idx, (data, target, _) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            # Show the running mean of the batch losses on the progress bar.
            pbar.set_postfix({'train_loss': f'{train_loss/(batch_idx+1):.4f}'})

        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        predictions = {}  # fresh dict so a kept best snapshot is never mutated
        with torch.no_grad():
            for data, target, paths in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                # Predicted class = index of the largest output along dim 1.
                pred = output.argmax(dim=1).cpu().numpy()
                for path, p in zip(paths, pred):
                    predictions[path] = int(p)

        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{max_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}')

        # ---- early stopping ----
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_predictions = predictions
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # 1-based, consistent with the per-epoch messages above.
                print(f'Early stopping at epoch {epoch+1}')
                break

    if checkpoint_saved:
        # Restore the best weights on every exit path, including max_epochs.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    else:
        # The loss never improved (e.g. NaN): do not load a stale file from an
        # earlier run; fall back to the last epoch's predictions.
        best_predictions = predictions

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{run_name} Train and Validation Loss Over Epochs')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'./labels_train/{run_name}_loss_curve.png')  # write the figure to disk
    plt.show()

    return model, best_predictions


class StockImageDataset(Dataset):
    """Chart-image dataset backed by a feather label table.

    Each item is a tuple ``(image, label, img_path)``: the (optionally
    transformed) image, the value of the selected label column, and the path
    the image was loaded from.
    """

    def __init__(self, csv_file, img_dir, transform=None, label_col='label_5'):
        """
        Args:
            csv_file: Path to the label table. Despite the parameter name the
                file is read with ``pd.read_feather``. The table must contain
                an ``image_path`` column and the ``label_col`` column.
            img_dir: Directory the image files are loaded from.
            transform: Optional callable applied to the opened PIL image.
            label_col: Name of the label column returned by ``__getitem__``.

        Raises:
            KeyError: If a required column is missing from the label table.
        """
        self.labels_frame = pd.read_feather(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_col = label_col

        # Fail here rather than on the first batch inside a DataLoader.
        missing = {'image_path', label_col} - set(self.labels_frame.columns)
        if missing:
            raise KeyError(f'label table {csv_file!r} is missing columns: {sorted(missing)}')

    def __len__(self):
        """Number of rows in the label table."""
        return len(self.labels_frame)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for the row at position ``idx``."""
        # Materialise the row once; it is needed for both the path and the label.
        row = self.labels_frame.iloc[idx]

        # Only the file name of the stored path is used; the directory always
        # comes from ``img_dir``.
        file_name = row['image_path'].split('/')[-1]
        img_path = os.path.join(self.img_dir, file_name)

        # The image is opened as stored, with no mode conversion.
        # NOTE(review): the original comment said "read as grayscale" — that
        # only holds if the files were saved single-channel; confirm.
        image = Image.open(img_path)
        label = row[self.label_col]

        if self.transform:
            image = self.transform(image)

        return image, label, img_path
    
# Preprocessing: ToTensor turns the PIL image into a float tensor, Normalize
# then applies (x - 0.5) / 0.5. There is no resize step, so images keep the
# size they were generated with.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Datasets and loaders for the 60-day charts.
train_dataset = StockImageDataset(
    csv_file='./labels_train/train_labels_i60r5.feather',
    img_dir='./charts_train/60d_charts',
    transform=transform
)

test_dataset = StockImageDataset(
    csv_file='./labels_train/test_labels_i60r5.feather',
    img_dir='./charts_train/60d_charts',
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# NOTE(review): the test split is also the validation set that drives early
# stopping, so accuracy measured on it is optimistically biased. Consider a
# separate validation split.
val_loader = DataLoader(test_dataset, batch_size=128)

model, criterion, optimizer = get_model_optimizer()
trained_model, predictions = train_and_predict(model, train_loader, val_loader, criterion, optimizer)

# Attach the predictions to the test label table.
test_df = pd.read_feather('./labels_train/test_labels_i60r5.feather')
# `predictions` is keyed by the path the dataset rebuilt from img_dir, which
# need not be character-identical to the stored `image_path`. Join on the file
# name so a differing prefix or separator cannot silently produce NaN.
pred_by_name = {os.path.basename(path): pred for path, pred in predictions.items()}
test_df['pre_label_5'] = test_df['image_path'].map(os.path.basename).map(pred_by_name)
assert test_df['pre_label_5'].notna().all(), 'some test images have no prediction'
test_df.to_feather('./labels_train/test_labels_with_predictions_i60r5.feather')

print("训练完成")

In [None]:
# Load the test labels together with the saved predictions.
test_df = pd.read_feather('./labels_train/test_labels_with_predictions_i60r5.feather')

# Accuracy: percentage of rows whose predicted label equals the true label.
total_predictions = test_df.shape[0]
correct_predictions = test_df['label_5'].eq(test_df['pre_label_5']).sum()
accuracy = correct_predictions / total_predictions * 100

print(f"accuracy: {accuracy:.2f}%")

## i60r20

In [None]:
def get_model_optimizer(learning_rate=1e-5):
    """Build a fresh 60-day CNN with its loss and optimizer.

    Args:
        learning_rate: Learning rate passed to Adam (default 1e-5).

    Returns:
        ``(model, criterion, optimizer)``: a new ``CNN60DModel``, a
        ``CrossEntropyLoss`` and an Adam optimizer over the model parameters.
    """
    net = CNN60DModel()
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)
    return net, loss_fn, adam
# 训练函数，包含早停机制

def train_and_predict(model, train_loader, val_loader, criterion, optimizer,
                      max_epochs=200, patience=5, run_name='i60r20'):
    """Train with early stopping and return the best checkpoint with its predictions.

    Each epoch runs one pass over ``train_loader`` and one evaluation pass over
    ``val_loader``. Whenever the validation loss (mean of the per-batch losses)
    improves, the weights are saved to
    ``./labels_train/best_model_{run_name}.pth`` and that epoch's predictions
    are kept. Training stops after ``patience`` consecutive epochs without
    improvement, or after ``max_epochs``. The best weights are then reloaded so
    the returned model and predictions belong to the same epoch, and the loss
    curves are plotted and saved to ``./labels_train/{run_name}_loss_curve.png``.

    Args:
        model: Network to train; moved to the module-level ``device``.
        train_loader: Yields ``(data, target, path)`` batches for training.
        val_loader: Yields ``(data, target, paths)`` batches for validation.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer updating ``model``'s parameters.
        max_epochs: Upper bound on the number of epochs.
        patience: Consecutive non-improving epochs tolerated before stopping.
        run_name: Tag used in the checkpoint / plot file names and plot title.

    Returns:
        ``(model, predictions)`` where ``predictions`` maps each validation
        path to the predicted class index (argmax over dim 1) from the epoch
        with the lowest validation loss.
    """
    model = model.to(device)
    checkpoint_path = f'./labels_train/best_model_{run_name}.pth'

    best_val_loss = float('inf')
    patience_counter = 0
    checkpoint_saved = False  # True once this run has written a checkpoint
    predictions = {}          # predictions of the most recent validation pass
    best_predictions = {}     # predictions of the epoch with the lowest val loss

    # Per-epoch loss history for the plot below.
    train_losses = []
    val_losses = []

    for epoch in range(max_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pbar = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{max_epochs}]')

        for batch_idx, (data, target, _) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            # Show the running mean of the batch losses on the progress bar.
            pbar.set_postfix({'train_loss': f'{train_loss/(batch_idx+1):.4f}'})

        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        predictions = {}  # fresh dict so a kept best snapshot is never mutated
        with torch.no_grad():
            for data, target, paths in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                # Predicted class = index of the largest output along dim 1.
                pred = output.argmax(dim=1).cpu().numpy()
                for path, p in zip(paths, pred):
                    predictions[path] = int(p)

        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{max_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}')

        # ---- early stopping ----
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_predictions = predictions
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # 1-based, consistent with the per-epoch messages above.
                print(f'Early stopping at epoch {epoch+1}')
                break

    if checkpoint_saved:
        # Restore the best weights on every exit path, including max_epochs.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    else:
        # The loss never improved (e.g. NaN): do not load a stale file from an
        # earlier run; fall back to the last epoch's predictions.
        best_predictions = predictions

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{run_name} Train and Validation Loss Over Epochs')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'./labels_train/{run_name}_loss_curve.png')  # write the figure to disk
    plt.show()

    return model, best_predictions

class StockImageDataset(Dataset):
    """Chart-image dataset backed by a feather label table.

    Each item is a tuple ``(image, label, img_path)``: the (optionally
    transformed) image, the value of the selected label column, and the path
    the image was loaded from.
    """

    def __init__(self, csv_file, img_dir, transform=None, label_col='label_20'):
        """
        Args:
            csv_file: Path to the label table. Despite the parameter name the
                file is read with ``pd.read_feather``. The table must contain
                an ``image_path`` column and the ``label_col`` column.
            img_dir: Directory the image files are loaded from.
            transform: Optional callable applied to the opened PIL image.
            label_col: Name of the label column returned by ``__getitem__``.

        Raises:
            KeyError: If a required column is missing from the label table.
        """
        self.labels_frame = pd.read_feather(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_col = label_col

        # Fail here rather than on the first batch inside a DataLoader.
        missing = {'image_path', label_col} - set(self.labels_frame.columns)
        if missing:
            raise KeyError(f'label table {csv_file!r} is missing columns: {sorted(missing)}')

    def __len__(self):
        """Number of rows in the label table."""
        return len(self.labels_frame)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for the row at position ``idx``."""
        # Materialise the row once; it is needed for both the path and the label.
        row = self.labels_frame.iloc[idx]

        # Only the file name of the stored path is used; the directory always
        # comes from ``img_dir``.
        file_name = row['image_path'].split('/')[-1]
        img_path = os.path.join(self.img_dir, file_name)

        # The image is opened as stored, with no mode conversion.
        # NOTE(review): the original comment said "read as grayscale" — that
        # only holds if the files were saved single-channel; confirm.
        image = Image.open(img_path)
        label = row[self.label_col]

        if self.transform:
            image = self.transform(image)

        return image, label, img_path
    
# Preprocessing: ToTensor turns the PIL image into a float tensor, Normalize
# then applies (x - 0.5) / 0.5. There is no resize step, so images keep the
# size they were generated with.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Datasets and loaders for the 60-day charts.
train_dataset = StockImageDataset(
    csv_file='./labels_train/train_labels_i60r20.feather',
    img_dir='./charts_train/60d_charts',
    transform=transform
)

test_dataset = StockImageDataset(
    csv_file='./labels_train/test_labels_i60r20.feather',
    img_dir='./charts_train/60d_charts',
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# NOTE(review): the test split is also the validation set that drives early
# stopping, so accuracy measured on it is optimistically biased. Consider a
# separate validation split.
val_loader = DataLoader(test_dataset, batch_size=128)

model, criterion, optimizer = get_model_optimizer()
trained_model, predictions = train_and_predict(model, train_loader, val_loader, criterion, optimizer)

# Attach the predictions to the test label table.
test_df = pd.read_feather('./labels_train/test_labels_i60r20.feather')
# `predictions` is keyed by the path the dataset rebuilt from img_dir, which
# need not be character-identical to the stored `image_path`. Join on the file
# name so a differing prefix or separator cannot silently produce NaN.
pred_by_name = {os.path.basename(path): pred for path, pred in predictions.items()}
test_df['pre_label_20'] = test_df['image_path'].map(os.path.basename).map(pred_by_name)
assert test_df['pre_label_20'].notna().all(), 'some test images have no prediction'
test_df.to_feather('./labels_train/test_labels_with_predictions_i60r20.feather')

print("训练完成")

In [None]:
# Load the test labels together with the saved predictions.
test_df = pd.read_feather('./labels_train/test_labels_with_predictions_i60r20.feather')

# Accuracy: percentage of rows whose predicted label equals the true label.
total_predictions = test_df.shape[0]
correct_predictions = test_df['label_20'].eq(test_df['pre_label_20']).sum()
accuracy = correct_predictions / total_predictions * 100

print(f"accuracy: {accuracy:.2f}%")

## i60r60

In [None]:
def get_model_optimizer(learning_rate=1e-5):
    """Build a fresh 60-day CNN with its loss and optimizer.

    Args:
        learning_rate: Learning rate passed to Adam (default 1e-5).

    Returns:
        ``(model, criterion, optimizer)``: a new ``CNN60DModel``, a
        ``CrossEntropyLoss`` and an Adam optimizer over the model parameters.
    """
    net = CNN60DModel()
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)
    return net, loss_fn, adam
# 训练函数，包含早停机制

def train_and_predict(model, train_loader, val_loader, criterion, optimizer,
                      max_epochs=200, patience=5, run_name='i60r60'):
    """Train with early stopping and return the best checkpoint with its predictions.

    Each epoch runs one pass over ``train_loader`` and one evaluation pass over
    ``val_loader``. Whenever the validation loss (mean of the per-batch losses)
    improves, the weights are saved to
    ``./labels_train/best_model_{run_name}.pth`` and that epoch's predictions
    are kept. Training stops after ``patience`` consecutive epochs without
    improvement, or after ``max_epochs``. The best weights are then reloaded so
    the returned model and predictions belong to the same epoch, and the loss
    curves are plotted and saved to ``./labels_train/{run_name}_loss_curve.png``.

    Args:
        model: Network to train; moved to the module-level ``device``.
        train_loader: Yields ``(data, target, path)`` batches for training.
        val_loader: Yields ``(data, target, paths)`` batches for validation.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer updating ``model``'s parameters.
        max_epochs: Upper bound on the number of epochs.
        patience: Consecutive non-improving epochs tolerated before stopping.
        run_name: Tag used in the checkpoint / plot file names and plot title.

    Returns:
        ``(model, predictions)`` where ``predictions`` maps each validation
        path to the predicted class index (argmax over dim 1) from the epoch
        with the lowest validation loss.
    """
    model = model.to(device)
    checkpoint_path = f'./labels_train/best_model_{run_name}.pth'

    best_val_loss = float('inf')
    patience_counter = 0
    checkpoint_saved = False  # True once this run has written a checkpoint
    predictions = {}          # predictions of the most recent validation pass
    best_predictions = {}     # predictions of the epoch with the lowest val loss

    # Per-epoch loss history for the plot below.
    train_losses = []
    val_losses = []

    for epoch in range(max_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pbar = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{max_epochs}]')

        for batch_idx, (data, target, _) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            # Show the running mean of the batch losses on the progress bar.
            pbar.set_postfix({'train_loss': f'{train_loss/(batch_idx+1):.4f}'})

        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        predictions = {}  # fresh dict so a kept best snapshot is never mutated
        with torch.no_grad():
            for data, target, paths in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                # Predicted class = index of the largest output along dim 1.
                pred = output.argmax(dim=1).cpu().numpy()
                for path, p in zip(paths, pred):
                    predictions[path] = int(p)

        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{max_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}')

        # ---- early stopping ----
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_predictions = predictions
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # 1-based, consistent with the per-epoch messages above.
                print(f'Early stopping at epoch {epoch+1}')
                break

    if checkpoint_saved:
        # Restore the best weights on every exit path, including max_epochs.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    else:
        # The loss never improved (e.g. NaN): do not load a stale file from an
        # earlier run; fall back to the last epoch's predictions.
        best_predictions = predictions

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{run_name} Train and Validation Loss Over Epochs')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'./labels_train/{run_name}_loss_curve.png')  # write the figure to disk
    plt.show()

    return model, best_predictions

class StockImageDataset(Dataset):
    """Chart-image dataset backed by a feather label table.

    Each item is a tuple ``(image, label, img_path)``: the (optionally
    transformed) image, the value of the selected label column, and the path
    the image was loaded from.
    """

    def __init__(self, csv_file, img_dir, transform=None, label_col='label_60'):
        """
        Args:
            csv_file: Path to the label table. Despite the parameter name the
                file is read with ``pd.read_feather``. The table must contain
                an ``image_path`` column and the ``label_col`` column.
            img_dir: Directory the image files are loaded from.
            transform: Optional callable applied to the opened PIL image.
            label_col: Name of the label column returned by ``__getitem__``.

        Raises:
            KeyError: If a required column is missing from the label table.
        """
        self.labels_frame = pd.read_feather(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_col = label_col

        # Fail here rather than on the first batch inside a DataLoader.
        missing = {'image_path', label_col} - set(self.labels_frame.columns)
        if missing:
            raise KeyError(f'label table {csv_file!r} is missing columns: {sorted(missing)}')

    def __len__(self):
        """Number of rows in the label table."""
        return len(self.labels_frame)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for the row at position ``idx``."""
        # Materialise the row once; it is needed for both the path and the label.
        row = self.labels_frame.iloc[idx]

        # Only the file name of the stored path is used; the directory always
        # comes from ``img_dir``.
        file_name = row['image_path'].split('/')[-1]
        img_path = os.path.join(self.img_dir, file_name)

        # The image is opened as stored, with no mode conversion.
        # NOTE(review): the original comment said "read as grayscale" — that
        # only holds if the files were saved single-channel; confirm.
        image = Image.open(img_path)
        label = row[self.label_col]

        if self.transform:
            image = self.transform(image)

        return image, label, img_path
    
# Preprocessing: ToTensor turns the PIL image into a float tensor, Normalize
# then applies (x - 0.5) / 0.5. There is no resize step, so images keep the
# size they were generated with.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Datasets and loaders for the 60-day charts.
train_dataset = StockImageDataset(
    csv_file='./labels_train/train_labels_i60r60.feather',
    img_dir='./charts_train/60d_charts',
    transform=transform
)

test_dataset = StockImageDataset(
    csv_file='./labels_train/test_labels_i60r60.feather',
    img_dir='./charts_train/60d_charts',
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# NOTE(review): the test split is also the validation set that drives early
# stopping, so accuracy measured on it is optimistically biased. Consider a
# separate validation split.
val_loader = DataLoader(test_dataset, batch_size=128)

model, criterion, optimizer = get_model_optimizer()
trained_model, predictions = train_and_predict(model, train_loader, val_loader, criterion, optimizer)

# Attach the predictions to the test label table.
test_df = pd.read_feather('./labels_train/test_labels_i60r60.feather')
# `predictions` is keyed by the path the dataset rebuilt from img_dir, which
# need not be character-identical to the stored `image_path`. Join on the file
# name so a differing prefix or separator cannot silently produce NaN.
pred_by_name = {os.path.basename(path): pred for path, pred in predictions.items()}
test_df['pre_label_60'] = test_df['image_path'].map(os.path.basename).map(pred_by_name)
assert test_df['pre_label_60'].notna().all(), 'some test images have no prediction'
test_df.to_feather('./labels_train/test_labels_with_predictions_i60r60.feather')

print("训练完成")

In [None]:
# Load the test labels together with the saved predictions.
test_df = pd.read_feather('./labels_train/test_labels_with_predictions_i60r60.feather')

# Accuracy: percentage of rows whose predicted label equals the true label.
total_predictions = test_df.shape[0]
correct_predictions = test_df['label_60'].eq(test_df['pre_label_60']).sum()
accuracy = correct_predictions / total_predictions * 100

print(f"accuracy: {accuracy:.2f}%")