In [9]:
import torch
import torch.nn as nn
import pandas as pd
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10

In [64]:
# 定义CNN模型
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # 定义一维卷积层
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3)
        # 定义池化层
        self.pool = nn.MaxPool1d(kernel_size=2)
        # 定义全连接层
        self.fc1 = nn.Linear(16 * 4, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        # 将输入的十维向量转换为二维张量
        x = x.unsqueeze(1)
        # 卷积层
        x = self.conv1(x)
        # 激活函数
        x = torch.relu(x)
        # 池化层
        x = self.pool(x)
        # 将张量展平为一维向量
        x = x.view(-1, 16 * 4)
        # 全连接层
        x = self.fc1(x)
        # 激活函数
        x = torch.relu(x)
        # 输出层
        x = self.fc2(x)

        return x

# 定义训练函数
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        print(outputs)
        print(labels)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)

    train_loss = running_loss / len(train_loader.dataset)
    return train_loss

# 定义测试函数
def test(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_loss = running_loss / len(test_loader.dataset)
    test_acc = correct / total
    return test_loss, test_acc



In [65]:
#随机交叉验证数据导入
def load_data(cols,data,train_data,train_label,train_sample,test_data,test_label,test_sample):
    random.shuffle(cols)
    for col in cols:
        if cols.index(col) <= int(len(cols)*0.6):
            train_data.append(list(data.loc[:,col]))
            train_sample.append(col)
            if col.split('_')[0][-1] == 'N':
                train_label.append(0)
            else:
                train_label.append(1)
        else:
            test_data.append(list(data.loc[:,col]))
            test_sample.append(col)
            if col.split('_')[0][-1] == 'N':
                test_label.append(0)
            else:
                test_label.append(1)

In [66]:
import copy
import random
#导入数据集
N_data = pd.read_csv("./data/N.txt",sep="\t")
T_data = pd.read_csv("./data/T.txt",sep="\t")
n_cols = N_data.columns.tolist()[1:]
t_cols = T_data.columns.tolist()[1:]
t_copy_cols = copy.deepcopy(t_cols)
#random.shuffle(t_copy_cols)
t_copy_cols = t_copy_cols[:len(n_cols)]
data = N_data.merge(T_data)
train_data = []
train_label = []
train_sample = []
test_data = []
test_label = []
test_sample = []
load_data(n_cols,data,train_data,train_label,train_sample,test_data,test_label,test_sample)
load_data(t_copy_cols,data,train_data,train_label,train_sample,test_data,test_label,test_sample)
print(train_data)


[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0638297872340423, 0.0], [1.0411865356599306, 1.573791278814635, 0.6869736571839339, 0.0628330651695134, 0.2184964568142127, 0.1961518491892313, 0.150621055994887, 0.0046293270592335, 0.0501737359213992, 8.133073625525057], [0.1454691880891765, 0.108009132546634, 2.124227158433873, 0.6840327968944395, 0.2620830539986735, 1.5208410793300535, 1.5730070130200686, 0.259539115534229, 0.0026742376969855, 22.035251571307573], [0.0479396831622759, 0.8526415076719087, 0.2353402627966275, 0.0, 0.0, 0.1451264953912536, 0.125514806824868, 0.2745636399293988, 0.385696541805584, 2.706413022161217], [0.0483767102353003, 0.1120090721414266, 0.6994952343807448, 1.432255390991527, 0.0266876185354606, 2.134720174156602, 1.9945072181257224, 0.1772370753853568, 0.1076991081050299, 4.195358694385392], [2.5200613745789258, 0.5674503095183493, 0.8730004761820759, 0.1513200825382265, 0.0268615531132946, 0.4481402444401322, 0.7973404349129626, 0.0, 1.3968007618913214, 

In [67]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
# 加载自己的数据集，假设已经将数据存储在名为 "data.npy" 的文件中
data = torch.from_numpy(np.array(train_data)).float()
#data = torch.LongTensor(data)
# 加载自己的标签，假设已经将标签存储在名为 "labels.npy" 的文件中
labels = torch.from_numpy(np.array(train_label)).float()
#labels = torch.LongTensor(labels)
# 创建 TensorDataset 实例
train_dataset = TensorDataset(data, labels)
# 加载自己的数据集，假设已经将数据存储在名为 "data.npy" 的文件中
data = torch.from_numpy(np.array(test_data)).float()
#data = torch.LongTensor(data)
# 加载自己的标签，假设已经将标签存储在名为 "labels.npy" 的文件中
labels = torch.from_numpy(np.array(test_label)).float()
#labels = torch.LongTensor(labels)
test_dataset = TensorDataset(data, labels)
labels


tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])

In [68]:
# 设置随机种子
torch.manual_seed(42)
# 定义超参数
batch_size = 1
learning_rate = 0.001
num_epochs = 2

import torch
from torch.utils.data import TensorDataset, DataLoader

# # 数据预处理
# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# ])
#
# # 加载数据集
# train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)
# test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



# 创建模型实例和优化器
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_loader
# 训练模型
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc = test(model, test_loader, criterion, device)
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')


tensor([[ 0.4208, -0.0152,  0.0315, -0.2340,  0.0633,  0.0652, -0.1410,  0.1994,
         -0.1013,  0.1021]], grad_fn=<AddmmBackward0>)
tensor([0.])


RuntimeError: expected scalar type Long but found Float