In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [10]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        # self.relu1 = nn.ReLU()
        self.relu1 = nn.Sigmoid()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        # self.relu2 = nn.ReLU()
        self.relu2 = nn.Sigmoid()
        self.fc3 = nn.Linear(hidden_size, num_classes)
        
        self.dropout1 = nn.Dropout()
        self.dropout2 = nn.Dropout()
        
        

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.dropout1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.dropout2(out)
        out = self.fc3(out)
        return out

In [14]:
# 读取CSV文件
data = pd.read_csv('new_preprocessed_data.csv')

# 分割特征和标签
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# 将数据转换为PyTorch张量
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
from imblearn.over_sampling import SMOTE

# 创建SMOTE对象，指定采样策略和参数
smote = SMOTE(sampling_strategy='auto', k_neighbors=5, random_state=42)

# 对训练集进行过采样
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
X_train = torch.tensor(X_train_resampled, dtype=torch.float32)
y_train = torch.tensor(y_train_resampled, dtype=torch.long)


In [16]:


# 创建数据加载器
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# 定义模型参数
input_size = X.shape[1]
hidden_size = 128
num_classes = len(torch.unique(y))

# 实例化模型
model = NeuralNetwork(input_size, hidden_size, num_classes)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)

# 训练模型
num_epochs = 20
# 20

for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        # 向前传播
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# 在测试集上进行预测
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, targets in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

    print(f'Accuracy on test set: {100 * correct / total:.2f}%')


from sklearn.metrics import confusion_matrix

# 假设model是训练好的神经网络模型

# 用模型进行预测
model.eval()
predictions = []
labels = []

with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        predictions.extend(predicted.tolist())
        labels.extend(targets.tolist())

# 计算混淆矩阵
cm = confusion_matrix(labels, predictions)

# 计算每个类别的真阳性（TP）和假阴性（FN）
num_classes = cm.shape[0]
recall_per_class = []

for i in range(num_classes):
    TP = cm[i, i]
    FN = sum(cm[i, :]) - TP
    recall = TP / (TP + FN)
    recall_per_class.append(recall)

print("Recall per class:")
for i, recall in enumerate(recall_per_class):
    print(f"Class {i}: {recall:.4f}")
    
from sklearn.metrics import precision_score

# 假设predictions和labels是模型的预测结果和真实标签
# 使用sklearn.metrics.precision_score计算精确度
precision = precision_score(labels, predictions, average='weighted')
print(precision)

Epoch [1/20], Loss: 1.6039
Epoch [2/20], Loss: 1.6114
Epoch [3/20], Loss: 1.5982
Epoch [4/20], Loss: 1.5917
Epoch [5/20], Loss: 1.5975
Epoch [6/20], Loss: 1.6228
Epoch [7/20], Loss: 1.5991
Epoch [8/20], Loss: 1.6430
Epoch [9/20], Loss: 1.6185
Epoch [10/20], Loss: 1.6117
Epoch [11/20], Loss: 1.5998
Epoch [12/20], Loss: 1.6120
Epoch [13/20], Loss: 1.6162
Epoch [14/20], Loss: 1.6161
Epoch [15/20], Loss: 1.5824
Epoch [16/20], Loss: 1.5638
Epoch [17/20], Loss: 1.6095
Epoch [18/20], Loss: 1.5992
Epoch [19/20], Loss: 1.5746
Epoch [20/20], Loss: 1.6517
Accuracy on test set: 10.61%
Recall per class:
Class 0: 0.0991
Class 1: 0.3065
Class 2: 0.0201
Class 3: 0.1597
Class 4: 0.5833
0.7020388041304245


Accuracy on test set: 82.14%
Recall per class:
Class 0: 1.0000
Class 1: 0.0000
Class 2: 0.0000
Class 3: 0.0000
Class 4: 0.0000
0.6746306956959182

with resampling:
Epoch [1/20], Loss: 1.6039
Epoch [2/20], Loss: 1.6114
Epoch [3/20], Loss: 1.5982
Epoch [4/20], Loss: 1.5917
Epoch [5/20], Loss: 1.5975
Epoch [6/20], Loss: 1.6228
Epoch [7/20], Loss: 1.5991
Epoch [8/20], Loss: 1.6430
Epoch [9/20], Loss: 1.6185
Epoch [10/20], Loss: 1.6117
Epoch [11/20], Loss: 1.5998
Epoch [12/20], Loss: 1.6120
Epoch [13/20], Loss: 1.6162
Epoch [14/20], Loss: 1.6161
Epoch [15/20], Loss: 1.5824
Epoch [16/20], Loss: 1.5638
Epoch [17/20], Loss: 1.6095
Epoch [18/20], Loss: 1.5992
Epoch [19/20], Loss: 1.5746
Epoch [20/20], Loss: 1.6517
Accuracy on test set: 10.61%
Recall per class:
Class 0: 0.0991
Class 1: 0.3065
Class 2: 0.0201
Class 3: 0.1597
Class 4: 0.5833
0.7020388041304245