mobilenet_v3_large with softmax and SGD

In [25]:
import torch
import torchvision
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import matplotlib.pyplot as plt
from torchinfo import summary
import os
from sklearn.model_selection import train_test_split
from PIL import Image
import torch.optim as optim
from torchvision.models import swin_s
from torchvision.models import mobilenet_v3_large
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from collections import OrderedDict
from tqdm import tqdm

In [26]:
from elpv_reader import load_dataset
images, proba, types = load_dataset()

In [27]:
# check version number
import imblearn
from imblearn.over_sampling import SMOTE

train_set, test_set, train_lables, test_lables = \
train_test_split(images, proba, test_size=0.25,random_state=42)

my_mapping = {0.0:0, 0.3333333333333333:1, 0.6666666666666666:2, 1.0:3}

for i in range(len(train_set)):
    train_lables[i] = my_mapping[train_lables[i]]

for i in range(len(test_set)):
    test_lables[i] = my_mapping[test_lables[i]]

In [28]:
count_list = [0,0,0,0]
for i in range(len(train_set)):
    if train_lables[i] == 0:
        count_list[0] += 1
    if train_lables[i] == 1:
        count_list[1] += 1
    if train_lables[i] == 2:
        count_list[2] += 1
    if train_lables[i] == 3:
        count_list[3] += 1 
for i, j in zip(['The 0.0 has:', 'The 0.33 has:', 'The 0.66 has:', 'The 1.0 has:'], count_list):
    print(i, j)

The 0.0 has: 1136
The 0.33 has: 222
The 0.66 has: 75
The 1.0 has: 535


In [29]:
class CustomDataset(Dataset):
    def __init__(self, images, labels, transform=False):
        self.images = images
        self.label_mapping = {0: np.eye(4)[0], 1: np.eye(4)[1],\
                              2: np.eye(4)[2], 3: np.eye(4)[3]}
        self.labels = [self.label_mapping[label] for label in labels]
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        image = Image.fromarray(image)

        # 如果图像为灰度图像，将其复制到三个通道，转换为 RGB 图像
        if image.mode == 'L':
            image = image.convert('RGB')

        if self.transform:
            image = self.transform(image)
        return image, label


transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
    ])


custom_train_dataset = CustomDataset(train_set, train_lables, transform)
custom_test_dataset = CustomDataset(test_set, test_lables, transform)


batch_size = 32
train_loader = DataLoader(custom_train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(custom_test_dataset, batch_size=batch_size, shuffle=False)

In [30]:
if torch.cuda.is_available():
    device = torch.device("cuda")  
else:
    device = torch.device("cpu")  

model = mobilenet_v3_large(weights=True)

classifier1 = nn.Sequential(OrderedDict([('fc1', nn.Linear(960, 4)),
                           ('output', nn.Softmax(dim=1))
                          ]))
    
model.classifier = classifier1

model.to(device)
print()






In [31]:

lossfunc = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=9e-3)

save_path = '.\model.pth'
best_accuracy = 79
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)  # 移动数据到 CUDA
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = lossfunc(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss = train_loss / len(train_loader)
    print(f'Epoch {epoch + 1}/{num_epochs} - Training Loss: {train_loss:.4f}')


    model.eval()
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)  
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            _, real_labels = torch.max(labels, 1)
            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(real_labels.cpu().numpy())

    
    accuracy = accuracy_score(all_labels, all_predictions)
    conf_matrix = confusion_matrix(all_labels, all_predictions)

    if best_accuracy < accuracy:
        best_accuracy = accuracy
        torch.save(model.state_dict(), save_path)
        print(f'Epoch [{epoch + 1}/{num_epochs}] - Saved Best Model (Best Accuracy: {best_accuracy:.4f})')

    print(f'Accuracy: {accuracy:.4f}')
    print('Confusion Matrix:')
    print(conf_matrix)

100%|██████████| 62/62 [00:05<00:00, 12.01it/s]


Epoch 1/30 - Training Loss: 1.1982
Accuracy: 0.4421
Confusion Matrix:
[[277  45  50   0]
 [ 54   8  11   0]
 [ 25   1   5   0]
 [138   8  34   0]]


100%|██████████| 62/62 [00:05<00:00, 12.19it/s]


Epoch 2/30 - Training Loss: 1.1240
Accuracy: 0.4345
Confusion Matrix:
[[268   0  46  58]
 [ 52   0  10  11]
 [ 22   0   7   2]
 [136   0  34  10]]


100%|██████████| 62/62 [00:05<00:00, 12.04it/s]


Epoch 3/30 - Training Loss: 1.0928
Accuracy: 0.4954
Confusion Matrix:
[[314   1   9  48]
 [ 56   0   4  13]
 [ 29   0   0   2]
 [163   1   5  11]]


100%|██████████| 62/62 [00:05<00:00, 12.00it/s]


Epoch 4/30 - Training Loss: 1.0724
Accuracy: 0.5107
Confusion Matrix:
[[302   0   0  70]
 [ 62   0   0  11]
 [ 27   0   0   4]
 [143   1   3  33]]


100%|██████████| 62/62 [00:09<00:00,  6.36it/s]


Epoch 5/30 - Training Loss: 1.0589
Accuracy: 0.5213
Confusion Matrix:
[[295   0   1  76]
 [ 57   1   0  15]
 [ 25   0   0   6]
 [132   1   1  46]]


100%|██████████| 62/62 [00:05<00:00, 11.56it/s]


Epoch 6/30 - Training Loss: 1.0472
Accuracy: 0.5579
Confusion Matrix:
[[297   0   0  75]
 [ 50   0   0  23]
 [ 17   0   0  14]
 [111   0   0  69]]


100%|██████████| 62/62 [00:05<00:00, 11.81it/s]


Epoch 7/30 - Training Loss: 1.0391
Accuracy: 0.6738
Confusion Matrix:
[[340   0   0  32]
 [ 67   0   0   6]
 [ 24   0   0   7]
 [ 78   0   0 102]]


100%|██████████| 62/62 [00:05<00:00, 11.55it/s]


Epoch 8/30 - Training Loss: 1.0310
Accuracy: 0.7012
Confusion Matrix:
[[354   0   0  18]
 [ 70   0   0   3]
 [ 27   0   0   4]
 [ 74   0   0 106]]


100%|██████████| 62/62 [00:05<00:00, 11.25it/s]


Epoch 9/30 - Training Loss: 1.0259
Accuracy: 0.7027
Confusion Matrix:
[[359   0   0  13]
 [ 69   0   0   4]
 [ 28   0   0   3]
 [ 78   0   0 102]]


100%|██████████| 62/62 [00:05<00:00, 11.06it/s]


Epoch 10/30 - Training Loss: 1.0228
Accuracy: 0.7012
Confusion Matrix:
[[337   0   0  35]
 [ 65   0   0   8]
 [ 23   0   0   8]
 [ 57   0   0 123]]


100%|██████████| 62/62 [00:05<00:00, 11.13it/s]


Epoch 11/30 - Training Loss: 1.0172
Accuracy: 0.7104
Confusion Matrix:
[[347   0   0  25]
 [ 67   0   0   6]
 [ 27   0   0   4]
 [ 61   0   0 119]]


100%|██████████| 62/62 [00:05<00:00, 10.99it/s]


Epoch 12/30 - Training Loss: 1.0119
Accuracy: 0.7210
Confusion Matrix:
[[354   0   0  18]
 [ 70   0   0   3]
 [ 29   0   0   2]
 [ 61   0   0 119]]


100%|██████████| 62/62 [00:05<00:00, 10.88it/s]


Epoch 13/30 - Training Loss: 1.0065
Accuracy: 0.7058
Confusion Matrix:
[[354   0   0  18]
 [ 69   0   0   4]
 [ 29   0   0   2]
 [ 71   0   0 109]]


100%|██████████| 62/62 [00:05<00:00, 10.79it/s]


Epoch 14/30 - Training Loss: 1.0038
Accuracy: 0.7210
Confusion Matrix:
[[358   0   0  14]
 [ 70   0   0   3]
 [ 29   0   0   2]
 [ 65   0   0 115]]


100%|██████████| 62/62 [00:05<00:00, 10.78it/s]


Epoch 15/30 - Training Loss: 0.9975
Accuracy: 0.7195
Confusion Matrix:
[[360   0   0  12]
 [ 70   0   0   3]
 [ 29   0   0   2]
 [ 68   0   0 112]]


100%|██████████| 62/62 [00:05<00:00, 10.97it/s]


Epoch 16/30 - Training Loss: 0.9957
Accuracy: 0.7149
Confusion Matrix:
[[357   0   0  15]
 [ 70   0   0   3]
 [ 28   0   0   3]
 [ 68   0   0 112]]


100%|██████████| 62/62 [00:05<00:00, 10.80it/s]


Epoch 17/30 - Training Loss: 0.9894
Accuracy: 0.7271
Confusion Matrix:
[[362   0   0  10]
 [ 67   0   0   6]
 [ 28   0   0   3]
 [ 65   0   0 115]]


100%|██████████| 62/62 [00:05<00:00, 10.75it/s]


Epoch 18/30 - Training Loss: 0.9900
Accuracy: 0.7210
Confusion Matrix:
[[357   0   0  15]
 [ 67   0   0   6]
 [ 27   0   0   4]
 [ 64   0   0 116]]


100%|██████████| 62/62 [00:05<00:00, 10.61it/s]


Epoch 19/30 - Training Loss: 0.9879
Accuracy: 0.7210
Confusion Matrix:
[[363   0   0   9]
 [ 69   0   0   4]
 [ 29   0   0   2]
 [ 70   0   0 110]]


100%|██████████| 62/62 [00:05<00:00, 10.85it/s]


Epoch 20/30 - Training Loss: 0.9805
Accuracy: 0.7271
Confusion Matrix:
[[360   0   0  12]
 [ 69   0   0   4]
 [ 28   0   0   3]
 [ 63   0   0 117]]


100%|██████████| 62/62 [00:05<00:00, 10.70it/s]


Epoch 21/30 - Training Loss: 0.9771
Accuracy: 0.7256
Confusion Matrix:
[[360   0   0  12]
 [ 68   0   0   5]
 [ 27   0   0   4]
 [ 64   0   0 116]]


100%|██████████| 62/62 [00:05<00:00, 10.62it/s]


Epoch 22/30 - Training Loss: 0.9762
Accuracy: 0.7287
Confusion Matrix:
[[354   0   0  18]
 [ 69   0   0   4]
 [ 26   0   0   5]
 [ 56   0   0 124]]


100%|██████████| 62/62 [00:06<00:00,  9.66it/s]


Epoch 23/30 - Training Loss: 0.9745
Accuracy: 0.7332
Confusion Matrix:
[[353   0   0  19]
 [ 65   0   0   8]
 [ 23   0   0   8]
 [ 52   0   0 128]]


100%|██████████| 62/62 [00:06<00:00,  9.85it/s]


Epoch 24/30 - Training Loss: 0.9639
Accuracy: 0.7256
Confusion Matrix:
[[358   0   0  14]
 [ 69   0   0   4]
 [ 27   0   0   4]
 [ 62   0   0 118]]


100%|██████████| 62/62 [00:06<00:00,  9.92it/s]


Epoch 25/30 - Training Loss: 0.9565
Accuracy: 0.7378
Confusion Matrix:
[[350   0   0  22]
 [ 65   0   0   8]
 [ 24   0   0   7]
 [ 46   0   0 134]]


100%|██████████| 62/62 [00:06<00:00,  9.80it/s]


Epoch 26/30 - Training Loss: 0.9620
Accuracy: 0.7378
Confusion Matrix:
[[359   0   0  13]
 [ 66   0   0   7]
 [ 25   0   0   6]
 [ 55   0   0 125]]


100%|██████████| 62/62 [00:06<00:00,  9.80it/s]


Epoch 27/30 - Training Loss: 0.9542
Accuracy: 0.7348
Confusion Matrix:
[[351   0   0  21]
 [ 67   0   0   6]
 [ 25   0   0   6]
 [ 49   0   0 131]]


100%|██████████| 62/62 [00:06<00:00,  9.46it/s]


Epoch 28/30 - Training Loss: 0.9523
Accuracy: 0.7302
Confusion Matrix:
[[355   0   0  17]
 [ 66   0   0   7]
 [ 23   0   0   8]
 [ 56   0   0 124]]


100%|██████████| 62/62 [00:06<00:00,  9.80it/s]


Epoch 29/30 - Training Loss: 0.9491
Accuracy: 0.7302
Confusion Matrix:
[[361   0   0  11]
 [ 66   0   0   7]
 [ 26   0   0   5]
 [ 62   0   0 118]]


100%|██████████| 62/62 [00:06<00:00,  9.86it/s]


Epoch 30/30 - Training Loss: 0.9454
Accuracy: 0.7363
Confusion Matrix:
[[355   0   0  17]
 [ 66   0   0   7]
 [ 22   0   0   9]
 [ 52   0   0 128]]


In [32]:
# 创建两个子集，分别存储mono和poly的标签和预测
mono_subset_labels = [label for label, img_type in zip(all_labels, types) if img_type == "mono"]
mono_subset_predictions = [pred for pred, img_type in zip(all_predictions, types) if img_type == "mono"]
poly_subset_labels = [label for label, img_type in zip(all_labels, types) if img_type == "poly"]
poly_subset_predictions = [pred for pred, img_type in zip(all_predictions, types) if img_type == "poly"]

# 计算不同类型的准确率和混淆矩阵
accuracy = accuracy_score(all_labels, all_predictions)
accuracy_mono = accuracy_score(mono_subset_labels, mono_subset_predictions)
accuracy_poly = accuracy_score(poly_subset_labels, poly_subset_predictions)
conf_matrix = confusion_matrix(all_labels, all_predictions)
conf_matrix_mono = confusion_matrix(mono_subset_labels, mono_subset_predictions)
conf_matrix_poly = confusion_matrix(poly_subset_labels, poly_subset_predictions)
f1_total = f1_score(all_labels, all_predictions, average='macro')
f1_mono = f1_score(mono_subset_labels, mono_subset_predictions, average='macro')
f1_poly = f1_score(poly_subset_labels, poly_subset_predictions, average='macro')

print('The total accuracy is:', accuracy)
print()
print('The accuracy of mono type is:', accuracy_mono)
print()
print('The accuracy of poly type is:', accuracy_poly)
print()
print('The total confusion matrix is:\n', conf_matrix)
print()
print('The confusion matrix of mono type is:\n', conf_matrix_mono)
print()
print('The confusion matrix of poly type is:\n', conf_matrix_poly)
print()
print('The total F1 Score is:', f1_total)
print()
print('The F1 Score of mono type is:', f1_mono)
print()
print('The F1 Score of poly type is:', f1_poly)


The total accuracy is: 0.7362804878048781

The accuracy of mono type is: 0.7125

The accuracy of poly type is: 0.75

The total confusion matrix is:
 [[355   0   0  17]
 [ 66   0   0   7]
 [ 22   0   0   9]
 [ 52   0   0 128]]

The confusion matrix of mono type is:
 [[129   0   0   5]
 [ 26   0   0   3]
 [  9   0   0   3]
 [ 23   0   0  42]]

The confusion matrix of poly type is:
 [[226   0   0  12]
 [ 40   0   0   4]
 [ 13   0   0   6]
 [ 29   0   0  86]]

The total F1 Score is: 0.39241223486116894

The F1 Score of mono type is: 0.3789006811341676

The F1 Score of poly type is: 0.39978481906733027
