mobilenet_v3_large with Relu and SGD

In [1]:
import torch
import torchvision
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import matplotlib.pyplot as plt
from torchinfo import summary
import os
from sklearn.model_selection import train_test_split
from PIL import Image
import torch.optim as optim
from torchvision.models import swin_s
from torchvision.models import mobilenet_v3_large
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from collections import OrderedDict
from tqdm import tqdm

In [2]:
from elpv_reader import load_dataset
images, proba, types = load_dataset()

In [3]:
# check version number
import imblearn
from imblearn.over_sampling import SMOTE

train_set, test_set, train_lables, test_lables = \
train_test_split(images, proba, test_size=0.25,random_state=42)

my_mapping = {0.0:0, 0.3333333333333333:1, 0.6666666666666666:2, 1.0:3}

for i in range(len(train_set)):
    train_lables[i] = my_mapping[train_lables[i]]

for i in range(len(test_set)):
    test_lables[i] = my_mapping[test_lables[i]]

In [4]:
count_list = [0,0,0,0]
for i in range(len(train_set)):
    if train_lables[i] == 0:
        count_list[0] += 1
    if train_lables[i] == 1:
        count_list[1] += 1
    if train_lables[i] == 2:
        count_list[2] += 1
    if train_lables[i] == 3:
        count_list[3] += 1 
for i, j in zip(['The 0.0 has:', 'The 0.33 has:', 'The 0.66 has:', 'The 1.0 has:'], count_list):
    print(i, j)

The 0.0 has: 1136
The 0.33 has: 222
The 0.66 has: 75
The 1.0 has: 535


In [5]:
class CustomDataset(Dataset):
    def __init__(self, images, labels, transform=False):
        self.images = images
        self.label_mapping = {0: np.eye(4)[0], 1: np.eye(4)[1],\
                              2: np.eye(4)[2], 3: np.eye(4)[3]}
        self.labels = [self.label_mapping[label] for label in labels]
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        image = Image.fromarray(image)

        # 如果图像为灰度图像，将其复制到三个通道，转换为 RGB 图像
        if image.mode == 'L':
            image = image.convert('RGB')

        if self.transform:
            image = self.transform(image)
        return image, label


transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
    ])


custom_train_dataset = CustomDataset(train_set, train_lables, transform)
custom_test_dataset = CustomDataset(test_set, test_lables, transform)


batch_size = 32
train_loader = DataLoader(custom_train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(custom_test_dataset, batch_size=batch_size, shuffle=False)

In [6]:
if torch.cuda.is_available():
    device = torch.device("cuda")  
else:
    device = torch.device("cpu")  

model = mobilenet_v3_large(weights=True)

classifier1 = nn.Sequential(OrderedDict([('fc1', nn.Linear(960, 4)),
                           ('output', nn.ReLU())
                          ]))
    
model.classifier = classifier1

model.to(device)
print()






In [7]:

lossfunc = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=9e-3)

save_path = '.\model.pth'
best_accuracy = 75
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)  # 移动数据到 CUDA
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = lossfunc(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss = train_loss / len(train_loader)
    print(f'Epoch {epoch + 1}/{num_epochs} - Training Loss: {train_loss:.4f}')


    model.eval()
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)  
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            _, real_labels = torch.max(labels, 1)
            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(real_labels.cpu().numpy())

    
    accuracy = accuracy_score(all_labels, all_predictions)
    conf_matrix = confusion_matrix(all_labels, all_predictions)

    if best_accuracy < accuracy:
        best_accuracy = accuracy
        torch.save(model.state_dict(), save_path)
        print(f'Epoch [{epoch + 1}/{num_epochs}] - Saved Best Model (Best Accuracy: {best_accuracy:.4f})')

    print(f'Accuracy: {accuracy:.4f}')
    print('Confusion Matrix:')
    print(conf_matrix)

100%|██████████| 62/62 [00:08<00:00,  6.96it/s]


Epoch 1/20 - Training Loss: 0.9859
Accuracy: 0.3445
Confusion Matrix:
[[ 67   1   2 302]
 [  6   0   0  67]
 [  7   0   0  24]
 [ 13   0   8 159]]


100%|██████████| 62/62 [00:05<00:00, 10.82it/s]


Epoch 2/20 - Training Loss: 0.8161
Accuracy: 0.4055
Confusion Matrix:
[[188   2   1 181]
 [ 27   0   0  46]
 [ 14   0   1  16]
 [102   1   0  77]]


100%|██████████| 62/62 [00:05<00:00, 10.40it/s]


Epoch 3/20 - Training Loss: 0.7526
Accuracy: 0.4527
Confusion Matrix:
[[274   1   0  97]
 [ 51   0   1  21]
 [ 28   0   0   3]
 [156   0   1  23]]


100%|██████████| 62/62 [00:05<00:00, 10.82it/s]


Epoch 4/20 - Training Loss: 0.6981
Accuracy: 0.5137
Confusion Matrix:
[[265   0   0 107]
 [ 51   0   0  22]
 [ 19   0   0  12]
 [107   1   0  72]]


100%|██████████| 62/62 [00:05<00:00, 10.79it/s]


Epoch 5/20 - Training Loss: 0.6674
Accuracy: 0.6067
Confusion Matrix:
[[259   0   0 113]
 [ 45   0   0  28]
 [ 13   0   0  18]
 [ 41   0   0 139]]


100%|██████████| 62/62 [00:05<00:00, 10.52it/s]


Epoch 6/20 - Training Loss: 0.6260
Accuracy: 0.6006
Confusion Matrix:
[[269   0   0 103]
 [ 49   2   0  22]
 [ 16   0   0  15]
 [ 57   0   0 123]]


100%|██████████| 62/62 [00:05<00:00, 10.44it/s]


Epoch 7/20 - Training Loss: 0.5830
Accuracy: 0.6021
Confusion Matrix:
[[266   2   0 104]
 [ 39   7   0  27]
 [ 17   0   0  14]
 [ 58   0   0 122]]


100%|██████████| 62/62 [00:05<00:00, 10.40it/s]


Epoch 8/20 - Training Loss: 0.5375
Accuracy: 0.6418
Confusion Matrix:
[[262   6   0 104]
 [ 35  10   0  28]
 [ 15   0   0  16]
 [ 31   0   0 149]]


100%|██████████| 62/62 [00:06<00:00, 10.26it/s]


Epoch 9/20 - Training Loss: 0.4985
Accuracy: 0.7241
Confusion Matrix:
[[330   8   0  34]
 [ 52  11   0  10]
 [ 26   0   0   5]
 [ 45   1   0 134]]


100%|██████████| 62/62 [00:06<00:00, 10.15it/s]


Epoch 10/20 - Training Loss: 0.4644
Accuracy: 0.7256
Confusion Matrix:
[[317   5   0  50]
 [ 45  11   0  17]
 [ 19   0   0  12]
 [ 29   3   0 148]]


100%|██████████| 62/62 [00:06<00:00, 10.17it/s]


Epoch 11/20 - Training Loss: 0.4375
Accuracy: 0.7271
Confusion Matrix:
[[307  20   0  45]
 [ 34  26   0  13]
 [ 15   4   0  12]
 [ 30   6   0 144]]


100%|██████████| 62/62 [00:06<00:00, 10.03it/s]


Epoch 12/20 - Training Loss: 0.4011
Accuracy: 0.7409
Confusion Matrix:
[[324  12   0  36]
 [ 35  27   0  11]
 [ 20   2   0   9]
 [ 40   5   0 135]]


100%|██████████| 62/62 [00:06<00:00,  9.94it/s]


Epoch 13/20 - Training Loss: 0.3679
Accuracy: 0.7287
Confusion Matrix:
[[310  30   0  32]
 [ 31  31   0  11]
 [ 18   2   0  11]
 [ 36   7   0 137]]


100%|██████████| 62/62 [00:06<00:00, 10.09it/s]


Epoch 14/20 - Training Loss: 0.3427
Accuracy: 0.7134
Confusion Matrix:
[[302  41   0  29]
 [ 35  31   0   7]
 [ 16   9   0   6]
 [ 38   7   0 135]]


100%|██████████| 62/62 [00:06<00:00, 10.11it/s]


Epoch 15/20 - Training Loss: 0.3065
Accuracy: 0.7515
Confusion Matrix:
[[316  13   0  43]
 [ 35  31   0   7]
 [ 14   5   0  12]
 [ 26   8   0 146]]


100%|██████████| 62/62 [00:06<00:00, 10.11it/s]


Epoch 16/20 - Training Loss: 0.2763
Accuracy: 0.7515
Confusion Matrix:
[[341   7   0  24]
 [ 48  19   0   6]
 [ 16   2   0  13]
 [ 39   8   0 133]]


100%|██████████| 62/62 [00:06<00:00, 10.05it/s]


Epoch 17/20 - Training Loss: 0.2472
Accuracy: 0.7591
Confusion Matrix:
[[351  15   0   6]
 [ 44  27   0   2]
 [ 23   2   0   6]
 [ 53   7   0 120]]


100%|██████████| 62/62 [00:06<00:00, 10.01it/s]


Epoch 18/20 - Training Loss: 0.2201
Accuracy: 0.7363
Confusion Matrix:
[[315  27   0  30]
 [ 34  32   0   7]
 [ 14   5   1  11]
 [ 34  11   0 135]]


100%|██████████| 62/62 [00:06<00:00,  9.92it/s]


Epoch 19/20 - Training Loss: 0.2028
Accuracy: 0.6631
Confusion Matrix:
[[249  55   0  68]
 [ 19  35   1  18]
 [ 11   6   2  12]
 [ 21  10   0 149]]


100%|██████████| 62/62 [00:06<00:00,  9.90it/s]


Epoch 20/20 - Training Loss: 0.1863
Accuracy: 0.7729
Confusion Matrix:
[[340  11   0  21]
 [ 38  26   1   8]
 [ 18   5   2   6]
 [ 34   5   2 139]]


In [8]:
# 创建两个子集，分别存储mono和poly的标签和预测
mono_subset_labels = [label for label, img_type in zip(all_labels, types) if img_type == "mono"]
mono_subset_predictions = [pred for pred, img_type in zip(all_predictions, types) if img_type == "mono"]
poly_subset_labels = [label for label, img_type in zip(all_labels, types) if img_type == "poly"]
poly_subset_predictions = [pred for pred, img_type in zip(all_predictions, types) if img_type == "poly"]

# 计算不同类型的准确率和混淆矩阵
accuracy = accuracy_score(all_labels, all_predictions)
accuracy_mono = accuracy_score(mono_subset_labels, mono_subset_predictions)
accuracy_poly = accuracy_score(poly_subset_labels, poly_subset_predictions)
conf_matrix = confusion_matrix(all_labels, all_predictions)
conf_matrix_mono = confusion_matrix(mono_subset_labels, mono_subset_predictions)
conf_matrix_poly = confusion_matrix(poly_subset_labels, poly_subset_predictions)
f1_total = f1_score(all_labels, all_predictions, average='macro')
f1_mono = f1_score(mono_subset_labels, mono_subset_predictions, average='macro')
f1_poly = f1_score(poly_subset_labels, poly_subset_predictions, average='macro')

print('The total accuracy is:', accuracy)
print()
print('The accuracy of mono type is:', accuracy_mono)
print()
print('The accuracy of poly type is:', accuracy_poly)
print()
print('The total confusion matrix is:\n', conf_matrix)
print()
print('The confusion matrix of mono type is:\n', conf_matrix_mono)
print()
print('The confusion matrix of poly type is:\n', conf_matrix_poly)
print()
print('The total F1 Score is:', f1_total)
print()
print('The F1 Score of mono type is:', f1_mono)
print()
print('The F1 Score of poly type is:', f1_poly)


The total accuracy is: 0.7728658536585366

The accuracy of mono type is: 0.7291666666666666

The accuracy of poly type is: 0.7980769230769231

The total confusion matrix is:
 [[340  11   0  21]
 [ 38  26   1   8]
 [ 18   5   2   6]
 [ 34   5   2 139]]

The confusion matrix of mono type is:
 [[121   7   0   6]
 [ 18   6   0   5]
 [  7   3   0   2]
 [ 13   2   2  48]]

The confusion matrix of poly type is:
 [[219   4   0  15]
 [ 20  20   1   3]
 [ 11   2   2   4]
 [ 21   3   0  91]]

The total F1 Score is: 0.5444088695398979

The F1 Score of mono type is: 0.4607906193484583

The F1 Score of poly type is: 0.5971299517084261
