In [1]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'melanoma-skin-cancer-dataset-of-10000-images:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F2035877%2F3376422%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240422%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240422T063147Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D8f56f822c946e9f054e93b4b2e781d69604d6cbbef0e49331a2b794c78970e1349116a3fba3cf01c5cafb6b9cc5e06bcfbcdede483890505696937a6efc9884d67a272a1b222623eb86bb83fa215e01225404e86ab4f95a38e61b53a3a1a91c2156a4b26dc4c86a4ac12c110b49f82059486c161ab7dcbd27119ed01ea482f69c6ce30dc6edb4396c0b21dffc8df30a5ead5e32686237aa91bce43d08b134a28f67c36a85b6b0597d905a0dfd44695d363b32afb50e1451c06b803a36ec7b5d6c2b097d1c163dbfc652e4ea2dee58d24bb63a39ee0ee2546231041ed9785daaa93f8e348ec294b101fa5245861343c79618956fe36b2aa5e9815a4867754557c'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')

Downloading melanoma-skin-cancer-dataset-of-10000-images, 103508268 bytes compressed
Downloaded and uncompressed: melanoma-skin-cancer-dataset-of-10000-images
Data source import complete.


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train/malignant/melanoma_7508.jpg
/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train/malignant/melanoma_7547.jpg
/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train/malignant/melanoma_7042.jpg
/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train/malignant/melanoma_9220.jpg
/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train/malignant/melanoma_6136.jpg
/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train/malignant/melanoma_6120.jpg
/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train/malignant/melanoma_8533.jpg
/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train/malignant/melanoma_7123.jpg
/kaggle/input/melanoma-skin-c

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
#定義設備和所需函式庫
!pip3 install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl
!pip3 install torchvision

[31mERROR: torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl is not a supported wheel on this platform.[0m[31m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.1->torchvision)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.1->torchvision)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.1->torchvision)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.2.1->torchvision)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.2.1->torchvision)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.2.1->torchvision)
  Usi

In [5]:
#檢查設備，設定gpu
import torch
print(torch.cuda.is_available())
device = torch.device("cuda")
!nvidia-smi
print(device)
!pwd

True
Tue Apr 23 06:22:44 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P8               9W /  70W |      3MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                               

In [6]:
import torch
import torchvision
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from torch.nn import Linear,Flatten,Conv2d,Sequential,MaxPool2d
import os
from PIL import Image
from torch import nn

class Mydataset(Dataset): #获取文件夹里的图片，并返回图片和label

    def __init__(self,root_dir,label_dir,transform=None):
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir,self.label_dir)
        self.imgs_path = os.listdir(self.path)
        self.transform = transform

    def __getitem__(self, idx):
        img_name = self.imgs_path[idx]
        img_item_path = os.path.join(self.root_dir,self.label_dir,img_name)
        img = Image.open(img_item_path).convert('RGB') #转换成RGB形式的三通道图像
        if self.transform is not None:
            img = self.transform(img)
        label = self.label_dir
        if label=='benign':
          label = torch.tensor(0,dtype=int)
        if label=='malignant':
          label = torch.tensor(1,dtype=int)
        return img,label

    def __len__(self):
        return len(self.imgs_path)


#初始化container
class mymodules(nn.Module):
  def __init__(self):
        super(mymodules,self).__init__()
        self.module1 = Sequential(
            Conv2d(3,32,5,1,2),
            MaxPool2d(2),
            Conv2d(32,32,5,1,2),
            MaxPool2d(2),
            Conv2d(32,64,5,1,2),
            MaxPool2d(2),
            Flatten(),
            Linear(64*4*4,64),
            Linear(64,2)
            )
  def forward(self, x):
        x = self.module1(x)
        return x


# if __name__ =='__main__':
#     Mymodules = mymodules()
#     input = torch.ones((30,3,32,32))
#     output = Mymodules(input)
#     print(output.shape)

In [7]:
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from torchvision import transforms
from torch import nn
import numpy as np
import torchvision
import torch

#宣告重要變數
label_Benign = "benign"
label_Malignant = "malignant"

#讀取dataset，並修改成tensor
dataset_norm = transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
dataset_resize = transforms.Resize((224,224))
dataset_totensor = transforms.ToTensor()
dataset_transform_compose = transforms.Compose([dataset_totensor,dataset_norm,dataset_resize])

#建立train_set
root_dir = "/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train"
Benign_dataset = Mydataset(root_dir,label_Benign,dataset_transform_compose)
Malignant_dataset = Mydataset(root_dir,label_Malignant,dataset_transform_compose)
train_set = Benign_dataset+Malignant_dataset

#建立test_set
root_dir = "/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/test"
Benign_dataset = Mydataset(root_dir,label_Benign,dataset_transform_compose)
Malignant_dataset = Mydataset(root_dir,label_Malignant,dataset_transform_compose)
test_set = Benign_dataset+Malignant_dataset

#切割資料集
# train_ratio = 0.8
# train_size = int(len(train_set)*0.8)
# test_size = len(cancer_dataset)-train_size
# train_set,test_set = torch.utils.data.random_split(cancer_dataset,[train_size,test_size])

#讀到loader裡面
train_dataloader = DataLoader(dataset=train_set,batch_size=64,shuffle=True,num_workers=0,drop_last=True)
test_dataloader = DataLoader(dataset=test_set,batch_size=64,shuffle=True,num_workers=0,drop_last=True)
#圖片裝到容器的處理
vgg16 = torchvision.models.vgg16(progress=False,num_classes=2)
Mymodules = vgg16
Mymodules.to(device)
loss_cross = nn.CrossEntropyLoss()
loss_cross.to(device)
#設定優化器
optim = torch.optim.SGD(Mymodules.parameters(),lr=0.01)

# length 長度
train_data_size = len(train_set)
test_data_size = len(test_set)

print("訓練集長度：{}".format(train_data_size))
print("測試集長度：{}".format(test_data_size))

#設定需要的變數
#紀錄訓練次數
total_train_step = 0
#記錄測試次數
total_test_step = 0
#訓練輪數
epoch = 10
#加入tensorboard
writer = SummaryWriter("./logs_train")

for i in range(epoch):
    print("-------第 {} 輪訓練開始-------".format(i+1))

    #訓練步驟開始
    Mymodules.train()
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = Mymodules(imgs)
        loss = loss_cross(outputs, targets)

        #優化器模型
        optim.zero_grad()
        loss.backward()
        optim.step()

        total_train_step = total_train_step + 1
        #每訓練50次才會印出，並且記錄到tensorboard
        if total_train_step % 50 == 0:
            print("訓練次數：{}, Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    #測試步驟開始
    Mymodules.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = Mymodules(imgs)
            loss = loss_cross(outputs, targets)
            total_test_loss = total_test_loss + loss.item()

            #計算準確率
            prediction = outputs.argmax(1)
            accuracy = (prediction==targets).sum()
            total_accuracy = total_accuracy+accuracy

    print("整體測試集上的Loss: {}".format(total_test_loss))
    print("整體測試集上的正確率:{:.2f}".format(total_accuracy/test_data_size))
    writer.add_scalar("test_loss",total_test_loss,total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)
    total_test_step = total_test_step + 1

    torch.save(Mymodules, "Mymodules_{}.pth".format(i))
    print("模型已保存")

writer.close()


訓練集長度：9605
測試集長度：1000
-------第 1 輪訓練開始-------
訓練次數：50, Loss: 0.6249301433563232
訓練次數：100, Loss: 0.5374617576599121
訓練次數：150, Loss: 0.4079572856426239
整體測試集上的Loss: 4.80490779876709
整體測試集上的正確率:0.83
模型已保存
-------第 2 輪訓練開始-------
訓練次數：200, Loss: 0.3561549186706543
訓練次數：250, Loss: 0.4645082354545593
訓練次數：300, Loss: 0.25579988956451416
整體測試集上的Loss: 4.294448167085648
整體測試集上的正確率:0.85
模型已保存
-------第 3 輪訓練開始-------
訓練次數：350, Loss: 0.3770611584186554
訓練次數：400, Loss: 0.23668178915977478
訓練次數：450, Loss: 0.28955185413360596
整體測試集上的Loss: 4.27553652971983
整體測試集上的正確率:0.85
模型已保存
-------第 4 輪訓練開始-------
訓練次數：500, Loss: 0.2119666337966919
訓練次數：550, Loss: 0.2545451521873474
訓練次數：600, Loss: 0.5309053659439087
整體測試集上的Loss: 4.8876005709171295
整體測試集上的正確率:0.81
模型已保存
-------第 5 輪訓練開始-------
訓練次數：650, Loss: 0.2293769121170044
訓練次數：700, Loss: 0.29585129022598267
訓練次數：750, Loss: 0.2438284456729889
整體測試集上的Loss: 3.967190593481064
整體測試集上的正確率:0.86
模型已保存
-------第 6 輪訓練開始-------
訓練次數：800, Loss: 0.24823476374149323
訓練次數：8